This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Make Changes refer to the upcoming release for the full changelog
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
a7aaec61
Z
129#define XENUMMASK 0x3f
130#define XFAKEEOF 0x40
131#define XFAKEBRACK 0x80
9059aa12 132
39e02b42
JH
133#ifdef USE_UTF8_SCRIPTS
134# define UTF (!IN_BYTES)
2b9d42f0 135#else
746b446a 136# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 137#endif
a0ed51b3 138
b1fc3636
CJ
139/* The maximum number of characters preceding the unrecognized one to display */
140#define UNRECOGNIZED_PRECEDE_COUNT 10
141
61f0cdd9 142/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
143 * 1999-02-27 mjd-perl-patch@plover.com */
144#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
145
bf4acbe4 146#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 147
ffb4593c
NT
148/* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
79072805
LW
150 * can get by with a single comparison (if the compiler is smart enough).
151 */
152
fb73857a 153/* #define LEX_NOTPARSING 11 is done in perl.h. */
154
b6007c36
DM
155#define LEX_NORMAL 10 /* normal code (ie not within "...") */
156#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
157#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
158#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
159#define LEX_INTERPSTART 6 /* expecting the start of a $var */
160
161 /* at end of code, eg "$x" followed by: */
162#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
163#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
164
165#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
166 string or after \E, $foo, etc */
167#define LEX_INTERPCONST 2 /* NOT USED */
168#define LEX_FORMLINE 1 /* expecting a format line */
169#define LEX_KNOWNEXT 0 /* next token known; just return it */
170
79072805 171
bbf60fe6 172#ifdef DEBUGGING
27da23d5 173static const char* const lex_state_names[] = {
bbf60fe6
DM
174 "KNOWNEXT",
175 "FORMLINE",
176 "INTERPCONST",
177 "INTERPCONCAT",
178 "INTERPENDMAYBE",
179 "INTERPEND",
180 "INTERPSTART",
181 "INTERPPUSH",
182 "INTERPCASEMOD",
183 "INTERPNORMAL",
184 "NORMAL"
185};
186#endif
187
79072805
LW
188#ifdef ff_next
189#undef ff_next
d48672a2
LW
190#endif
191
79072805 192#include "keywords.h"
fe14fcc3 193
ffb4593c
NT
194/* CLINE is a macro that ensures PL_copline has a sane value */
195
ae986130
LW
196#ifdef CLINE
197#undef CLINE
198#endif
57843af0 199#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 200
5db06880 201#ifdef PERL_MAD
29595ff2
NC
202# define SKIPSPACE0(s) skipspace0(s)
203# define SKIPSPACE1(s) skipspace1(s)
204# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
205# define PEEKSPACE(s) skipspace2(s,0)
206#else
207# define SKIPSPACE0(s) skipspace(s)
208# define SKIPSPACE1(s) skipspace(s)
209# define SKIPSPACE2(s,tsv) skipspace(s)
210# define PEEKSPACE(s) skipspace(s)
211#endif
212
ffb4593c
NT
213/*
214 * Convenience functions to return different tokens and prime the
9cbb5ea2 215 * lexer for the next token. They all take an argument.
ffb4593c
NT
216 *
217 * TOKEN : generic token (used for '(', DOLSHARP, etc)
218 * OPERATOR : generic operator
219 * AOPERATOR : assignment operator
220 * PREBLOCK : beginning the block after an if, while, foreach, ...
221 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
222 * PREREF : *EXPR where EXPR is not a simple identifier
223 * TERM : expression term
224 * LOOPX : loop exiting command (goto, last, dump, etc)
225 * FTST : file test operator
226 * FUN0 : zero-argument function
2d2e263d 227 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
228 * BOop : bitwise or or xor
229 * BAop : bitwise and
230 * SHop : shift operator
231 * PWop : power operator
9cbb5ea2 232 * PMop : pattern-matching operator
ffb4593c
NT
233 * Aop : addition-level operator
234 * Mop : multiplication-level operator
235 * Eop : equality-testing operator
e5edeb50 236 * Rop : relational operator <= != gt
ffb4593c
NT
237 *
238 * Also see LOP and lop() below.
239 */
240
998054bd 241#ifdef DEBUGGING /* Serve -DT. */
704d4215 242# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 243#else
bbf60fe6 244# define REPORT(retval) (retval)
998054bd
SC
245#endif
246
bbf60fe6
DM
247#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
248#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
249#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
250#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
251#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
252#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
253#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
254#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
255#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
256#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
257#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
258#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
259#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
260#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
261#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
262#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
263#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
264#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
265#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
266#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 267
a687059c
LW
268/* This bit of chicanery makes a unary function followed by
269 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
270 * The UNIDOR macro is for unary functions that can be followed by the //
271 * operator (such as C<shift // 0>).
a687059c 272 */
376fcdbf 273#define UNI2(f,x) { \
6154021b 274 pl_yylval.ival = f; \
376fcdbf
AL
275 PL_expect = x; \
276 PL_bufptr = s; \
277 PL_last_uni = PL_oldbufptr; \
278 PL_last_lop_op = f; \
279 if (*s == '(') \
280 return REPORT( (int)FUNC1 ); \
29595ff2 281 s = PEEKSPACE(s); \
376fcdbf
AL
282 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
283 }
6f33ba73
RGS
284#define UNI(f) UNI2(f,XTERM)
285#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 286
376fcdbf 287#define UNIBRACK(f) { \
6154021b 288 pl_yylval.ival = f; \
376fcdbf
AL
289 PL_bufptr = s; \
290 PL_last_uni = PL_oldbufptr; \
291 if (*s == '(') \
292 return REPORT( (int)FUNC1 ); \
29595ff2 293 s = PEEKSPACE(s); \
376fcdbf
AL
294 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
295 }
79072805 296
9f68db38 297/* grandfather return to old style */
78cdf107
Z
298#define OLDLOP(f) \
299 do { \
300 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
301 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
302 pl_yylval.ival = (f); \
303 PL_expect = XTERM; \
304 PL_bufptr = s; \
305 return (int)LSTOP; \
306 } while(0)
79072805 307
8fa7f367
JH
308#ifdef DEBUGGING
309
6154021b 310/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
311enum token_type {
312 TOKENTYPE_NONE,
313 TOKENTYPE_IVAL,
6154021b 314 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
315 TOKENTYPE_PVAL,
316 TOKENTYPE_OPVAL,
317 TOKENTYPE_GVVAL
318};
319
6d4a66ac
NC
320static struct debug_tokens {
321 const int token;
322 enum token_type type;
323 const char *name;
324} const debug_tokens[] =
9041c2e3 325{
bbf60fe6
DM
326 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
327 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
328 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
329 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
330 { ARROW, TOKENTYPE_NONE, "ARROW" },
331 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
332 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
333 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
334 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
335 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 336 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
337 { DO, TOKENTYPE_NONE, "DO" },
338 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
339 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
340 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
341 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
342 { ELSE, TOKENTYPE_NONE, "ELSE" },
343 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
344 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
345 { FOR, TOKENTYPE_IVAL, "FOR" },
346 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
347 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
348 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
349 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
350 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
351 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 352 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
353 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
354 { IF, TOKENTYPE_IVAL, "IF" },
355 { LABEL, TOKENTYPE_PVAL, "LABEL" },
356 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
357 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
358 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
359 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
360 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
361 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
362 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
363 { MY, TOKENTYPE_IVAL, "MY" },
364 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
365 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
366 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
367 { OROP, TOKENTYPE_IVAL, "OROP" },
368 { OROR, TOKENTYPE_NONE, "OROR" },
369 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
370 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
371 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
372 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
373 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
374 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
375 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
376 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
377 { PREINC, TOKENTYPE_NONE, "PREINC" },
378 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
379 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
380 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
381 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
382 { SUB, TOKENTYPE_NONE, "SUB" },
383 { THING, TOKENTYPE_OPVAL, "THING" },
384 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
385 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
386 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
387 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
388 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
389 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 390 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
391 { WHILE, TOKENTYPE_IVAL, "WHILE" },
392 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 393 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 394 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
395};
396
6154021b 397/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 398
bbf60fe6 399STATIC int
704d4215 400S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 401{
97aff369 402 dVAR;
7918f24d
NC
403
404 PERL_ARGS_ASSERT_TOKEREPORT;
405
bbf60fe6 406 if (DEBUG_T_TEST) {
bd61b366 407 const char *name = NULL;
bbf60fe6 408 enum token_type type = TOKENTYPE_NONE;
f54cb97a 409 const struct debug_tokens *p;
396482e1 410 SV* const report = newSVpvs("<== ");
bbf60fe6 411
f54cb97a 412 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
413 if (p->token == (int)rv) {
414 name = p->name;
415 type = p->type;
416 break;
417 }
418 }
419 if (name)
54667de8 420 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
421 else if ((char)rv > ' ' && (char)rv < '~')
422 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
423 else if (!rv)
396482e1 424 sv_catpvs(report, "EOF");
bbf60fe6
DM
425 else
426 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
427 switch (type) {
428 case TOKENTYPE_NONE:
429 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
430 break;
431 case TOKENTYPE_IVAL:
704d4215 432 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
433 break;
434 case TOKENTYPE_OPNUM:
435 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 436 PL_op_name[lvalp->ival]);
bbf60fe6
DM
437 break;
438 case TOKENTYPE_PVAL:
704d4215 439 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
440 break;
441 case TOKENTYPE_OPVAL:
704d4215 442 if (lvalp->opval) {
401441c0 443 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
444 PL_op_name[lvalp->opval->op_type]);
445 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 446 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 447 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
448 }
449
450 }
401441c0 451 else
396482e1 452 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
453 break;
454 }
b6007c36 455 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
456 };
457 return (int)rv;
998054bd
SC
458}
459
b6007c36
DM
460
461/* print the buffer with suitable escapes */
462
463STATIC void
15f169a1 464S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 465{
396482e1 466 SV* const tmp = newSVpvs("");
7918f24d
NC
467
468 PERL_ARGS_ASSERT_PRINTBUF;
469
b6007c36
DM
470 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
471 SvREFCNT_dec(tmp);
472}
473
8fa7f367
JH
474#endif
475
8290c323
NC
476static int
477S_deprecate_commaless_var_list(pTHX) {
478 PL_expect = XTERM;
479 deprecate("comma-less variable list");
480 return REPORT(','); /* grandfather non-comma-format format */
481}
482
ffb4593c
NT
483/*
484 * S_ao
485 *
c963b151
BD
486 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
487 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
488 */
489
76e3520e 490STATIC int
cea2e8a9 491S_ao(pTHX_ int toketype)
a0d0e21e 492{
97aff369 493 dVAR;
3280af22
NIS
494 if (*PL_bufptr == '=') {
495 PL_bufptr++;
a0d0e21e 496 if (toketype == ANDAND)
6154021b 497 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 498 else if (toketype == OROR)
6154021b 499 pl_yylval.ival = OP_ORASSIGN;
c963b151 500 else if (toketype == DORDOR)
6154021b 501 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
502 toketype = ASSIGNOP;
503 }
504 return toketype;
505}
506
ffb4593c
NT
507/*
508 * S_no_op
509 * When Perl expects an operator and finds something else, no_op
510 * prints the warning. It always prints "<something> found where
511 * operator expected. It prints "Missing semicolon on previous line?"
512 * if the surprise occurs at the start of the line. "do you need to
513 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
514 * where the compiler doesn't know if foo is a method call or a function.
515 * It prints "Missing operator before end of line" if there's nothing
516 * after the missing operator, or "... before <...>" if there is something
517 * after the missing operator.
518 */
519
76e3520e 520STATIC void
15f169a1 521S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 522{
97aff369 523 dVAR;
9d4ba2ae
AL
524 char * const oldbp = PL_bufptr;
525 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 526
7918f24d
NC
527 PERL_ARGS_ASSERT_NO_OP;
528
1189a94a
GS
529 if (!s)
530 s = oldbp;
07c798fb 531 else
1189a94a 532 PL_bufptr = s;
cea2e8a9 533 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
534 if (ckWARN_d(WARN_SYNTAX)) {
535 if (is_first)
536 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
537 "\t(Missing semicolon on previous line?)\n");
538 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 539 const char *t;
c35e046a
AL
540 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
541 NOOP;
56da5a46
RGS
542 if (t < PL_bufptr && isSPACE(*t))
543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
544 "\t(Do you need to predeclare %.*s?)\n",
551405c4 545 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
546 }
547 else {
548 assert(s >= oldbp);
549 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 550 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 551 }
07c798fb 552 }
3280af22 553 PL_bufptr = oldbp;
8990e307
LW
554}
555
ffb4593c
NT
556/*
557 * S_missingterm
558 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 559 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
560 * If we're in a delimited string and the delimiter is a control
561 * character, it's reformatted into a two-char sequence like ^C.
562 * This is fatal.
563 */
564
76e3520e 565STATIC void
cea2e8a9 566S_missingterm(pTHX_ char *s)
8990e307 567{
97aff369 568 dVAR;
8990e307
LW
569 char tmpbuf[3];
570 char q;
571 if (s) {
9d4ba2ae 572 char * const nl = strrchr(s,'\n');
d2719217 573 if (nl)
8990e307
LW
574 *nl = '\0';
575 }
463559e7 576 else if (isCNTRL(PL_multi_close)) {
8990e307 577 *tmpbuf = '^';
585ec06d 578 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
579 tmpbuf[2] = '\0';
580 s = tmpbuf;
581 }
582 else {
eb160463 583 *tmpbuf = (char)PL_multi_close;
8990e307
LW
584 tmpbuf[1] = '\0';
585 s = tmpbuf;
586 }
587 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 588 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 589}
79072805 590
0d863452 591/*
0d863452
RH
592 * Check whether the named feature is enabled.
593 */
26ea9e12
NC
594bool
595Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 596{
97aff369 597 dVAR;
0d863452 598 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 599 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
600
601 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
602
26ea9e12
NC
603 if (namelen > MAX_FEATURE_LEN)
604 return FALSE;
4a731d7b 605 memcpy(&he_name[8], name, namelen);
d4c19fe8 606
7b9ef140 607 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
608}
609
ffb4593c 610/*
9cbb5ea2
GS
611 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
612 * utf16-to-utf8-reversed.
ffb4593c
NT
613 */
614
c39cd008
GS
615#ifdef PERL_CR_FILTER
616static void
617strip_return(SV *sv)
618{
95a20fc0 619 register const char *s = SvPVX_const(sv);
9d4ba2ae 620 register const char * const e = s + SvCUR(sv);
7918f24d
NC
621
622 PERL_ARGS_ASSERT_STRIP_RETURN;
623
c39cd008
GS
624 /* outer loop optimized to do nothing if there are no CR-LFs */
625 while (s < e) {
626 if (*s++ == '\r' && *s == '\n') {
627 /* hit a CR-LF, need to copy the rest */
628 register char *d = s - 1;
629 *d++ = *s++;
630 while (s < e) {
631 if (*s == '\r' && s[1] == '\n')
632 s++;
633 *d++ = *s++;
634 }
635 SvCUR(sv) -= s - d;
636 return;
637 }
638 }
639}
a868473f 640
76e3520e 641STATIC I32
c39cd008 642S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 643{
f54cb97a 644 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
645 if (count > 0 && !maxlen)
646 strip_return(sv);
647 return count;
a868473f
NIS
648}
649#endif
650
ffb4593c 651/*
8eaa0acf
Z
652=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
653
654Creates and initialises a new lexer/parser state object, supplying
655a context in which to lex and parse from a new source of Perl code.
656A pointer to the new state object is placed in L</PL_parser>. An entry
657is made on the save stack so that upon unwinding the new state object
658will be destroyed and the former value of L</PL_parser> will be restored.
659Nothing else need be done to clean up the parsing context.
660
661The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
662non-null, provides a string (in SV form) containing code to be parsed.
663A copy of the string is made, so subsequent modification of I<line>
664does not affect parsing. I<rsfp>, if non-null, provides an input stream
665from which code will be read to be parsed. If both are non-null, the
666code in I<line> comes first and must consist of complete lines of input,
667and I<rsfp> supplies the remainder of the source.
668
669The I<flags> parameter is reserved for future use, and must always
670be zero.
671
672=cut
673*/
ffb4593c 674
a0d0e21e 675void
8eaa0acf 676Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 677{
97aff369 678 dVAR;
6ef55633 679 const char *s = NULL;
8990e307 680 STRLEN len;
5486870f 681 yy_parser *parser, *oparser;
8eaa0acf
Z
682 if (flags)
683 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
684
685 /* create and initialise a parser */
686
199e78b7 687 Newxz(parser, 1, yy_parser);
5486870f 688 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
689 PL_parser = parser;
690
28ac2b49
Z
691 parser->stack = NULL;
692 parser->ps = NULL;
693 parser->stack_size = 0;
acdf0a21 694
e3abe207
DM
695 /* on scope exit, free this parser and restore any outer one */
696 SAVEPARSER(parser);
7c4baf47 697 parser->saved_curcop = PL_curcop;
e3abe207 698
acdf0a21 699 /* initialise lexer state */
8990e307 700
fb205e7a
DM
701#ifdef PERL_MAD
702 parser->curforce = -1;
703#else
704 parser->nexttoke = 0;
705#endif
ca4cfd28 706 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 707 parser->copline = NOLINE;
5afb0a62 708 parser->lex_state = LEX_NORMAL;
c2598295 709 parser->expect = XSTATE;
2f9285f8 710 parser->rsfp = rsfp;
f07ec6dd 711 parser->rsfp_filters = newAV();
2f9285f8 712
199e78b7
DM
713 Newx(parser->lex_brackstack, 120, char);
714 Newx(parser->lex_casestack, 12, char);
715 *parser->lex_casestack = '\0';
02b34bbe 716
10efb74f
NC
717 if (line) {
718 s = SvPV_const(line, len);
719 } else {
720 len = 0;
721 }
bdc0bf6f 722
10efb74f 723 if (!len) {
bdc0bf6f 724 parser->linestr = newSVpvs("\n;");
805700c1 725 } else {
719a9bb0 726 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 727 if (s[len-1] != ';')
bdc0bf6f 728 sv_catpvs(parser->linestr, "\n;");
8990e307 729 }
f06b5848
DM
730 parser->oldoldbufptr =
731 parser->oldbufptr =
732 parser->bufptr =
733 parser->linestart = SvPVX(parser->linestr);
734 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
735 parser->last_lop = parser->last_uni = NULL;
737c24fc
Z
736
737 parser->in_pod = 0;
79072805 738}
a687059c 739
e3abe207
DM
740
741/* delete a parser object */
742
743void
744Perl_parser_free(pTHX_ const yy_parser *parser)
745{
7918f24d
NC
746 PERL_ARGS_ASSERT_PARSER_FREE;
747
7c4baf47 748 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
749 SvREFCNT_dec(parser->linestr);
750
2f9285f8
DM
751 if (parser->rsfp == PerlIO_stdin())
752 PerlIO_clearerr(parser->rsfp);
799361c3
SH
753 else if (parser->rsfp && (!parser->old_parser ||
754 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 755 PerlIO_close(parser->rsfp);
5486870f 756 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 757
e3abe207
DM
758 Safefree(parser->lex_brackstack);
759 Safefree(parser->lex_casestack);
760 PL_parser = parser->old_parser;
761 Safefree(parser);
762}
763
764
ffb4593c 765/*
f0e67a1d
Z
766=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
767
768Buffer scalar containing the chunk currently under consideration of the
769text currently being lexed. This is always a plain string scalar (for
770which C<SvPOK> is true). It is not intended to be used as a scalar by
771normal scalar means; instead refer to the buffer directly by the pointer
772variables described below.
773
774The lexer maintains various C<char*> pointers to things in the
775C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
776reallocated, all of these pointers must be updated. Don't attempt to
777do this manually, but rather use L</lex_grow_linestr> if you need to
778reallocate the buffer.
779
780The content of the text chunk in the buffer is commonly exactly one
781complete line of input, up to and including a newline terminator,
782but there are situations where it is otherwise. The octets of the
783buffer may be intended to be interpreted as either UTF-8 or Latin-1.
784The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
785flag on this scalar, which may disagree with it.
786
787For direct examination of the buffer, the variable
788L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
789lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
790of these pointers is usually preferable to examination of the scalar
791through normal scalar means.
792
793=for apidoc AmxU|char *|PL_parser-E<gt>bufend
794
795Direct pointer to the end of the chunk of text currently being lexed, the
796end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
797+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
798always located at the end of the buffer, and does not count as part of
799the buffer's contents.
800
801=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
802
803Points to the current position of lexing inside the lexer buffer.
804Characters around this point may be freely examined, within
805the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
806L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
807interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
808
809Lexing code (whether in the Perl core or not) moves this pointer past
810the characters that it consumes. It is also expected to perform some
811bookkeeping whenever a newline character is consumed. This movement
812can be more conveniently performed by the function L</lex_read_to>,
813which handles newlines appropriately.
814
815Interpretation of the buffer's octets can be abstracted out by
816using the slightly higher-level functions L</lex_peek_unichar> and
817L</lex_read_unichar>.
818
819=for apidoc AmxU|char *|PL_parser-E<gt>linestart
820
821Points to the start of the current line inside the lexer buffer.
822This is useful for indicating at which column an error occurred, and
823not much else. This must be updated by any lexing code that consumes
824a newline; the function L</lex_read_to> handles this detail.
825
826=cut
827*/
828
829/*
830=for apidoc Amx|bool|lex_bufutf8
831
832Indicates whether the octets in the lexer buffer
833(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
834of Unicode characters. If not, they should be interpreted as Latin-1
835characters. This is analogous to the C<SvUTF8> flag for scalars.
836
837In UTF-8 mode, it is not guaranteed that the lexer buffer actually
838contains valid UTF-8. Lexing code must be robust in the face of invalid
839encoding.
840
841The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
842is significant, but not the whole story regarding the input character
843encoding. Normally, when a file is being read, the scalar contains octets
844and its C<SvUTF8> flag is off, but the octets should be interpreted as
845UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
846however, the scalar may have the C<SvUTF8> flag on, and in this case its
847octets should be interpreted as UTF-8 unless the C<use bytes> pragma
848is in effect. This logic may change in the future; use this function
849instead of implementing the logic yourself.
850
851=cut
852*/
853
854bool
855Perl_lex_bufutf8(pTHX)
856{
857 return UTF;
858}
859
860/*
861=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
862
863Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
864at least I<len> octets (including terminating NUL). Returns a
865pointer to the reallocated buffer. This is necessary before making
866any direct modification of the buffer that would increase its length.
867L</lex_stuff_pvn> provides a more convenient way to insert text into
868the buffer.
869
870Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
871this function updates all of the lexer's variables that point directly
872into the buffer.
873
874=cut
875*/
876
877char *
878Perl_lex_grow_linestr(pTHX_ STRLEN len)
879{
880 SV *linestr;
881 char *buf;
882 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
883 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
884 linestr = PL_parser->linestr;
885 buf = SvPVX(linestr);
886 if (len <= SvLEN(linestr))
887 return buf;
888 bufend_pos = PL_parser->bufend - buf;
889 bufptr_pos = PL_parser->bufptr - buf;
890 oldbufptr_pos = PL_parser->oldbufptr - buf;
891 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
892 linestart_pos = PL_parser->linestart - buf;
893 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
894 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
895 buf = sv_grow(linestr, len);
896 PL_parser->bufend = buf + bufend_pos;
897 PL_parser->bufptr = buf + bufptr_pos;
898 PL_parser->oldbufptr = buf + oldbufptr_pos;
899 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
900 PL_parser->linestart = buf + linestart_pos;
901 if (PL_parser->last_uni)
902 PL_parser->last_uni = buf + last_uni_pos;
903 if (PL_parser->last_lop)
904 PL_parser->last_lop = buf + last_lop_pos;
905 return buf;
906}
907
908/*
83aa740e 909=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
910
911Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
912immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
913reallocating the buffer if necessary. This means that lexing code that
914runs later will see the characters as if they had appeared in the input.
915It is not recommended to do this as part of normal parsing, and most
916uses of this facility run the risk of the inserted characters being
917interpreted in an unintended manner.
918
919The string to be inserted is represented by I<len> octets starting
920at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
921according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
922The characters are recoded for the lexer buffer, according to how the
923buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 924to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
925function is more convenient.
926
927=cut
928*/
929
930void
83aa740e 931Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 932{
749123ff 933 dVAR;
f0e67a1d
Z
934 char *bufptr;
935 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
936 if (flags & ~(LEX_STUFF_UTF8))
937 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
938 if (UTF) {
939 if (flags & LEX_STUFF_UTF8) {
940 goto plain_copy;
941 } else {
942 STRLEN highhalf = 0;
83aa740e 943 const char *p, *e = pv+len;
f0e67a1d
Z
944 for (p = pv; p != e; p++)
945 highhalf += !!(((U8)*p) & 0x80);
946 if (!highhalf)
947 goto plain_copy;
948 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
949 bufptr = PL_parser->bufptr;
950 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
951 SvCUR_set(PL_parser->linestr,
952 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
953 PL_parser->bufend += len+highhalf;
954 for (p = pv; p != e; p++) {
955 U8 c = (U8)*p;
956 if (c & 0x80) {
957 *bufptr++ = (char)(0xc0 | (c >> 6));
958 *bufptr++ = (char)(0x80 | (c & 0x3f));
959 } else {
960 *bufptr++ = (char)c;
961 }
962 }
963 }
964 } else {
965 if (flags & LEX_STUFF_UTF8) {
966 STRLEN highhalf = 0;
83aa740e 967 const char *p, *e = pv+len;
f0e67a1d
Z
968 for (p = pv; p != e; p++) {
969 U8 c = (U8)*p;
970 if (c >= 0xc4) {
971 Perl_croak(aTHX_ "Lexing code attempted to stuff "
972 "non-Latin-1 character into Latin-1 input");
973 } else if (c >= 0xc2 && p+1 != e &&
974 (((U8)p[1]) & 0xc0) == 0x80) {
975 p++;
976 highhalf++;
977 } else if (c >= 0x80) {
978 /* malformed UTF-8 */
979 ENTER;
980 SAVESPTR(PL_warnhook);
981 PL_warnhook = PERL_WARNHOOK_FATAL;
982 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
983 LEAVE;
984 }
985 }
986 if (!highhalf)
987 goto plain_copy;
988 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
989 bufptr = PL_parser->bufptr;
990 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
991 SvCUR_set(PL_parser->linestr,
992 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
993 PL_parser->bufend += len-highhalf;
994 for (p = pv; p != e; p++) {
995 U8 c = (U8)*p;
996 if (c & 0x80) {
997 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
998 p++;
999 } else {
1000 *bufptr++ = (char)c;
1001 }
1002 }
1003 } else {
1004 plain_copy:
1005 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1006 bufptr = PL_parser->bufptr;
1007 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1008 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1009 PL_parser->bufend += len;
1010 Copy(pv, bufptr, len, char);
1011 }
1012 }
1013}
1014
1015/*
9dcc53ea
Z
1016=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1017
1018Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1019immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1020reallocating the buffer if necessary. This means that lexing code that
1021runs later will see the characters as if they had appeared in the input.
1022It is not recommended to do this as part of normal parsing, and most
1023uses of this facility run the risk of the inserted characters being
1024interpreted in an unintended manner.
1025
1026The string to be inserted is represented by octets starting at I<pv>
1027and continuing to the first nul. These octets are interpreted as either
1028UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1029in I<flags>. The characters are recoded for the lexer buffer, according
1030to how the buffer is currently being interpreted (L</lex_bufutf8>).
1031If it is not convenient to nul-terminate a string to be inserted, the
1032L</lex_stuff_pvn> function is more appropriate.
1033
1034=cut
1035*/
1036
1037void
1038Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1039{
1040 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1041 lex_stuff_pvn(pv, strlen(pv), flags);
1042}
1043
1044/*
f0e67a1d
Z
1045=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1046
1047Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1048immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1049reallocating the buffer if necessary. This means that lexing code that
1050runs later will see the characters as if they had appeared in the input.
1051It is not recommended to do this as part of normal parsing, and most
1052uses of this facility run the risk of the inserted characters being
1053interpreted in an unintended manner.
1054
1055The string to be inserted is the string value of I<sv>. The characters
1056are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1057being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1058not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1059need to construct a scalar.
1060
1061=cut
1062*/
1063
1064void
1065Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1066{
1067 char *pv;
1068 STRLEN len;
1069 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1070 if (flags)
1071 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1072 pv = SvPV(sv, len);
1073 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1074}
1075
1076/*
1077=for apidoc Amx|void|lex_unstuff|char *ptr
1078
1079Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1080I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1081This hides the discarded text from any lexing code that runs later,
1082as if the text had never appeared.
1083
1084This is not the normal way to consume lexed text. For that, use
1085L</lex_read_to>.
1086
1087=cut
1088*/
1089
1090void
1091Perl_lex_unstuff(pTHX_ char *ptr)
1092{
1093 char *buf, *bufend;
1094 STRLEN unstuff_len;
1095 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1096 buf = PL_parser->bufptr;
1097 if (ptr < buf)
1098 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1099 if (ptr == buf)
1100 return;
1101 bufend = PL_parser->bufend;
1102 if (ptr > bufend)
1103 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1104 unstuff_len = ptr - buf;
1105 Move(ptr, buf, bufend+1-ptr, char);
1106 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1107 PL_parser->bufend = bufend - unstuff_len;
1108}
1109
1110/*
1111=for apidoc Amx|void|lex_read_to|char *ptr
1112
1113Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1114to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1115performing the correct bookkeeping whenever a newline character is passed.
1116This is the normal way to consume lexed text.
1117
1118Interpretation of the buffer's octets can be abstracted out by
1119using the slightly higher-level functions L</lex_peek_unichar> and
1120L</lex_read_unichar>.
1121
1122=cut
1123*/
1124
1125void
1126Perl_lex_read_to(pTHX_ char *ptr)
1127{
1128 char *s;
1129 PERL_ARGS_ASSERT_LEX_READ_TO;
1130 s = PL_parser->bufptr;
1131 if (ptr < s || ptr > PL_parser->bufend)
1132 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1133 for (; s != ptr; s++)
1134 if (*s == '\n') {
1135 CopLINE_inc(PL_curcop);
1136 PL_parser->linestart = s+1;
1137 }
1138 PL_parser->bufptr = ptr;
1139}
1140
1141/*
1142=for apidoc Amx|void|lex_discard_to|char *ptr
1143
1144Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1145up to I<ptr>. The remaining content of the buffer will be moved, and
1146all pointers into the buffer updated appropriately. I<ptr> must not
1147be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1148it is not permitted to discard text that has yet to be lexed.
1149
1150Normally it is not necessarily to do this directly, because it suffices to
1151use the implicit discarding behaviour of L</lex_next_chunk> and things
1152based on it. However, if a token stretches across multiple lines,
1f317c95 1153and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1154that purpose, then after completion of the token it would be wise to
1155explicitly discard the now-unneeded earlier lines, to avoid future
1156multi-line tokens growing the buffer without bound.
1157
1158=cut
1159*/
1160
1161void
1162Perl_lex_discard_to(pTHX_ char *ptr)
1163{
1164 char *buf;
1165 STRLEN discard_len;
1166 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1167 buf = SvPVX(PL_parser->linestr);
1168 if (ptr < buf)
1169 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1170 if (ptr == buf)
1171 return;
1172 if (ptr > PL_parser->bufptr)
1173 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1174 discard_len = ptr - buf;
1175 if (PL_parser->oldbufptr < ptr)
1176 PL_parser->oldbufptr = ptr;
1177 if (PL_parser->oldoldbufptr < ptr)
1178 PL_parser->oldoldbufptr = ptr;
1179 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1180 PL_parser->last_uni = NULL;
1181 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1182 PL_parser->last_lop = NULL;
1183 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1184 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1185 PL_parser->bufend -= discard_len;
1186 PL_parser->bufptr -= discard_len;
1187 PL_parser->oldbufptr -= discard_len;
1188 PL_parser->oldoldbufptr -= discard_len;
1189 if (PL_parser->last_uni)
1190 PL_parser->last_uni -= discard_len;
1191 if (PL_parser->last_lop)
1192 PL_parser->last_lop -= discard_len;
1193}
1194
1195/*
1196=for apidoc Amx|bool|lex_next_chunk|U32 flags
1197
1198Reads in the next chunk of text to be lexed, appending it to
1199L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1200looked to the end of the current chunk and wants to know more. It is
1201usual, but not necessary, for lexing to have consumed the entirety of
1202the current chunk at this time.
1203
1204If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1205chunk (i.e., the current chunk has been entirely consumed), normally the
1206current chunk will be discarded at the same time that the new chunk is
1207read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1208will not be discarded. If the current chunk has not been entirely
1209consumed, then it will not be discarded regardless of the flag.
1210
1211Returns true if some new text was added to the buffer, or false if the
1212buffer has reached the end of the input text.
1213
1214=cut
1215*/
1216
1217#define LEX_FAKE_EOF 0x80000000
1218
1219bool
1220Perl_lex_next_chunk(pTHX_ U32 flags)
1221{
1222 SV *linestr;
1223 char *buf;
1224 STRLEN old_bufend_pos, new_bufend_pos;
1225 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1226 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1227 bool got_some_for_debugger = 0;
f0e67a1d
Z
1228 bool got_some;
1229 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1230 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1231 linestr = PL_parser->linestr;
1232 buf = SvPVX(linestr);
1233 if (!(flags & LEX_KEEP_PREVIOUS) &&
1234 PL_parser->bufptr == PL_parser->bufend) {
1235 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1236 linestart_pos = 0;
1237 if (PL_parser->last_uni != PL_parser->bufend)
1238 PL_parser->last_uni = NULL;
1239 if (PL_parser->last_lop != PL_parser->bufend)
1240 PL_parser->last_lop = NULL;
1241 last_uni_pos = last_lop_pos = 0;
1242 *buf = 0;
1243 SvCUR(linestr) = 0;
1244 } else {
1245 old_bufend_pos = PL_parser->bufend - buf;
1246 bufptr_pos = PL_parser->bufptr - buf;
1247 oldbufptr_pos = PL_parser->oldbufptr - buf;
1248 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1249 linestart_pos = PL_parser->linestart - buf;
1250 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1251 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1252 }
1253 if (flags & LEX_FAKE_EOF) {
1254 goto eof;
1255 } else if (!PL_parser->rsfp) {
1256 got_some = 0;
1257 } else if (filter_gets(linestr, old_bufend_pos)) {
1258 got_some = 1;
17cc9359 1259 got_some_for_debugger = 1;
f0e67a1d 1260 } else {
580561a3
Z
1261 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1262 sv_setpvs(linestr, "");
f0e67a1d
Z
1263 eof:
1264 /* End of real input. Close filehandle (unless it was STDIN),
1265 * then add implicit termination.
1266 */
1267 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1268 PerlIO_clearerr(PL_parser->rsfp);
1269 else if (PL_parser->rsfp)
1270 (void)PerlIO_close(PL_parser->rsfp);
1271 PL_parser->rsfp = NULL;
737c24fc 1272 PL_parser->in_pod = 0;
f0e67a1d
Z
1273#ifdef PERL_MAD
1274 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1275 PL_faketokens = 1;
1276#endif
1277 if (!PL_in_eval && PL_minus_p) {
1278 sv_catpvs(linestr,
1279 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1280 PL_minus_n = PL_minus_p = 0;
1281 } else if (!PL_in_eval && PL_minus_n) {
1282 sv_catpvs(linestr, /*{*/";}");
1283 PL_minus_n = 0;
1284 } else
1285 sv_catpvs(linestr, ";");
1286 got_some = 1;
1287 }
1288 buf = SvPVX(linestr);
1289 new_bufend_pos = SvCUR(linestr);
1290 PL_parser->bufend = buf + new_bufend_pos;
1291 PL_parser->bufptr = buf + bufptr_pos;
1292 PL_parser->oldbufptr = buf + oldbufptr_pos;
1293 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1294 PL_parser->linestart = buf + linestart_pos;
1295 if (PL_parser->last_uni)
1296 PL_parser->last_uni = buf + last_uni_pos;
1297 if (PL_parser->last_lop)
1298 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1299 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1300 PL_curstash != PL_debstash) {
1301 /* debugger active and we're not compiling the debugger code,
1302 * so store the line into the debugger's array of lines
1303 */
1304 update_debugger_info(NULL, buf+old_bufend_pos,
1305 new_bufend_pos-old_bufend_pos);
1306 }
1307 return got_some;
1308}
1309
1310/*
1311=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1312
1313Looks ahead one (Unicode) character in the text currently being lexed.
1314Returns the codepoint (unsigned integer value) of the next character,
1315or -1 if lexing has reached the end of the input text. To consume the
1316peeked character, use L</lex_read_unichar>.
1317
1318If the next character is in (or extends into) the next chunk of input
1319text, the next chunk will be read in. Normally the current chunk will be
1320discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1321then the current chunk will not be discarded.
1322
1323If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1324is encountered, an exception is generated.
1325
1326=cut
1327*/
1328
1329I32
1330Perl_lex_peek_unichar(pTHX_ U32 flags)
1331{
749123ff 1332 dVAR;
f0e67a1d
Z
1333 char *s, *bufend;
1334 if (flags & ~(LEX_KEEP_PREVIOUS))
1335 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1336 s = PL_parser->bufptr;
1337 bufend = PL_parser->bufend;
1338 if (UTF) {
1339 U8 head;
1340 I32 unichar;
1341 STRLEN len, retlen;
1342 if (s == bufend) {
1343 if (!lex_next_chunk(flags))
1344 return -1;
1345 s = PL_parser->bufptr;
1346 bufend = PL_parser->bufend;
1347 }
1348 head = (U8)*s;
1349 if (!(head & 0x80))
1350 return head;
1351 if (head & 0x40) {
1352 len = PL_utf8skip[head];
1353 while ((STRLEN)(bufend-s) < len) {
1354 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1355 break;
1356 s = PL_parser->bufptr;
1357 bufend = PL_parser->bufend;
1358 }
1359 }
1360 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1361 if (retlen == (STRLEN)-1) {
1362 /* malformed UTF-8 */
1363 ENTER;
1364 SAVESPTR(PL_warnhook);
1365 PL_warnhook = PERL_WARNHOOK_FATAL;
1366 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1367 LEAVE;
1368 }
1369 return unichar;
1370 } else {
1371 if (s == bufend) {
1372 if (!lex_next_chunk(flags))
1373 return -1;
1374 s = PL_parser->bufptr;
1375 }
1376 return (U8)*s;
1377 }
1378}
1379
1380/*
1381=for apidoc Amx|I32|lex_read_unichar|U32 flags
1382
1383Reads the next (Unicode) character in the text currently being lexed.
1384Returns the codepoint (unsigned integer value) of the character read,
1385and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1386if lexing has reached the end of the input text. To non-destructively
1387examine the next character, use L</lex_peek_unichar> instead.
1388
1389If the next character is in (or extends into) the next chunk of input
1390text, the next chunk will be read in. Normally the current chunk will be
1391discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1392then the current chunk will not be discarded.
1393
1394If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1395is encountered, an exception is generated.
1396
1397=cut
1398*/
1399
1400I32
1401Perl_lex_read_unichar(pTHX_ U32 flags)
1402{
1403 I32 c;
1404 if (flags & ~(LEX_KEEP_PREVIOUS))
1405 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1406 c = lex_peek_unichar(flags);
1407 if (c != -1) {
1408 if (c == '\n')
1409 CopLINE_inc(PL_curcop);
1410 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1411 }
1412 return c;
1413}
1414
1415/*
1416=for apidoc Amx|void|lex_read_space|U32 flags
1417
1418Reads optional spaces, in Perl style, in the text currently being
1419lexed. The spaces may include ordinary whitespace characters and
1420Perl-style comments. C<#line> directives are processed if encountered.
1421L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1422at a non-space character (or the end of the input text).
1423
1424If spaces extend into the next chunk of input text, the next chunk will
1425be read in. Normally the current chunk will be discarded at the same
1426time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1427chunk will not be discarded.
1428
1429=cut
1430*/
1431
f0998909
Z
1432#define LEX_NO_NEXT_CHUNK 0x80000000
1433
f0e67a1d
Z
1434void
1435Perl_lex_read_space(pTHX_ U32 flags)
1436{
1437 char *s, *bufend;
1438 bool need_incline = 0;
f0998909 1439 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1440 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1441#ifdef PERL_MAD
1442 if (PL_skipwhite) {
1443 sv_free(PL_skipwhite);
1444 PL_skipwhite = NULL;
1445 }
1446 if (PL_madskills)
1447 PL_skipwhite = newSVpvs("");
1448#endif /* PERL_MAD */
1449 s = PL_parser->bufptr;
1450 bufend = PL_parser->bufend;
1451 while (1) {
1452 char c = *s;
1453 if (c == '#') {
1454 do {
1455 c = *++s;
1456 } while (!(c == '\n' || (c == 0 && s == bufend)));
1457 } else if (c == '\n') {
1458 s++;
1459 PL_parser->linestart = s;
1460 if (s == bufend)
1461 need_incline = 1;
1462 else
1463 incline(s);
1464 } else if (isSPACE(c)) {
1465 s++;
1466 } else if (c == 0 && s == bufend) {
1467 bool got_more;
1468#ifdef PERL_MAD
1469 if (PL_madskills)
1470 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1471#endif /* PERL_MAD */
f0998909
Z
1472 if (flags & LEX_NO_NEXT_CHUNK)
1473 break;
f0e67a1d
Z
1474 PL_parser->bufptr = s;
1475 CopLINE_inc(PL_curcop);
1476 got_more = lex_next_chunk(flags);
1477 CopLINE_dec(PL_curcop);
1478 s = PL_parser->bufptr;
1479 bufend = PL_parser->bufend;
1480 if (!got_more)
1481 break;
1482 if (need_incline && PL_parser->rsfp) {
1483 incline(s);
1484 need_incline = 0;
1485 }
1486 } else {
1487 break;
1488 }
1489 }
1490#ifdef PERL_MAD
1491 if (PL_madskills)
1492 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1493#endif /* PERL_MAD */
1494 PL_parser->bufptr = s;
1495}
1496
1497/*
ffb4593c
NT
1498 * S_incline
1499 * This subroutine has nothing to do with tilting, whether at windmills
1500 * or pinball tables. Its name is short for "increment line". It
57843af0 1501 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1502 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1503 * # line 500 "foo.pm"
1504 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1505 */
1506
76e3520e 1507STATIC void
d9095cec 1508S_incline(pTHX_ const char *s)
463ee0b2 1509{
97aff369 1510 dVAR;
d9095cec
NC
1511 const char *t;
1512 const char *n;
1513 const char *e;
8818d409 1514 line_t line_num;
463ee0b2 1515
7918f24d
NC
1516 PERL_ARGS_ASSERT_INCLINE;
1517
57843af0 1518 CopLINE_inc(PL_curcop);
463ee0b2
LW
1519 if (*s++ != '#')
1520 return;
d4c19fe8
AL
1521 while (SPACE_OR_TAB(*s))
1522 s++;
73659bf1
GS
1523 if (strnEQ(s, "line", 4))
1524 s += 4;
1525 else
1526 return;
084592ab 1527 if (SPACE_OR_TAB(*s))
73659bf1 1528 s++;
4e553d73 1529 else
73659bf1 1530 return;
d4c19fe8
AL
1531 while (SPACE_OR_TAB(*s))
1532 s++;
463ee0b2
LW
1533 if (!isDIGIT(*s))
1534 return;
d4c19fe8 1535
463ee0b2
LW
1536 n = s;
1537 while (isDIGIT(*s))
1538 s++;
07714eb4 1539 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1540 return;
bf4acbe4 1541 while (SPACE_OR_TAB(*s))
463ee0b2 1542 s++;
73659bf1 1543 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1544 s++;
73659bf1
GS
1545 e = t + 1;
1546 }
463ee0b2 1547 else {
c35e046a
AL
1548 t = s;
1549 while (!isSPACE(*t))
1550 t++;
73659bf1 1551 e = t;
463ee0b2 1552 }
bf4acbe4 1553 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1554 e++;
1555 if (*e != '\n' && *e != '\0')
1556 return; /* false alarm */
1557
8818d409
FC
1558 line_num = atoi(n)-1;
1559
f4dd75d9 1560 if (t - s > 0) {
d9095cec 1561 const STRLEN len = t - s;
19bad673
NC
1562 SV *const temp_sv = CopFILESV(PL_curcop);
1563 const char *cf;
1564 STRLEN tmplen;
1565
1566 if (temp_sv) {
1567 cf = SvPVX(temp_sv);
1568 tmplen = SvCUR(temp_sv);
1569 } else {
1570 cf = NULL;
1571 tmplen = 0;
1572 }
1573
42d9b98d 1574 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1575 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1576 * to *{"::_<newfilename"} */
44867030
NC
1577 /* However, the long form of evals is only turned on by the
1578 debugger - usually they're "(eval %lu)" */
1579 char smallbuf[128];
1580 char *tmpbuf;
1581 GV **gvp;
d9095cec 1582 STRLEN tmplen2 = len;
798b63bc 1583 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1584 tmpbuf = smallbuf;
1585 else
2ae0db35 1586 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1587 tmpbuf[0] = '_';
1588 tmpbuf[1] = '<';
2ae0db35 1589 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1590 tmplen += 2;
8a5ee598
RGS
1591 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1592 if (gvp) {
44867030
NC
1593 char *tmpbuf2;
1594 GV *gv2;
1595
1596 if (tmplen2 + 2 <= sizeof smallbuf)
1597 tmpbuf2 = smallbuf;
1598 else
1599 Newx(tmpbuf2, tmplen2 + 2, char);
1600
1601 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1602 /* Either they malloc'd it, or we malloc'd it,
1603 so no prefix is present in ours. */
1604 tmpbuf2[0] = '_';
1605 tmpbuf2[1] = '<';
1606 }
1607
1608 memcpy(tmpbuf2 + 2, s, tmplen2);
1609 tmplen2 += 2;
1610
8a5ee598 1611 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1612 if (!isGV(gv2)) {
8a5ee598 1613 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1614 /* adjust ${"::_<newfilename"} to store the new file name */
1615 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
8818d409
FC
1616 /* The line number may differ. If that is the case,
1617 alias the saved lines that are in the array.
1618 Otherwise alias the whole array. */
1619 if (CopLINE(PL_curcop) == line_num) {
1620 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1621 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1622 }
1623 else if (GvAV(*gvp)) {
1624 AV * const av = GvAV(*gvp);
1625 const I32 start = CopLINE(PL_curcop)+1;
1626 I32 items = AvFILLp(av) - start;
1627 if (items > 0) {
1628 AV * const av2 = GvAVn(gv2);
1629 SV **svp = AvARRAY(av) + start;
1630 I32 l = (I32)line_num+1;
1631 while (items--)
1632 av_store(av2, l++, SvREFCNT_inc(*svp++));
1633 }
1634 }
e5527e4b 1635 }
44867030
NC
1636
1637 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1638 }
e66cf94c 1639 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1640 }
05ec9bb3 1641 CopFILE_free(PL_curcop);
d9095cec 1642 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1643 }
8818d409 1644 CopLINE_set(PL_curcop, line_num);
463ee0b2
LW
1645}
1646
29595ff2 1647#ifdef PERL_MAD
cd81e915 1648/* skip space before PL_thistoken */
29595ff2
NC
1649
1650STATIC char *
1651S_skipspace0(pTHX_ register char *s)
1652{
7918f24d
NC
1653 PERL_ARGS_ASSERT_SKIPSPACE0;
1654
29595ff2
NC
1655 s = skipspace(s);
1656 if (!PL_madskills)
1657 return s;
cd81e915
NC
1658 if (PL_skipwhite) {
1659 if (!PL_thiswhite)
6b29d1f5 1660 PL_thiswhite = newSVpvs("");
cd81e915
NC
1661 sv_catsv(PL_thiswhite, PL_skipwhite);
1662 sv_free(PL_skipwhite);
1663 PL_skipwhite = 0;
1664 }
1665 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1666 return s;
1667}
1668
cd81e915 1669/* skip space after PL_thistoken */
29595ff2
NC
1670
1671STATIC char *
1672S_skipspace1(pTHX_ register char *s)
1673{
d4c19fe8 1674 const char *start = s;
29595ff2
NC
1675 I32 startoff = start - SvPVX(PL_linestr);
1676
7918f24d
NC
1677 PERL_ARGS_ASSERT_SKIPSPACE1;
1678
29595ff2
NC
1679 s = skipspace(s);
1680 if (!PL_madskills)
1681 return s;
1682 start = SvPVX(PL_linestr) + startoff;
cd81e915 1683 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1684 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1685 PL_thistoken = newSVpvn(tstart, start - tstart);
1686 }
1687 PL_realtokenstart = -1;
1688 if (PL_skipwhite) {
1689 if (!PL_nextwhite)
6b29d1f5 1690 PL_nextwhite = newSVpvs("");
cd81e915
NC
1691 sv_catsv(PL_nextwhite, PL_skipwhite);
1692 sv_free(PL_skipwhite);
1693 PL_skipwhite = 0;
29595ff2
NC
1694 }
1695 return s;
1696}
1697
1698STATIC char *
1699S_skipspace2(pTHX_ register char *s, SV **svp)
1700{
c35e046a
AL
1701 char *start;
1702 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1703 const I32 startoff = s - SvPVX(PL_linestr);
1704
7918f24d
NC
1705 PERL_ARGS_ASSERT_SKIPSPACE2;
1706
29595ff2
NC
1707 s = skipspace(s);
1708 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1709 if (!PL_madskills || !svp)
1710 return s;
1711 start = SvPVX(PL_linestr) + startoff;
cd81e915 1712 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1713 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1714 PL_thistoken = newSVpvn(tstart, start - tstart);
1715 PL_realtokenstart = -1;
29595ff2 1716 }
cd81e915 1717 if (PL_skipwhite) {
29595ff2 1718 if (!*svp)
6b29d1f5 1719 *svp = newSVpvs("");
cd81e915
NC
1720 sv_setsv(*svp, PL_skipwhite);
1721 sv_free(PL_skipwhite);
1722 PL_skipwhite = 0;
29595ff2
NC
1723 }
1724
1725 return s;
1726}
1727#endif
1728
80a702cd 1729STATIC void
15f169a1 1730S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1731{
1732 AV *av = CopFILEAVx(PL_curcop);
1733 if (av) {
b9f83d2f 1734 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1735 if (orig_sv)
1736 sv_setsv(sv, orig_sv);
1737 else
1738 sv_setpvn(sv, buf, len);
80a702cd
RGS
1739 (void)SvIOK_on(sv);
1740 SvIV_set(sv, 0);
1741 av_store(av, (I32)CopLINE(PL_curcop), sv);
1742 }
1743}
1744
ffb4593c
NT
1745/*
1746 * S_skipspace
1747 * Called to gobble the appropriate amount and type of whitespace.
1748 * Skips comments as well.
1749 */
1750
76e3520e 1751STATIC char *
cea2e8a9 1752S_skipspace(pTHX_ register char *s)
a687059c 1753{
5db06880 1754#ifdef PERL_MAD
f0e67a1d
Z
1755 char *start = s;
1756#endif /* PERL_MAD */
7918f24d 1757 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1758#ifdef PERL_MAD
cd81e915
NC
1759 if (PL_skipwhite) {
1760 sv_free(PL_skipwhite);
f0e67a1d 1761 PL_skipwhite = NULL;
5db06880 1762 }
f0e67a1d 1763#endif /* PERL_MAD */
3280af22 1764 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1765 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1766 s++;
f0e67a1d
Z
1767 } else {
1768 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1769 PL_bufptr = s;
f0998909
Z
1770 lex_read_space(LEX_KEEP_PREVIOUS |
1771 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1772 LEX_NO_NEXT_CHUNK : 0));
3280af22 1773 s = PL_bufptr;
f0e67a1d
Z
1774 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1775 if (PL_linestart > PL_bufptr)
1776 PL_bufptr = PL_linestart;
1777 return s;
463ee0b2 1778 }
5db06880 1779#ifdef PERL_MAD
f0e67a1d
Z
1780 if (PL_madskills)
1781 PL_skipwhite = newSVpvn(start, s-start);
1782#endif /* PERL_MAD */
5db06880 1783 return s;
a687059c 1784}
378cc40b 1785
ffb4593c
NT
1786/*
1787 * S_check_uni
1788 * Check the unary operators to ensure there's no ambiguity in how they're
1789 * used. An ambiguous piece of code would be:
1790 * rand + 5
1791 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1792 * the +5 is its argument.
1793 */
1794
76e3520e 1795STATIC void
cea2e8a9 1796S_check_uni(pTHX)
ba106d47 1797{
97aff369 1798 dVAR;
d4c19fe8
AL
1799 const char *s;
1800 const char *t;
2f3197b3 1801
3280af22 1802 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1803 return;
3280af22
NIS
1804 while (isSPACE(*PL_last_uni))
1805 PL_last_uni++;
c35e046a
AL
1806 s = PL_last_uni;
1807 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1808 s++;
3280af22 1809 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1810 return;
6136c704 1811
9b387841
NC
1812 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1813 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1814 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1815}
1816
ffb4593c
NT
1817/*
1818 * LOP : macro to build a list operator. Its behaviour has been replaced
1819 * with a subroutine, S_lop() for which LOP is just another name.
1820 */
1821
a0d0e21e
LW
1822#define LOP(f,x) return lop(f,x,s)
1823
ffb4593c
NT
1824/*
1825 * S_lop
1826 * Build a list operator (or something that might be one). The rules:
1827 * - if we have a next token, then it's a list operator [why?]
1828 * - if the next thing is an opening paren, then it's a function
1829 * - else it's a list operator
1830 */
1831
76e3520e 1832STATIC I32
a0be28da 1833S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1834{
97aff369 1835 dVAR;
7918f24d
NC
1836
1837 PERL_ARGS_ASSERT_LOP;
1838
6154021b 1839 pl_yylval.ival = f;
35c8bce7 1840 CLINE;
3280af22
NIS
1841 PL_expect = x;
1842 PL_bufptr = s;
1843 PL_last_lop = PL_oldbufptr;
eb160463 1844 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1845#ifdef PERL_MAD
1846 if (PL_lasttoke)
78cdf107 1847 goto lstop;
5db06880 1848#else
3280af22 1849 if (PL_nexttoke)
78cdf107 1850 goto lstop;
5db06880 1851#endif
79072805 1852 if (*s == '(')
bbf60fe6 1853 return REPORT(FUNC);
29595ff2 1854 s = PEEKSPACE(s);
79072805 1855 if (*s == '(')
bbf60fe6 1856 return REPORT(FUNC);
78cdf107
Z
1857 else {
1858 lstop:
1859 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1860 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 1861 return REPORT(LSTOP);
78cdf107 1862 }
79072805
LW
1863}
1864
5db06880
NC
1865#ifdef PERL_MAD
1866 /*
1867 * S_start_force
1868 * Sets up for an eventual force_next(). start_force(0) basically does
1869 * an unshift, while start_force(-1) does a push. yylex removes items
1870 * on the "pop" end.
1871 */
1872
1873STATIC void
1874S_start_force(pTHX_ int where)
1875{
1876 int i;
1877
cd81e915 1878 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1879 where = PL_lasttoke;
cd81e915
NC
1880 assert(PL_curforce < 0 || PL_curforce == where);
1881 if (PL_curforce != where) {
5db06880
NC
1882 for (i = PL_lasttoke; i > where; --i) {
1883 PL_nexttoke[i] = PL_nexttoke[i-1];
1884 }
1885 PL_lasttoke++;
1886 }
cd81e915 1887 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1888 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1889 PL_curforce = where;
1890 if (PL_nextwhite) {
5db06880 1891 if (PL_madskills)
6b29d1f5 1892 curmad('^', newSVpvs(""));
cd81e915 1893 CURMAD('_', PL_nextwhite);
5db06880
NC
1894 }
1895}
1896
1897STATIC void
1898S_curmad(pTHX_ char slot, SV *sv)
1899{
1900 MADPROP **where;
1901
1902 if (!sv)
1903 return;
cd81e915
NC
1904 if (PL_curforce < 0)
1905 where = &PL_thismad;
5db06880 1906 else
cd81e915 1907 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1908
cd81e915 1909 if (PL_faketokens)
76f68e9b 1910 sv_setpvs(sv, "");
5db06880
NC
1911 else {
1912 if (!IN_BYTES) {
1913 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1914 SvUTF8_on(sv);
1915 else if (PL_encoding) {
1916 sv_recode_to_utf8(sv, PL_encoding);
1917 }
1918 }
1919 }
1920
1921 /* keep a slot open for the head of the list? */
1922 if (slot != '_' && *where && (*where)->mad_key == '^') {
1923 (*where)->mad_key = slot;
daba3364 1924 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1925 (*where)->mad_val = (void*)sv;
1926 }
1927 else
1928 addmad(newMADsv(slot, sv), where, 0);
1929}
1930#else
b3f24c00
MHM
1931# define start_force(where) NOOP
1932# define curmad(slot, sv) NOOP
5db06880
NC
1933#endif
1934
ffb4593c
NT
1935/*
1936 * S_force_next
9cbb5ea2 1937 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1938 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1939 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1940 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1941 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1942 */
1943
4e553d73 1944STATIC void
cea2e8a9 1945S_force_next(pTHX_ I32 type)
79072805 1946{
97aff369 1947 dVAR;
704d4215
GG
1948#ifdef DEBUGGING
1949 if (DEBUG_T_TEST) {
1950 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1951 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1952 }
1953#endif
5db06880 1954#ifdef PERL_MAD
cd81e915 1955 if (PL_curforce < 0)
5db06880 1956 start_force(PL_lasttoke);
cd81e915 1957 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1958 if (PL_lex_state != LEX_KNOWNEXT)
1959 PL_lex_defer = PL_lex_state;
1960 PL_lex_state = LEX_KNOWNEXT;
1961 PL_lex_expect = PL_expect;
cd81e915 1962 PL_curforce = -1;
5db06880 1963#else
3280af22
NIS
1964 PL_nexttype[PL_nexttoke] = type;
1965 PL_nexttoke++;
1966 if (PL_lex_state != LEX_KNOWNEXT) {
1967 PL_lex_defer = PL_lex_state;
1968 PL_lex_expect = PL_expect;
1969 PL_lex_state = LEX_KNOWNEXT;
79072805 1970 }
5db06880 1971#endif
79072805
LW
1972}
1973
28ac2b49
Z
1974void
1975Perl_yyunlex(pTHX)
1976{
a7aaec61
Z
1977 int yyc = PL_parser->yychar;
1978 if (yyc != YYEMPTY) {
1979 if (yyc) {
1980 start_force(-1);
1981 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1982 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 1983 PL_lex_allbrackets--;
a7aaec61 1984 PL_lex_brackets--;
78cdf107
Z
1985 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1986 } else if (yyc == '('/*)*/) {
1987 PL_lex_allbrackets--;
1988 yyc |= (2<<24);
a7aaec61
Z
1989 }
1990 force_next(yyc);
1991 }
28ac2b49
Z
1992 PL_parser->yychar = YYEMPTY;
1993 }
1994}
1995
d0a148a6 1996STATIC SV *
15f169a1 1997S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1998{
97aff369 1999 dVAR;
740cce10 2000 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
2001 !IN_BYTES
2002 && UTF
2003 && !is_ascii_string((const U8*)start, len)
740cce10 2004 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
2005 return sv;
2006}
2007
ffb4593c
NT
2008/*
2009 * S_force_word
2010 * When the lexer knows the next thing is a word (for instance, it has
2011 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
2012 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2013 * lookahead.
ffb4593c
NT
2014 *
2015 * Arguments:
b1b65b59 2016 * char *start : buffer position (must be within PL_linestr)
02b34bbe 2017 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2018 * int check_keyword : if true, Perl checks to make sure the word isn't
2019 * a keyword (do this if the word is a label, e.g. goto FOO)
2020 * int allow_pack : if true, : characters will also be allowed (require,
2021 * use, etc. do this)
9cbb5ea2 2022 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2023 */
2024
76e3520e 2025STATIC char *
cea2e8a9 2026S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 2027{
97aff369 2028 dVAR;
463ee0b2
LW
2029 register char *s;
2030 STRLEN len;
4e553d73 2031
7918f24d
NC
2032 PERL_ARGS_ASSERT_FORCE_WORD;
2033
29595ff2 2034 start = SKIPSPACE1(start);
463ee0b2 2035 s = start;
7e2040f0 2036 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2037 (allow_pack && *s == ':') ||
15f0808c 2038 (allow_initial_tick && *s == '\'') )
a0d0e21e 2039 {
3280af22 2040 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2041 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2042 return start;
cd81e915 2043 start_force(PL_curforce);
5db06880
NC
2044 if (PL_madskills)
2045 curmad('X', newSVpvn(start,s-start));
463ee0b2 2046 if (token == METHOD) {
29595ff2 2047 s = SKIPSPACE1(s);
463ee0b2 2048 if (*s == '(')
3280af22 2049 PL_expect = XTERM;
463ee0b2 2050 else {
3280af22 2051 PL_expect = XOPERATOR;
463ee0b2 2052 }
79072805 2053 }
e74e6b3d 2054 if (PL_madskills)
63575281 2055 curmad('g', newSVpvs( "forced" ));
9ded7720 2056 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2057 = (OP*)newSVOP(OP_CONST,0,
2058 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2059 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2060 force_next(token);
2061 }
2062 return s;
2063}
2064
ffb4593c
NT
2065/*
2066 * S_force_ident
9cbb5ea2 2067 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2068 * text only contains the "foo" portion. The first argument is a pointer
2069 * to the "foo", and the second argument is the type symbol to prefix.
2070 * Forces the next token to be a "WORD".
9cbb5ea2 2071 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2072 */
2073
76e3520e 2074STATIC void
bfed75c6 2075S_force_ident(pTHX_ register const char *s, int kind)
79072805 2076{
97aff369 2077 dVAR;
7918f24d
NC
2078
2079 PERL_ARGS_ASSERT_FORCE_IDENT;
2080
c35e046a 2081 if (*s) {
90e5519e
NC
2082 const STRLEN len = strlen(s);
2083 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2084 start_force(PL_curforce);
9ded7720 2085 NEXTVAL_NEXTTOKE.opval = o;
79072805 2086 force_next(WORD);
748a9306 2087 if (kind) {
11343788 2088 o->op_private = OPpCONST_ENTERED;
55497cff 2089 /* XXX see note in pp_entereval() for why we forgo typo
2090 warnings if the symbol must be introduced in an eval.
2091 GSAR 96-10-12 */
90e5519e
NC
2092 gv_fetchpvn_flags(s, len,
2093 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2094 : GV_ADD,
2095 kind == '$' ? SVt_PV :
2096 kind == '@' ? SVt_PVAV :
2097 kind == '%' ? SVt_PVHV :
a0d0e21e 2098 SVt_PVGV
90e5519e 2099 );
748a9306 2100 }
79072805
LW
2101 }
2102}
2103
1571675a
GS
2104NV
2105Perl_str_to_version(pTHX_ SV *sv)
2106{
2107 NV retval = 0.0;
2108 NV nshift = 1.0;
2109 STRLEN len;
cfd0369c 2110 const char *start = SvPV_const(sv,len);
9d4ba2ae 2111 const char * const end = start + len;
504618e9 2112 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2113
2114 PERL_ARGS_ASSERT_STR_TO_VERSION;
2115
1571675a 2116 while (start < end) {
ba210ebe 2117 STRLEN skip;
1571675a
GS
2118 UV n;
2119 if (utf)
9041c2e3 2120 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2121 else {
2122 n = *(U8*)start;
2123 skip = 1;
2124 }
2125 retval += ((NV)n)/nshift;
2126 start += skip;
2127 nshift *= 1000;
2128 }
2129 return retval;
2130}
2131
4e553d73 2132/*
ffb4593c
NT
2133 * S_force_version
2134 * Forces the next token to be a version number.
e759cc13
RGS
2135 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2136 * and if "guessing" is TRUE, then no new token is created (and the caller
2137 * must use an alternative parsing method).
ffb4593c
NT
2138 */
2139
76e3520e 2140STATIC char *
e759cc13 2141S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2142{
97aff369 2143 dVAR;
5f66b61c 2144 OP *version = NULL;
44dcb63b 2145 char *d;
5db06880
NC
2146#ifdef PERL_MAD
2147 I32 startoff = s - SvPVX(PL_linestr);
2148#endif
89bfa8cd 2149
7918f24d
NC
2150 PERL_ARGS_ASSERT_FORCE_VERSION;
2151
29595ff2 2152 s = SKIPSPACE1(s);
89bfa8cd 2153
44dcb63b 2154 d = s;
dd629d5b 2155 if (*d == 'v')
44dcb63b 2156 d++;
44dcb63b 2157 if (isDIGIT(*d)) {
e759cc13
RGS
2158 while (isDIGIT(*d) || *d == '_' || *d == '.')
2159 d++;
5db06880
NC
2160#ifdef PERL_MAD
2161 if (PL_madskills) {
cd81e915 2162 start_force(PL_curforce);
5db06880
NC
2163 curmad('X', newSVpvn(s,d-s));
2164 }
2165#endif
4e4da3ac 2166 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2167 SV *ver;
8d08d9ba
DG
2168#ifdef USE_LOCALE_NUMERIC
2169 char *loc = setlocale(LC_NUMERIC, "C");
2170#endif
6154021b 2171 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2172#ifdef USE_LOCALE_NUMERIC
2173 setlocale(LC_NUMERIC, loc);
2174#endif
6154021b 2175 version = pl_yylval.opval;
dd629d5b
GS
2176 ver = cSVOPx(version)->op_sv;
2177 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2178 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2179 SvNV_set(ver, str_to_version(ver));
1571675a 2180 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2181 }
89bfa8cd 2182 }
5db06880
NC
2183 else if (guessing) {
2184#ifdef PERL_MAD
2185 if (PL_madskills) {
cd81e915
NC
2186 sv_free(PL_nextwhite); /* let next token collect whitespace */
2187 PL_nextwhite = 0;
5db06880
NC
2188 s = SvPVX(PL_linestr) + startoff;
2189 }
2190#endif
e759cc13 2191 return s;
5db06880 2192 }
89bfa8cd 2193 }
2194
5db06880
NC
2195#ifdef PERL_MAD
2196 if (PL_madskills && !version) {
cd81e915
NC
2197 sv_free(PL_nextwhite); /* let next token collect whitespace */
2198 PL_nextwhite = 0;
5db06880
NC
2199 s = SvPVX(PL_linestr) + startoff;
2200 }
2201#endif
89bfa8cd 2202 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2203 start_force(PL_curforce);
9ded7720 2204 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2205 force_next(WORD);
89bfa8cd 2206
e759cc13 2207 return s;
89bfa8cd 2208}
2209
ffb4593c 2210/*
91152fc1
DG
2211 * S_force_strict_version
2212 * Forces the next token to be a version number using strict syntax rules.
2213 */
2214
2215STATIC char *
2216S_force_strict_version(pTHX_ char *s)
2217{
2218 dVAR;
2219 OP *version = NULL;
2220#ifdef PERL_MAD
2221 I32 startoff = s - SvPVX(PL_linestr);
2222#endif
2223 const char *errstr = NULL;
2224
2225 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2226
2227 while (isSPACE(*s)) /* leading whitespace */
2228 s++;
2229
2230 if (is_STRICT_VERSION(s,&errstr)) {
2231 SV *ver = newSV(0);
2232 s = (char *)scan_version(s, ver, 0);
2233 version = newSVOP(OP_CONST, 0, ver);
2234 }
4e4da3ac
Z
2235 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2236 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2237 {
91152fc1
DG
2238 PL_bufptr = s;
2239 if (errstr)
2240 yyerror(errstr); /* version required */
2241 return s;
2242 }
2243
2244#ifdef PERL_MAD
2245 if (PL_madskills && !version) {
2246 sv_free(PL_nextwhite); /* let next token collect whitespace */
2247 PL_nextwhite = 0;
2248 s = SvPVX(PL_linestr) + startoff;
2249 }
2250#endif
2251 /* NOTE: The parser sees the package name and the VERSION swapped */
2252 start_force(PL_curforce);
2253 NEXTVAL_NEXTTOKE.opval = version;
2254 force_next(WORD);
2255
2256 return s;
2257}
2258
2259/*
ffb4593c
NT
2260 * S_tokeq
2261 * Tokenize a quoted string passed in as an SV. It finds the next
2262 * chunk, up to end of string or a backslash. It may make a new
2263 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2264 * turns \\ into \.
2265 */
2266
76e3520e 2267STATIC SV *
cea2e8a9 2268S_tokeq(pTHX_ SV *sv)
79072805 2269{
97aff369 2270 dVAR;
79072805
LW
2271 register char *s;
2272 register char *send;
2273 register char *d;
b3ac6de7
IZ
2274 STRLEN len = 0;
2275 SV *pv = sv;
79072805 2276
7918f24d
NC
2277 PERL_ARGS_ASSERT_TOKEQ;
2278
79072805 2279 if (!SvLEN(sv))
b3ac6de7 2280 goto finish;
79072805 2281
a0d0e21e 2282 s = SvPV_force(sv, len);
21a311ee 2283 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2284 goto finish;
463ee0b2 2285 send = s + len;
dcb21ed6
NC
2286 /* This is relying on the SV being "well formed" with a trailing '\0' */
2287 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2288 s++;
2289 if (s == send)
b3ac6de7 2290 goto finish;
79072805 2291 d = s;
be4731d2 2292 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2293 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2294 }
79072805
LW
2295 while (s < send) {
2296 if (*s == '\\') {
a0d0e21e 2297 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2298 s++; /* all that, just for this */
2299 }
2300 *d++ = *s++;
2301 }
2302 *d = '\0';
95a20fc0 2303 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2304 finish:
3280af22 2305 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2306 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2307 return sv;
2308}
2309
ffb4593c
NT
2310/*
2311 * Now come three functions related to double-quote context,
2312 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2313 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2314 * interact with PL_lex_state, and create fake ( ... ) argument lists
2315 * to handle functions and concatenation.
2316 * They assume that whoever calls them will be setting up a fake
2317 * join call, because each subthing puts a ',' after it. This lets
2318 * "lower \luPpEr"
2319 * become
2320 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2321 *
2322 * (I'm not sure whether the spurious commas at the end of lcfirst's
2323 * arguments and join's arguments are created or not).
2324 */
2325
2326/*
2327 * S_sublex_start
6154021b 2328 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2329 *
2330 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2331 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2332 *
2333 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2334 *
2335 * Everything else becomes a FUNC.
2336 *
2337 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2338 * had an OP_CONST or OP_READLINE). This just sets us up for a
2339 * call to S_sublex_push().
2340 */
2341
76e3520e 2342STATIC I32
cea2e8a9 2343S_sublex_start(pTHX)
79072805 2344{
97aff369 2345 dVAR;
6154021b 2346 register const I32 op_type = pl_yylval.ival;
79072805
LW
2347
2348 if (op_type == OP_NULL) {
6154021b 2349 pl_yylval.opval = PL_lex_op;
5f66b61c 2350 PL_lex_op = NULL;
79072805
LW
2351 return THING;
2352 }
2353 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2354 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2355
2356 if (SvTYPE(sv) == SVt_PVIV) {
2357 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2358 STRLEN len;
96a5add6 2359 const char * const p = SvPV_const(sv, len);
740cce10 2360 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2361 SvREFCNT_dec(sv);
2362 sv = nsv;
4e553d73 2363 }
6154021b 2364 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2365 PL_lex_stuff = NULL;
6f33ba73
RGS
2366 /* Allow <FH> // "foo" */
2367 if (op_type == OP_READLINE)
2368 PL_expect = XTERMORDORDOR;
79072805
LW
2369 return THING;
2370 }
e3f73d4e
RGS
2371 else if (op_type == OP_BACKTICK && PL_lex_op) {
2372 /* readpipe() vas overriden */
2373 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2374 pl_yylval.opval = PL_lex_op;
9b201d7d 2375 PL_lex_op = NULL;
e3f73d4e
RGS
2376 PL_lex_stuff = NULL;
2377 return THING;
2378 }
79072805 2379
3280af22 2380 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2381 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2382 PL_sublex_info.sub_op = PL_lex_op;
2383 PL_lex_state = LEX_INTERPPUSH;
55497cff 2384
3280af22
NIS
2385 PL_expect = XTERM;
2386 if (PL_lex_op) {
6154021b 2387 pl_yylval.opval = PL_lex_op;
5f66b61c 2388 PL_lex_op = NULL;
55497cff 2389 return PMFUNC;
2390 }
2391 else
2392 return FUNC;
2393}
2394
ffb4593c
NT
2395/*
2396 * S_sublex_push
2397 * Create a new scope to save the lexing state. The scope will be
2398 * ended in S_sublex_done. Returns a '(', starting the function arguments
2399 * to the uc, lc, etc. found before.
2400 * Sets PL_lex_state to LEX_INTERPCONCAT.
2401 */
2402
76e3520e 2403STATIC I32
cea2e8a9 2404S_sublex_push(pTHX)
55497cff 2405{
27da23d5 2406 dVAR;
f46d017c 2407 ENTER;
55497cff 2408
3280af22 2409 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2410 SAVEBOOL(PL_lex_dojoin);
3280af22 2411 SAVEI32(PL_lex_brackets);
78cdf107
Z
2412 SAVEI32(PL_lex_allbrackets);
2413 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2414 SAVEI32(PL_lex_casemods);
2415 SAVEI32(PL_lex_starts);
651b5b28 2416 SAVEI8(PL_lex_state);
7766f137 2417 SAVEVPTR(PL_lex_inpat);
98246f1e 2418 SAVEI16(PL_lex_inwhat);
57843af0 2419 SAVECOPLINE(PL_curcop);
3280af22 2420 SAVEPPTR(PL_bufptr);
8452ff4b 2421 SAVEPPTR(PL_bufend);
3280af22
NIS
2422 SAVEPPTR(PL_oldbufptr);
2423 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2424 SAVEPPTR(PL_last_lop);
2425 SAVEPPTR(PL_last_uni);
3280af22
NIS
2426 SAVEPPTR(PL_linestart);
2427 SAVESPTR(PL_linestr);
8edd5f42
RGS
2428 SAVEGENERICPV(PL_lex_brackstack);
2429 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2430
2431 PL_linestr = PL_lex_stuff;
a0714e2c 2432 PL_lex_stuff = NULL;
3280af22 2433
9cbb5ea2
GS
2434 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2435 = SvPVX(PL_linestr);
3280af22 2436 PL_bufend += SvCUR(PL_linestr);
bd61b366 2437 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2438 SAVEFREESV(PL_linestr);
2439
2440 PL_lex_dojoin = FALSE;
2441 PL_lex_brackets = 0;
78cdf107
Z
2442 PL_lex_allbrackets = 0;
2443 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2444 Newx(PL_lex_brackstack, 120, char);
2445 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2446 PL_lex_casemods = 0;
2447 *PL_lex_casestack = '\0';
2448 PL_lex_starts = 0;
2449 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2450 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2451
2452 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2453 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2454 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2455 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2456 else
5f66b61c 2457 PL_lex_inpat = NULL;
79072805 2458
55497cff 2459 return '(';
79072805
LW
2460}
2461
ffb4593c
NT
2462/*
2463 * S_sublex_done
2464 * Restores lexer state after a S_sublex_push.
2465 */
2466
76e3520e 2467STATIC I32
cea2e8a9 2468S_sublex_done(pTHX)
79072805 2469{
27da23d5 2470 dVAR;
3280af22 2471 if (!PL_lex_starts++) {
396482e1 2472 SV * const sv = newSVpvs("");
9aa983d2
JH
2473 if (SvUTF8(PL_linestr))
2474 SvUTF8_on(sv);
3280af22 2475 PL_expect = XOPERATOR;
6154021b 2476 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2477 return THING;
2478 }
2479
3280af22
NIS
2480 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2481 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2482 return yylex();
79072805
LW
2483 }
2484
ffb4593c 2485 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2486 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2487 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2488 PL_linestr = PL_lex_repl;
2489 PL_lex_inpat = 0;
2490 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2491 PL_bufend += SvCUR(PL_linestr);
bd61b366 2492 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2493 SAVEFREESV(PL_linestr);
2494 PL_lex_dojoin = FALSE;
2495 PL_lex_brackets = 0;
78cdf107
Z
2496 PL_lex_allbrackets = 0;
2497 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2498 PL_lex_casemods = 0;
2499 *PL_lex_casestack = '\0';
2500 PL_lex_starts = 0;
25da4f38 2501 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2502 PL_lex_state = LEX_INTERPNORMAL;
2503 PL_lex_starts++;
e9fa98b2
HS
2504 /* we don't clear PL_lex_repl here, so that we can check later
2505 whether this is an evalled subst; that means we rely on the
2506 logic to ensure sublex_done() is called again only via the
2507 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2508 }
e9fa98b2 2509 else {
3280af22 2510 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2511 PL_lex_repl = NULL;
e9fa98b2 2512 }
79072805 2513 return ',';
ffed7fef
LW
2514 }
2515 else {
5db06880
NC
2516#ifdef PERL_MAD
2517 if (PL_madskills) {
cd81e915
NC
2518 if (PL_thiswhite) {
2519 if (!PL_endwhite)
6b29d1f5 2520 PL_endwhite = newSVpvs("");
cd81e915
NC
2521 sv_catsv(PL_endwhite, PL_thiswhite);
2522 PL_thiswhite = 0;
2523 }
2524 if (PL_thistoken)
76f68e9b 2525 sv_setpvs(PL_thistoken,"");
5db06880 2526 else
cd81e915 2527 PL_realtokenstart = -1;
5db06880
NC
2528 }
2529#endif
f46d017c 2530 LEAVE;
3280af22
NIS
2531 PL_bufend = SvPVX(PL_linestr);
2532 PL_bufend += SvCUR(PL_linestr);
2533 PL_expect = XOPERATOR;
09bef843 2534 PL_sublex_info.sub_inwhat = 0;
79072805 2535 return ')';
ffed7fef
LW
2536 }
2537}
2538
02aa26ce
NT
2539/*
2540 scan_const
2541
2542 Extracts a pattern, double-quoted string, or transliteration. This
2543 is terrifying code.
2544
94def140 2545 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2546 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2547 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2548
94def140
TS
2549 Returns a pointer to the character scanned up to. If this is
2550 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2551 successfully parsed), will leave an OP for the substring scanned
6154021b 2552 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2553 by looking at the next characters herself.
2554
02aa26ce
NT
2555 In patterns:
2556 backslashes:
ff3f963a 2557 constants: \N{NAME} only
02aa26ce
NT
2558 case and quoting: \U \Q \E
2559 stops on @ and $, but not for $ as tail anchor
2560
2561 In transliterations:
2562 characters are VERY literal, except for - not at the start or end
94def140
TS
2563 of the string, which indicates a range. If the range is in bytes,
2564 scan_const expands the range to the full set of intermediate
2565 characters. If the range is in utf8, the hyphen is replaced with
2566 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2567
2568 In double-quoted strings:
2569 backslashes:
2570 double-quoted style: \r and \n
ff3f963a 2571 constants: \x31, etc.
94def140 2572 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2573 case and quoting: \U \Q \E
2574 stops on @ and $
2575
2576 scan_const does *not* construct ops to handle interpolated strings.
2577 It stops processing as soon as it finds an embedded $ or @ variable
2578 and leaves it to the caller to work out what's going on.
2579
94def140
TS
2580 embedded arrays (whether in pattern or not) could be:
2581 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2582
2583 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2584
2585 $ in pattern could be $foo or could be tail anchor. Assumption:
2586 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2587 followed by one of "()| \r\n\t"
02aa26ce
NT
2588
2589 \1 (backreferences) are turned into $1
2590
2591 The structure of the code is
2592 while (there's a character to process) {
94def140
TS
2593 handle transliteration ranges
2594 skip regexp comments /(?#comment)/ and codes /(?{code})/
2595 skip #-initiated comments in //x patterns
2596 check for embedded arrays
02aa26ce
NT
2597 check for embedded scalars
2598 if (backslash) {
94def140 2599 deprecate \1 in substitution replacements
02aa26ce
NT
2600 handle string-changing backslashes \l \U \Q \E, etc.
2601 switch (what was escaped) {
94def140 2602 handle \- in a transliteration (becomes a literal -)
ff3f963a 2603 if a pattern and not \N{, go treat as regular character
94def140
TS
2604 handle \132 (octal characters)
2605 handle \x15 and \x{1234} (hex characters)
ff3f963a 2606 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2607 handle \cV (control characters)
2608 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2609 } (end switch)
77a135fe 2610 continue
02aa26ce 2611 } (end if backslash)
77a135fe 2612 handle regular character
02aa26ce 2613 } (end while character to read)
4e553d73 2614
02aa26ce
NT
2615*/
2616
76e3520e 2617STATIC char *
cea2e8a9 2618S_scan_const(pTHX_ char *start)
79072805 2619{
97aff369 2620 dVAR;
3280af22 2621 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2622 SV *sv = newSV(send - start); /* sv for the constant. See
2623 note below on sizing. */
02aa26ce
NT
2624 register char *s = start; /* start of the constant */
2625 register char *d = SvPVX(sv); /* destination for copies */
2626 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2627 bool didrange = FALSE; /* did we just finish a range? */
b953e60c
KW
2628 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2629 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
77a135fe
KW
2630 to be UTF8? But, this can
2631 show as true when the source
2632 isn't utf8, as for example
2633 when it is entirely composed
2634 of hex constants */
2635
2636 /* Note on sizing: The scanned constant is placed into sv, which is
2637 * initialized by newSV() assuming one byte of output for every byte of
2638 * input. This routine expects newSV() to allocate an extra byte for a
2639 * trailing NUL, which this routine will append if it gets to the end of
2640 * the input. There may be more bytes of input than output (eg., \N{LATIN
2641 * CAPITAL LETTER A}), or more output than input if the constant ends up
2642 * recoded to utf8, but each time a construct is found that might increase
2643 * the needed size, SvGROW() is called. Its size parameter each time is
2644 * based on the best guess estimate at the time, namely the length used so
2645 * far, plus the length the current construct will occupy, plus room for
2646 * the trailing NUL, plus one byte for every input byte still unscanned */
2647
012bcf8d 2648 UV uv;
4c3a8340
TS
2649#ifdef EBCDIC
2650 UV literal_endpoint = 0;
e294cc5d 2651 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2652#endif
012bcf8d 2653
7918f24d
NC
2654 PERL_ARGS_ASSERT_SCAN_CONST;
2655
bb16bae8 2656 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2657 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2658 /* If we are doing a trans and we know we want UTF8 set expectation */
2659 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2660 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2661 }
2662
2663
79072805 2664 while (s < send || dorange) {
ff3f963a 2665
02aa26ce 2666 /* get transliterations out of the way (they're most literal) */
3280af22 2667 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2668 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2669 if (dorange) {
1ba5c669
JH
2670 I32 i; /* current expanded character */
2671 I32 min; /* first character in range */
2672 I32 max; /* last character in range */
02aa26ce 2673
e294cc5d
JH
2674#ifdef EBCDIC
2675 UV uvmax = 0;
2676#endif
2677
2678 if (has_utf8
2679#ifdef EBCDIC
2680 && !native_range
2681#endif
2682 ) {
9d4ba2ae 2683 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2684 char *e = d++;
2685 while (e-- > c)
2686 *(e + 1) = *e;
25716404 2687 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2688 /* mark the range as done, and continue */
2689 dorange = FALSE;
2690 didrange = TRUE;
2691 continue;
2692 }
2b9d42f0 2693
95a20fc0 2694 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2695#ifdef EBCDIC
2696 SvGROW(sv,
2697 SvLEN(sv) + (has_utf8 ?
2698 (512 - UTF_CONTINUATION_MARK +
2699 UNISKIP(0x100))
2700 : 256));
2701 /* How many two-byte within 0..255: 128 in UTF-8,
2702 * 96 in UTF-8-mod. */
2703#else
9cbb5ea2 2704 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2705#endif
9cbb5ea2 2706 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2707#ifdef EBCDIC
2708 if (has_utf8) {
2709 int j;
2710 for (j = 0; j <= 1; j++) {
2711 char * const c = (char*)utf8_hop((U8*)d, -1);
2712 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2713 if (j)
2714 min = (U8)uv;
2715 else if (uv < 256)
2716 max = (U8)uv;
2717 else {
2718 max = (U8)0xff; /* only to \xff */
2719 uvmax = uv; /* \x{100} to uvmax */
2720 }
2721 d = c; /* eat endpoint chars */
2722 }
2723 }
2724 else {
2725#endif
2726 d -= 2; /* eat the first char and the - */
2727 min = (U8)*d; /* first char in range */
2728 max = (U8)d[1]; /* last char in range */
2729#ifdef EBCDIC
2730 }
2731#endif
8ada0baa 2732
c2e66d9e 2733 if (min > max) {
01ec43d0 2734 Perl_croak(aTHX_
d1573ac7 2735 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2736 (char)min, (char)max);
c2e66d9e
GS
2737 }
2738
c7f1f016 2739#ifdef EBCDIC
4c3a8340
TS
2740 if (literal_endpoint == 2 &&
2741 ((isLOWER(min) && isLOWER(max)) ||
2742 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2743 if (isLOWER(min)) {
2744 for (i = min; i <= max; i++)
2745 if (isLOWER(i))
db42d148 2746 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2747 } else {
2748 for (i = min; i <= max; i++)
2749 if (isUPPER(i))
db42d148 2750 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2751 }
2752 }
2753 else
2754#endif
2755 for (i = min; i <= max; i++)
e294cc5d
JH
2756#ifdef EBCDIC
2757 if (has_utf8) {
2758 const U8 ch = (U8)NATIVE_TO_UTF(i);
2759 if (UNI_IS_INVARIANT(ch))
2760 *d++ = (U8)i;
2761 else {
2762 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2763 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2764 }
2765 }
2766 else
2767#endif
2768 *d++ = (char)i;
2769
2770#ifdef EBCDIC
2771 if (uvmax) {
2772 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2773 if (uvmax > 0x101)
2774 *d++ = (char)UTF_TO_NATIVE(0xff);
2775 if (uvmax > 0x100)
2776 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2777 }
2778#endif
02aa26ce
NT
2779
2780 /* mark the range as done, and continue */
79072805 2781 dorange = FALSE;
01ec43d0 2782 didrange = TRUE;
4c3a8340
TS
2783#ifdef EBCDIC
2784 literal_endpoint = 0;
2785#endif
79072805 2786 continue;
4e553d73 2787 }
02aa26ce
NT
2788
2789 /* range begins (ignore - as first or last char) */
79072805 2790 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2791 if (didrange) {
1fafa243 2792 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2793 }
e294cc5d
JH
2794 if (has_utf8
2795#ifdef EBCDIC
2796 && !native_range
2797#endif
2798 ) {
25716404 2799 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2800 s++;
2801 continue;
2802 }
79072805
LW
2803 dorange = TRUE;
2804 s++;
01ec43d0
GS
2805 }
2806 else {
2807 didrange = FALSE;
4c3a8340
TS
2808#ifdef EBCDIC
2809 literal_endpoint = 0;
e294cc5d 2810 native_range = TRUE;
4c3a8340 2811#endif
01ec43d0 2812 }
79072805 2813 }
02aa26ce
NT
2814
2815 /* if we get here, we're not doing a transliteration */
2816
0f5d15d6
IZ
2817 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2818 except for the last char, which will be done separately. */
3280af22 2819 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2820 if (s[2] == '#') {
e994fd66 2821 while (s+1 < send && *s != ')')
db42d148 2822 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2823 }
2824 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2825 || (s[2] == '?' && s[3] == '{'))
155aba94 2826 {
cc6b7395 2827 I32 count = 1;
0f5d15d6 2828 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2829 char c;
2830
d9f97599
GS
2831 while (count && (c = *regparse)) {
2832 if (c == '\\' && regparse[1])
2833 regparse++;
4e553d73 2834 else if (c == '{')
cc6b7395 2835 count++;
4e553d73 2836 else if (c == '}')
cc6b7395 2837 count--;
d9f97599 2838 regparse++;
cc6b7395 2839 }
e994fd66 2840 if (*regparse != ')')
5bdf89e7 2841 regparse--; /* Leave one char for continuation. */
0f5d15d6 2842 while (s < regparse)
db42d148 2843 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2844 }
748a9306 2845 }
02aa26ce
NT
2846
2847 /* likewise skip #-initiated comments in //x patterns */
3280af22 2848 else if (*s == '#' && PL_lex_inpat &&
73134a2e 2849 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 2850 while (s+1 < send && *s != '\n')
db42d148 2851 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2852 }
02aa26ce 2853
5d1d4326 2854 /* check for embedded arrays
da6eedaa 2855 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2856 */
1749ea0d
TS
2857 else if (*s == '@' && s[1]) {
2858 if (isALNUM_lazy_if(s+1,UTF))
2859 break;
2860 if (strchr(":'{$", s[1]))
2861 break;
2862 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2863 break; /* in regexp, neither @+ nor @- are interpolated */
2864 }
02aa26ce
NT
2865
2866 /* check for embedded scalars. only stop if we're sure it's a
2867 variable.
2868 */
79072805 2869 else if (*s == '$') {
3280af22 2870 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2871 break;
77772344 2872 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2873 if (s[1] == '\\') {
2874 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2875 "Possible unintended interpolation of $\\ in regex");
77772344 2876 }
79072805 2877 break; /* in regexp, $ might be tail anchor */
77772344 2878 }
79072805 2879 }
02aa26ce 2880
2b9d42f0
NIS
2881 /* End of else if chain - OP_TRANS rejoin rest */
2882
02aa26ce 2883 /* backslashes */
79072805 2884 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2885 char* e; /* Can be used for ending '}', etc. */
2886
79072805 2887 s++;
02aa26ce 2888
7d0fc23c
KW
2889 /* warn on \1 - \9 in substitution replacements, but note that \11
2890 * is an octal; and \19 is \1 followed by '9' */
3280af22 2891 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2892 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2893 {
a2a5de95 2894 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2895 *--s = '$';
2896 break;
2897 }
02aa26ce
NT
2898
2899 /* string-change backslash escapes */
3280af22 2900 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2901 --s;
2902 break;
2903 }
ff3f963a
KW
2904 /* In a pattern, process \N, but skip any other backslash escapes.
2905 * This is because we don't want to translate an escape sequence
2906 * into a meta symbol and have the regex compiler use the meta
2907 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2908 * in spite of this, we do have to process \N here while the proper
2909 * charnames handler is in scope. See bugs #56444 and #62056.
2910 * There is a complication because \N in a pattern may also stand
2911 * for 'match a non-nl', and not mean a charname, in which case its
2912 * processing should be deferred to the regex compiler. To be a
2913 * charname it must be followed immediately by a '{', and not look
2914 * like \N followed by a curly quantifier, i.e., not something like
2915 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2916 * quantifier */
2917 else if (PL_lex_inpat
2918 && (*s != 'N'
2919 || s[1] != '{'
2920 || regcurly(s + 1)))
2921 {
cc74c5bd
TS
2922 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2923 goto default_action;
2924 }
02aa26ce 2925
79072805 2926 switch (*s) {
02aa26ce
NT
2927
2928 /* quoted - in transliterations */
79072805 2929 case '-':
3280af22 2930 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2931 *d++ = *s++;
2932 continue;
2933 }
2934 /* FALL THROUGH */
2935 default:
11b8faa4 2936 {
a2a5de95
NC
2937 if ((isALPHA(*s) || isDIGIT(*s)))
2938 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2939 "Unrecognized escape \\%c passed through",
2940 *s);
11b8faa4 2941 /* default action is to copy the quoted character */
f9a63242 2942 goto default_action;
11b8faa4 2943 }
02aa26ce 2944
632403cc 2945 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2946 case '0': case '1': case '2': case '3':
2947 case '4': case '5': case '6': case '7':
ba210ebe 2948 {
53305cf1
NC
2949 I32 flags = 0;
2950 STRLEN len = 3;
77a135fe 2951 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2952 s += len;
2953 }
012bcf8d 2954 goto NUM_ESCAPE_INSERT;
02aa26ce 2955
f0a2b745
KW
2956 /* eg. \o{24} indicates the octal constant \024 */
2957 case 'o':
2958 {
2959 STRLEN len;
454155d9 2960 const char* error;
f0a2b745 2961
454155d9 2962 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2963 s += len;
454155d9 2964 if (! valid) {
f0a2b745
KW
2965 yyerror(error);
2966 continue;
2967 }
2968 goto NUM_ESCAPE_INSERT;
2969 }
2970
77a135fe 2971 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2972 case 'x':
a0ed51b3
LW
2973 ++s;
2974 if (*s == '{') {
9d4ba2ae 2975 char* const e = strchr(s, '}');
a4c04bdc
NC
2976 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2977 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2978 STRLEN len;
355860ce 2979
53305cf1 2980 ++s;
adaeee49 2981 if (!e) {
a0ed51b3 2982 yyerror("Missing right brace on \\x{}");
355860ce 2983 continue;
ba210ebe 2984 }
53305cf1 2985 len = e - s;
77a135fe 2986 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2987 s = e + 1;
a0ed51b3
LW
2988 }
2989 else {
ba210ebe 2990 {
53305cf1 2991 STRLEN len = 2;
a4c04bdc 2992 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2993 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2994 s += len;
2995 }
012bcf8d
GS
2996 }
2997
2998 NUM_ESCAPE_INSERT:
ff3f963a
KW
2999 /* Insert oct or hex escaped character. There will always be
3000 * enough room in sv since such escapes will be longer than any
3001 * UTF-8 sequence they can end up as, except if they force us
3002 * to recode the rest of the string into utf8 */
ba7cea30 3003
77a135fe 3004 /* Here uv is the ordinal of the next character being added in
ff3f963a 3005 * unicode (converted from native). */
77a135fe 3006 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 3007 if (!has_utf8 && uv > 255) {
77a135fe
KW
3008 /* Might need to recode whatever we have accumulated so
3009 * far if it contains any chars variant in utf8 or
3010 * utf-ebcdic. */
3011
3012 SvCUR_set(sv, d - SvPVX_const(sv));
3013 SvPOK_on(sv);
3014 *d = '\0';
77a135fe 3015 /* See Note on sizing above. */
7bf79863
KW
3016 sv_utf8_upgrade_flags_grow(sv,
3017 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3018 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3019 d = SvPVX(sv) + SvCUR(sv);
3020 has_utf8 = TRUE;
012bcf8d
GS
3021 }
3022
77a135fe
KW
3023 if (has_utf8) {
3024 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3025 if (PL_lex_inwhat == OP_TRANS &&
3026 PL_sublex_info.sub_op) {
3027 PL_sublex_info.sub_op->op_private |=
3028 (PL_lex_repl ? OPpTRANS_FROM_UTF
3029 : OPpTRANS_TO_UTF);
f9a63242 3030 }
e294cc5d
JH
3031#ifdef EBCDIC
3032 if (uv > 255 && !dorange)
3033 native_range = FALSE;
3034#endif
012bcf8d 3035 }
a0ed51b3 3036 else {
012bcf8d 3037 *d++ = (char)uv;
a0ed51b3 3038 }
012bcf8d
GS
3039 }
3040 else {
c4d5f83a 3041 *d++ = (char) uv;
a0ed51b3 3042 }
79072805 3043 continue;
02aa26ce 3044
4a2d328f 3045 case 'N':
ff3f963a
KW
3046 /* In a non-pattern \N must be a named character, like \N{LATIN
3047 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3048 * mean to match a non-newline. For non-patterns, named
3049 * characters are converted to their string equivalents. In
3050 * patterns, named characters are not converted to their
3051 * ultimate forms for the same reasons that other escapes
3052 * aren't. Instead, they are converted to the \N{U+...} form
3053 * to get the value from the charnames that is in effect right
3054 * now, while preserving the fact that it was a named character
3055 * so that the regex compiler knows this */
3056
3057 /* This section of code doesn't generally use the
3058 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3059 * a close examination of this macro and determined it is a
3060 * no-op except on utfebcdic variant characters. Every
3061 * character generated by this that would normally need to be
3062 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3063 * needed, and would complicate use of copy(). XXX There are
3064 * other parts of this file where the macro is used
3065 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3066
3067 /* The structure of this section of code (besides checking for
3068 * errors and upgrading to utf8) is:
3069 * Further disambiguate between the two meanings of \N, and if
3070 * not a charname, go process it elsewhere
0a96133f
KW
3071 * If of form \N{U+...}, pass it through if a pattern;
3072 * otherwise convert to utf8
3073 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3074 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3075
3076 /* Here, s points to the 'N'; the test below is guaranteed to
3077 * succeed if we are being called on a pattern as we already
3078 * know from a test above that the next character is a '{'.
3079 * On a non-pattern \N must mean 'named sequence, which
3080 * requires braces */
3081 s++;
3082 if (*s != '{') {
3083 yyerror("Missing braces on \\N{}");
3084 continue;
3085 }
3086 s++;
3087
0a96133f 3088 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3089 if (! (e = strchr(s, '}'))) {
3090 if (! PL_lex_inpat) {
5777a3f7 3091 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3092 } else {
3093 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3094 }
0a96133f 3095 continue;
ff3f963a 3096 }
cddc7ef4 3097
ff3f963a 3098 /* Here it looks like a named character */
cddc7ef4 3099
ff3f963a
KW
3100 if (PL_lex_inpat) {
3101
3102 /* XXX This block is temporary code. \N{} implies that the
3103 * pattern is to have Unicode semantics, and therefore
3104 * currently has to be encoded in utf8. By putting it in
3105 * utf8 now, we save a whole pass in the regular expression
3106 * compiler. Once that code is changed so Unicode
3107 * semantics doesn't necessarily have to be in utf8, this
da3a4baf
KW
3108 * block should be removed. However, the code that parses
3109 * the output of this would have to be changed to not
3110 * necessarily expect utf8 */
ff3f963a 3111 if (!has_utf8) {
77a135fe 3112 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3113 SvPOK_on(sv);
e4f3eed8 3114 *d = '\0';
77a135fe 3115 /* See Note on sizing above. */
7bf79863 3116 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3117 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3118 /* 5 = '\N{' + cur char + NUL */
3119 (STRLEN)(send - s) + 5);
f08d6ad9 3120 d = SvPVX(sv) + SvCUR(sv);
89491803 3121 has_utf8 = TRUE;
ff3f963a
KW
3122 }
3123 }
423cee85 3124
ff3f963a
KW
3125 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3126 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3127 | PERL_SCAN_DISALLOW_PREFIX;
3128 STRLEN len;
3129
3130 /* For \N{U+...}, the '...' is a unicode value even on
3131 * EBCDIC machines */
3132 s += 2; /* Skip to next char after the 'U+' */
3133 len = e - s;
3134 uv = grok_hex(s, &len, &flags, NULL);
3135 if (len == 0 || len != (STRLEN)(e - s)) {
3136 yyerror("Invalid hexadecimal number in \\N{U+...}");
3137 s = e + 1;
3138 continue;
3139 }
3140
3141 if (PL_lex_inpat) {
3142
e2a7e165
KW
3143 /* On non-EBCDIC platforms, pass through to the regex
3144 * compiler unchanged. The reason we evaluated the
3145 * number above is to make sure there wasn't a syntax
3146 * error. But on EBCDIC we convert to native so
3147 * downstream code can continue to assume it's native
3148 */
ff3f963a 3149 s -= 5; /* Include the '\N{U+' */
e2a7e165
KW
3150#ifdef EBCDIC
3151 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3152 and the \0 */
3153 "\\N{U+%X}",
3154 (unsigned int) UNI_TO_NATIVE(uv));
3155#else
ff3f963a
KW
3156 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3157 d += e - s + 1;
e2a7e165 3158#endif
ff3f963a
KW
3159 }
3160 else { /* Not a pattern: convert the hex to string */
3161
3162 /* If destination is not in utf8, unconditionally
3163 * recode it to be so. This is because \N{} implies
3164 * Unicode semantics, and scalars have to be in utf8
3165 * to guarantee those semantics */
3166 if (! has_utf8) {
3167 SvCUR_set(sv, d - SvPVX_const(sv));
3168 SvPOK_on(sv);
3169 *d = '\0';
3170 /* See Note on sizing above. */
3171 sv_utf8_upgrade_flags_grow(
3172 sv,
3173 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3174 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3175 d = SvPVX(sv) + SvCUR(sv);
3176 has_utf8 = TRUE;
3177 }
3178
3179 /* Add the string to the output */
3180 if (UNI_IS_INVARIANT(uv)) {
3181 *d++ = (char) uv;
3182 }
3183 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3184 }
3185 }
3186 else { /* Here is \N{NAME} but not \N{U+...}. */
3187
3188 SV *res; /* result from charnames */
3189 const char *str; /* the string in 'res' */
3190 STRLEN len; /* its length */
3191
3192 /* Get the value for NAME */
3193 res = newSVpvn(s, e - s);
3194 res = new_constant( NULL, 0, "charnames",
3195 /* includes all of: \N{...} */
3196 res, NULL, s - 3, e - s + 4 );
3197
3198 /* Most likely res will be in utf8 already since the
3199 * standard charnames uses pack U, but a custom translator
3200 * can leave it otherwise, so make sure. XXX This can be
3201 * revisited to not have charnames use utf8 for characters
3202 * that don't need it when regexes don't have to be in utf8
3203 * for Unicode semantics. If doing so, remember EBCDIC */
3204 sv_utf8_upgrade(res);
3205 str = SvPV_const(res, len);
3206
3207 /* Don't accept malformed input */
3208 if (! is_utf8_string((U8 *) str, len)) {
3209 yyerror("Malformed UTF-8 returned by \\N");
3210 }
3211 else if (PL_lex_inpat) {
3212
3213 if (! len) { /* The name resolved to an empty string */
3214 Copy("\\N{}", d, 4, char);
3215 d += 4;
3216 }
3217 else {
3218 /* In order to not lose information for the regex
3219 * compiler, pass the result in the specially made
3220 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3221 * the code points in hex of each character
3222 * returned by charnames */
3223
3224 const char *str_end = str + len;
3225 STRLEN char_length; /* cur char's byte length */
3226 STRLEN output_length; /* and the number of bytes
3227 after this is translated
3228 into hex digits */
3229 const STRLEN off = d - SvPVX_const(sv);
3230
3231 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3232 * max('U+', '.'); and 1 for NUL */
3233 char hex_string[2 * UTF8_MAXBYTES + 5];
3234
3235 /* Get the first character of the result. */
3236 U32 uv = utf8n_to_uvuni((U8 *) str,
3237 len,
3238 &char_length,
3239 UTF8_ALLOW_ANYUV);
3240
3241 /* The call to is_utf8_string() above hopefully
3242 * guarantees that there won't be an error. But
3243 * it's easy here to make sure. The function just
3244 * above warns and returns 0 if invalid utf8, but
3245 * it can also return 0 if the input is validly a
3246 * NUL. Disambiguate */
3247 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3248 uv = UNICODE_REPLACEMENT;
3249 }
3250
3251 /* Convert first code point to hex, including the
e2a7e165
KW
3252 * boiler plate before it. For all these, we
3253 * convert to native format so that downstream code
3254 * can continue to assume the input is native */
78c35590 3255 output_length =
3353de27 3256 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3257 "\\N{U+%X",
3258 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3259
3260 /* Make sure there is enough space to hold it */
3261 d = off + SvGROW(sv, off
3262 + output_length
3263 + (STRLEN)(send - e)
3264 + 2); /* '}' + NUL */
3265 /* And output it */
3266 Copy(hex_string, d, output_length, char);
3267 d += output_length;
3268
3269 /* For each subsequent character, append dot and
3270 * its ordinal in hex */
3271 while ((str += char_length) < str_end) {
3272 const STRLEN off = d - SvPVX_const(sv);
3273 U32 uv = utf8n_to_uvuni((U8 *) str,
3274 str_end - str,
3275 &char_length,
3276 UTF8_ALLOW_ANYUV);
3277 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3278 uv = UNICODE_REPLACEMENT;
3279 }
3280
78c35590 3281 output_length =
3353de27 3282 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3283 ".%X",
3284 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3285
3286 d = off + SvGROW(sv, off
3287 + output_length
3288 + (STRLEN)(send - e)
3289 + 2); /* '}' + NUL */
3290 Copy(hex_string, d, output_length, char);
3291 d += output_length;
3292 }
3293
3294 *d++ = '}'; /* Done. Add the trailing brace */
3295 }
3296 }
3297 else { /* Here, not in a pattern. Convert the name to a
3298 * string. */
3299
3300 /* If destination is not in utf8, unconditionally
3301 * recode it to be so. This is because \N{} implies
3302 * Unicode semantics, and scalars have to be in utf8
3303 * to guarantee those semantics */
3304 if (! has_utf8) {
3305 SvCUR_set(sv, d - SvPVX_const(sv));
3306 SvPOK_on(sv);
3307 *d = '\0';
3308 /* See Note on sizing above. */
3309 sv_utf8_upgrade_flags_grow(sv,
3310 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3311 len + (STRLEN)(send - s) + 1);
3312 d = SvPVX(sv) + SvCUR(sv);
3313 has_utf8 = TRUE;
3314 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3315
3316 /* See Note on sizing above. (NOTE: SvCUR() is not
3317 * set correctly here). */
3318 const STRLEN off = d - SvPVX_const(sv);
3319 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3320 }
3321 Copy(str, d, len, char);
3322 d += len;
423cee85 3323 }
423cee85 3324 SvREFCNT_dec(res);
cb233ae3
KW
3325
3326 /* Deprecate non-approved name syntax */
3327 if (ckWARN_d(WARN_DEPRECATED)) {
3328 bool problematic = FALSE;
3329 char* i = s;
3330
3331 /* For non-ut8 input, look to see that the first
3332 * character is an alpha, then loop through the rest
3333 * checking that each is a continuation */
3334 if (! this_utf8) {
3335 if (! isALPHAU(*i)) problematic = TRUE;
3336 else for (i = s + 1; i < e; i++) {
3337 if (isCHARNAME_CONT(*i)) continue;
3338 problematic = TRUE;
3339 break;
3340 }
3341 }
3342 else {
3343 /* Similarly for utf8. For invariants can check
3344 * directly. We accept anything above the latin1
3345 * range because it is immaterial to Perl if it is
3346 * correct or not, and is expensive to check. But
3347 * it is fairly easy in the latin1 range to convert
3348 * the variants into a single character and check
3349 * those */
3350 if (UTF8_IS_INVARIANT(*i)) {
3351 if (! isALPHAU(*i)) problematic = TRUE;
3352 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
81c14aa2 3353 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
cb233ae3
KW
3354 *(i+1)))))
3355 {
3356 problematic = TRUE;
3357 }
3358 }
3359 if (! problematic) for (i = s + UTF8SKIP(s);
3360 i < e;
3361 i+= UTF8SKIP(i))
3362 {
3363 if (UTF8_IS_INVARIANT(*i)) {
3364 if (isCHARNAME_CONT(*i)) continue;
3365 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3366 continue;
3367 } else if (isCHARNAME_CONT(
3368 UNI_TO_NATIVE(
81c14aa2 3369 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
cb233ae3
KW
3370 {
3371 continue;
3372 }
3373 problematic = TRUE;
3374 break;
3375 }
3376 }
3377 if (problematic) {
6e1bad6c
KW
3378 /* The e-i passed to the final %.*s makes sure that
3379 * should the trailing NUL be missing that this
3380 * print won't run off the end of the string */
cb233ae3 3381 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3382 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3383 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3384 }
3385 }
3386 } /* End \N{NAME} */
ff3f963a
KW
3387#ifdef EBCDIC
3388 if (!dorange)
3389 native_range = FALSE; /* \N{} is defined to be Unicode */
3390#endif
3391 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3392 continue;
3393
02aa26ce 3394 /* \c is a control character */
79072805
LW
3395 case 'c':
3396 s++;
961ce445 3397 if (s < send) {
17a3df4c 3398 *d++ = grok_bslash_c(*s++, has_utf8, 1);
ba210ebe 3399 }
961ce445
RGS
3400 else {
3401 yyerror("Missing control char name in \\c");
3402 }
79072805 3403 continue;
02aa26ce
NT
3404
3405 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3406 case 'b':
db42d148 3407 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3408 break;
3409 case 'n':
db42d148 3410 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3411 break;
3412 case 'r':
db42d148 3413 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3414 break;
3415 case 'f':
db42d148 3416 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3417 break;
3418 case 't':
db42d148 3419 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3420 break;
34a3fe2a 3421 case 'e':
db42d148 3422 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3423 break;
3424 case 'a':
db42d148 3425 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3426 break;
02aa26ce
NT
3427 } /* end switch */
3428
79072805
LW
3429 s++;
3430 continue;
02aa26ce 3431 } /* end if (backslash) */
4c3a8340
TS
3432#ifdef EBCDIC
3433 else
3434 literal_endpoint++;
3435#endif
02aa26ce 3436
f9a63242 3437 default_action:
77a135fe
KW
3438 /* If we started with encoded form, or already know we want it,
3439 then encode the next character */
3440 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3441 STRLEN len = 1;
77a135fe
KW
3442
3443
3444 /* One might think that it is wasted effort in the case of the
3445 * source being utf8 (this_utf8 == TRUE) to take the next character
3446 * in the source, convert it to an unsigned value, and then convert
3447 * it back again. But the source has not been validated here. The
3448 * routine that does the conversion checks for errors like
3449 * malformed utf8 */
3450
5f66b61c
AL
3451 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3452 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3453 if (!has_utf8) {
3454 SvCUR_set(sv, d - SvPVX_const(sv));
3455 SvPOK_on(sv);
3456 *d = '\0';
77a135fe 3457 /* See Note on sizing above. */
7bf79863
KW
3458 sv_utf8_upgrade_flags_grow(sv,
3459 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3460 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3461 d = SvPVX(sv) + SvCUR(sv);
3462 has_utf8 = TRUE;
3463 } else if (need > len) {
3464 /* encoded value larger than old, may need extra space (NOTE:
3465 * SvCUR() is not set correctly here). See Note on sizing
3466 * above. */
9d4ba2ae 3467 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3468 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3469 }
77a135fe
KW
3470 s += len;
3471
5f66b61c 3472 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3473#ifdef EBCDIC
3474 if (uv > 255 && !dorange)
3475 native_range = FALSE;
3476#endif
2b9d42f0
NIS
3477 }
3478 else {
3479 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3480 }
02aa26ce
NT
3481 } /* while loop to process each character */
3482
3483 /* terminate the string and set up the sv */
79072805 3484 *d = '\0';
95a20fc0 3485 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3486 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3487 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3488
79072805 3489 SvPOK_on(sv);
9f4817db 3490 if (PL_encoding && !has_utf8) {
d0063567
DK
3491 sv_recode_to_utf8(sv, PL_encoding);
3492 if (SvUTF8(sv))
3493 has_utf8 = TRUE;
9f4817db 3494 }
2b9d42f0 3495 if (has_utf8) {
7e2040f0 3496 SvUTF8_on(sv);
2b9d42f0 3497 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3498 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3499 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3500 }
3501 }
79072805 3502
02aa26ce 3503 /* shrink the sv if we allocated more than we used */
79072805 3504 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3505 SvPV_shrink_to_cur(sv);
79072805 3506 }
02aa26ce 3507
6154021b 3508 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3509 if (s > PL_bufptr) {
eb0d8d16
NC
3510 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3511 const char *const key = PL_lex_inpat ? "qr" : "q";
3512 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3513 const char *type;
3514 STRLEN typelen;
3515
3516 if (PL_lex_inwhat == OP_TRANS) {
3517 type = "tr";
3518 typelen = 2;
3519 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3520 type = "s";
3521 typelen = 1;
3522 } else {
3523 type = "qq";
3524 typelen = 2;
3525 }
3526
3527 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3528 type, typelen);
3529 }
6154021b 3530 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3531 } else
8990e307 3532 SvREFCNT_dec(sv);
79072805
LW
3533 return s;
3534}
3535
ffb4593c
NT
3536/* S_intuit_more
3537 * Returns TRUE if there's more to the expression (e.g., a subscript),
3538 * FALSE otherwise.
ffb4593c
NT
3539 *
3540 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3541 *
3542 * ->[ and ->{ return TRUE
3543 * { and [ outside a pattern are always subscripts, so return TRUE
3544 * if we're outside a pattern and it's not { or [, then return FALSE
3545 * if we're in a pattern and the first char is a {
3546 * {4,5} (any digits around the comma) returns FALSE
3547 * if we're in a pattern and the first char is a [
3548 * [] returns FALSE
3549 * [SOMETHING] has a funky algorithm to decide whether it's a
3550 * character class or not. It has to deal with things like
3551 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3552 * anything else returns TRUE
3553 */
3554
9cbb5ea2
GS
3555/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3556
76e3520e 3557STATIC int
cea2e8a9 3558S_intuit_more(pTHX_ register char *s)
79072805 3559{
97aff369 3560 dVAR;
7918f24d
NC
3561
3562 PERL_ARGS_ASSERT_INTUIT_MORE;
3563
3280af22 3564 if (PL_lex_brackets)
79072805
LW
3565 return TRUE;
3566 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3567 return TRUE;
3568 if (*s != '{' && *s != '[')
3569 return FALSE;
3280af22 3570 if (!PL_lex_inpat)
79072805
LW
3571 return TRUE;
3572
3573 /* In a pattern, so maybe we have {n,m}. */
3574 if (*s == '{') {
b3155d95 3575 if (regcurly(s)) {
79072805 3576 return FALSE;
b3155d95 3577 }
79072805 3578 return TRUE;
79072805
LW
3579 }
3580
3581 /* On the other hand, maybe we have a character class */
3582
3583 s++;
3584 if (*s == ']' || *s == '^')
3585 return FALSE;
3586 else {
ffb4593c 3587 /* this is terrifying, and it works */
79072805
LW
3588 int weight = 2; /* let's weigh the evidence */
3589 char seen[256];
f27ffc4a 3590 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3591 const char * const send = strchr(s,']');
3280af22 3592 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3593
3594 if (!send) /* has to be an expression */
3595 return TRUE;
3596
3597 Zero(seen,256,char);
3598 if (*s == '$')
3599 weight -= 3;
3600 else if (isDIGIT(*s)) {
3601 if (s[1] != ']') {
3602 if (isDIGIT(s[1]) && s[2] == ']')
3603 weight -= 10;
3604 }
3605 else
3606 weight -= 100;
3607 }
3608 for (; s < send; s++) {
3609 last_un_char = un_char;
3610 un_char = (unsigned char)*s;
3611 switch (*s) {
3612 case '@':
3613 case '&':
3614 case '$':
3615 weight -= seen[un_char] * 10;
7e2040f0 3616 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3617 int len;
8903cb82 3618 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3619 len = (int)strlen(tmpbuf);
3620 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3621 weight -= 100;
3622 else
3623 weight -= 10;
3624 }
3625 else if (*s == '$' && s[1] &&
93a17b20
LW
3626 strchr("[#!%*<>()-=",s[1])) {
3627 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3628 weight -= 10;
3629 else
3630 weight -= 1;
3631 }
3632 break;
3633 case '\\':
3634 un_char = 254;
3635 if (s[1]) {
93a17b20 3636 if (strchr("wds]",s[1]))
79072805 3637 weight += 100;
10edeb5d 3638 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3639 weight += 1;
93a17b20 3640 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3641 weight += 40;
3642 else if (isDIGIT(s[1])) {
3643 weight += 40;
3644 while (s[1] && isDIGIT(s[1]))
3645 s++;
3646 }
3647 }
3648 else
3649 weight += 100;
3650 break;
3651 case '-':
3652 if (s[1] == '\\')
3653 weight += 50;
93a17b20 3654 if (strchr("aA01! ",last_un_char))
79072805 3655 weight += 30;
93a17b20 3656 if (strchr("zZ79~",s[1]))
79072805 3657 weight += 30;
f27ffc4a
GS
3658 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3659 weight -= 5; /* cope with negative subscript */
79072805
LW
3660 break;
3661 default:
3792a11b
NC
3662 if (!isALNUM(last_un_char)
3663 && !(last_un_char == '$' || last_un_char == '@'
3664 || last_un_char == '&')
3665 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3666 char *d = tmpbuf;
3667 while (isALPHA(*s))
3668 *d++ = *s++;
3669 *d = '\0';
5458a98a 3670 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3671 weight -= 150;
3672 }
3673 if (un_char == last_un_char + 1)
3674 weight += 5;
3675 weight -= seen[un_char];
3676 break;
3677 }
3678 seen[un_char]++;
3679 }
3680 if (weight >= 0) /* probably a character class */
3681 return FALSE;
3682 }
3683
3684 return TRUE;
3685}
ffed7fef 3686
ffb4593c
NT
3687/*
3688 * S_intuit_method
3689 *
3690 * Does all the checking to disambiguate
3691 * foo bar
3692 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3693 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3694 *
3695 * First argument is the stuff after the first token, e.g. "bar".
3696 *
3697 * Not a method if bar is a filehandle.
3698 * Not a method if foo is a subroutine prototyped to take a filehandle.
3699 * Not a method if it's really "Foo $bar"
3700 * Method if it's "foo $bar"
3701 * Not a method if it's really "print foo $bar"
3702 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3703 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3704 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3705 * =>
3706 */
3707
76e3520e 3708STATIC int
62d55b22 3709S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3710{
97aff369 3711 dVAR;
a0d0e21e 3712 char *s = start + (*start == '$');
3280af22 3713 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3714 STRLEN len;
3715 GV* indirgv;
5db06880
NC
3716#ifdef PERL_MAD
3717 int soff;
3718#endif
a0d0e21e 3719
7918f24d
NC
3720 PERL_ARGS_ASSERT_INTUIT_METHOD;
3721
a0d0e21e 3722 if (gv) {
62d55b22 3723 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3724 return 0;
62d55b22
NC
3725 if (cv) {
3726 if (SvPOK(cv)) {
3727 const char *proto = SvPVX_const(cv);
3728 if (proto) {
3729 if (*proto == ';')
3730 proto++;
3731 if (*proto == '*')
3732 return 0;
3733 }
b6c543e3
IZ
3734 }
3735 } else
c35e046a 3736 gv = NULL;
a0d0e21e 3737 }
8903cb82 3738 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3739 /* start is the beginning of the possible filehandle/object,
3740 * and s is the end of it
3741 * tmpbuf is a copy of it
3742 */
3743
a0d0e21e 3744 if (*start == '$') {
3ef1310e
RGS
3745 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3746 isUPPER(*PL_tokenbuf))
a0d0e21e 3747 return 0;
5db06880
NC
3748#ifdef PERL_MAD
3749 len = start - SvPVX(PL_linestr);
3750#endif
29595ff2 3751 s = PEEKSPACE(s);
f0092767 3752#ifdef PERL_MAD
5db06880
NC
3753 start = SvPVX(PL_linestr) + len;
3754#endif
3280af22
NIS
3755 PL_bufptr = start;
3756 PL_expect = XREF;
a0d0e21e
LW
3757 return *s == '(' ? FUNCMETH : METHOD;
3758 }
5458a98a 3759 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3760 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3761 len -= 2;
3762 tmpbuf[len] = '\0';
5db06880
NC
3763#ifdef PERL_MAD
3764 soff = s - SvPVX(PL_linestr);
3765#endif
c3e0f903
GS
3766 goto bare_package;
3767 }
90e5519e 3768 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3769 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3770 return 0;
3771 /* filehandle or package name makes it a method */
da51bb9b 3772 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3773#ifdef PERL_MAD
3774 soff = s - SvPVX(PL_linestr);
3775#endif
29595ff2 3776 s = PEEKSPACE(s);
3280af22 3777 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 3778 return 0; /* no assumptions -- "=>" quotes bareword */