This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
CH] Change usage of regex/op common to common names
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
a7aaec61
Z
129#define XENUMMASK 0x3f
130#define XFAKEEOF 0x40
131#define XFAKEBRACK 0x80
9059aa12 132
39e02b42
JH
133#ifdef USE_UTF8_SCRIPTS
134# define UTF (!IN_BYTES)
2b9d42f0 135#else
746b446a 136# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 137#endif
a0ed51b3 138
b1fc3636
CJ
139/* The maximum number of characters preceding the unrecognized one to display */
140#define UNRECOGNIZED_PRECEDE_COUNT 10
141
61f0cdd9 142/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
143 * 1999-02-27 mjd-perl-patch@plover.com */
144#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
145
bf4acbe4 146#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 147
ffb4593c
NT
148/* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
79072805
LW
150 * can get by with a single comparison (if the compiler is smart enough).
151 */
152
fb73857a 153/* #define LEX_NOTPARSING 11 is done in perl.h. */
154
b6007c36
DM
155#define LEX_NORMAL 10 /* normal code (ie not within "...") */
156#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
157#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
158#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
159#define LEX_INTERPSTART 6 /* expecting the start of a $var */
160
161 /* at end of code, eg "$x" followed by: */
162#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
163#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
164
165#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
166 string or after \E, $foo, etc */
167#define LEX_INTERPCONST 2 /* NOT USED */
168#define LEX_FORMLINE 1 /* expecting a format line */
169#define LEX_KNOWNEXT 0 /* next token known; just return it */
170
79072805 171
bbf60fe6 172#ifdef DEBUGGING
27da23d5 173static const char* const lex_state_names[] = {
bbf60fe6
DM
174 "KNOWNEXT",
175 "FORMLINE",
176 "INTERPCONST",
177 "INTERPCONCAT",
178 "INTERPENDMAYBE",
179 "INTERPEND",
180 "INTERPSTART",
181 "INTERPPUSH",
182 "INTERPCASEMOD",
183 "INTERPNORMAL",
184 "NORMAL"
185};
186#endif
187
79072805
LW
188#ifdef ff_next
189#undef ff_next
d48672a2
LW
190#endif
191
79072805 192#include "keywords.h"
fe14fcc3 193
ffb4593c
NT
194/* CLINE is a macro that ensures PL_copline has a sane value */
195
ae986130
LW
196#ifdef CLINE
197#undef CLINE
198#endif
57843af0 199#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 200
5db06880 201#ifdef PERL_MAD
29595ff2
NC
202# define SKIPSPACE0(s) skipspace0(s)
203# define SKIPSPACE1(s) skipspace1(s)
204# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
205# define PEEKSPACE(s) skipspace2(s,0)
206#else
207# define SKIPSPACE0(s) skipspace(s)
208# define SKIPSPACE1(s) skipspace(s)
209# define SKIPSPACE2(s,tsv) skipspace(s)
210# define PEEKSPACE(s) skipspace(s)
211#endif
212
ffb4593c
NT
213/*
214 * Convenience functions to return different tokens and prime the
9cbb5ea2 215 * lexer for the next token. They all take an argument.
ffb4593c
NT
216 *
217 * TOKEN : generic token (used for '(', DOLSHARP, etc)
218 * OPERATOR : generic operator
219 * AOPERATOR : assignment operator
220 * PREBLOCK : beginning the block after an if, while, foreach, ...
221 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
222 * PREREF : *EXPR where EXPR is not a simple identifier
223 * TERM : expression term
224 * LOOPX : loop exiting command (goto, last, dump, etc)
225 * FTST : file test operator
226 * FUN0 : zero-argument function
2d2e263d 227 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
228 * BOop : bitwise or or xor
229 * BAop : bitwise and
230 * SHop : shift operator
231 * PWop : power operator
9cbb5ea2 232 * PMop : pattern-matching operator
ffb4593c
NT
233 * Aop : addition-level operator
234 * Mop : multiplication-level operator
235 * Eop : equality-testing operator
e5edeb50 236 * Rop : relational operator <= != gt
ffb4593c
NT
237 *
238 * Also see LOP and lop() below.
239 */
240
998054bd 241#ifdef DEBUGGING /* Serve -DT. */
704d4215 242# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 243#else
bbf60fe6 244# define REPORT(retval) (retval)
998054bd
SC
245#endif
246
bbf60fe6
DM
247#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
248#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
249#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
250#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
251#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
252#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
253#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
254#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
255#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
256#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
257#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
258#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
259#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
260#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
261#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
262#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
263#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
264#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
265#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
266#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 267
a687059c
LW
268/* This bit of chicanery makes a unary function followed by
269 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
270 * The UNIDOR macro is for unary functions that can be followed by the //
271 * operator (such as C<shift // 0>).
a687059c 272 */
376fcdbf 273#define UNI2(f,x) { \
6154021b 274 pl_yylval.ival = f; \
376fcdbf
AL
275 PL_expect = x; \
276 PL_bufptr = s; \
277 PL_last_uni = PL_oldbufptr; \
278 PL_last_lop_op = f; \
279 if (*s == '(') \
280 return REPORT( (int)FUNC1 ); \
29595ff2 281 s = PEEKSPACE(s); \
376fcdbf
AL
282 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
283 }
6f33ba73
RGS
284#define UNI(f) UNI2(f,XTERM)
285#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 286
376fcdbf 287#define UNIBRACK(f) { \
6154021b 288 pl_yylval.ival = f; \
376fcdbf
AL
289 PL_bufptr = s; \
290 PL_last_uni = PL_oldbufptr; \
291 if (*s == '(') \
292 return REPORT( (int)FUNC1 ); \
29595ff2 293 s = PEEKSPACE(s); \
376fcdbf
AL
294 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
295 }
79072805 296
9f68db38 297/* grandfather return to old style */
78cdf107
Z
298#define OLDLOP(f) \
299 do { \
300 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
301 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
302 pl_yylval.ival = (f); \
303 PL_expect = XTERM; \
304 PL_bufptr = s; \
305 return (int)LSTOP; \
306 } while(0)
79072805 307
8fa7f367
JH
308#ifdef DEBUGGING
309
6154021b 310/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
311enum token_type {
312 TOKENTYPE_NONE,
313 TOKENTYPE_IVAL,
6154021b 314 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
315 TOKENTYPE_PVAL,
316 TOKENTYPE_OPVAL,
317 TOKENTYPE_GVVAL
318};
319
6d4a66ac
NC
320static struct debug_tokens {
321 const int token;
322 enum token_type type;
323 const char *name;
324} const debug_tokens[] =
9041c2e3 325{
bbf60fe6
DM
326 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
327 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
328 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
329 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
330 { ARROW, TOKENTYPE_NONE, "ARROW" },
331 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
332 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
333 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
334 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
335 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 336 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
337 { DO, TOKENTYPE_NONE, "DO" },
338 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
339 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
340 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
341 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
342 { ELSE, TOKENTYPE_NONE, "ELSE" },
343 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
344 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
345 { FOR, TOKENTYPE_IVAL, "FOR" },
346 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
347 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
348 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
349 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
350 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
351 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 352 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
353 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
354 { IF, TOKENTYPE_IVAL, "IF" },
355 { LABEL, TOKENTYPE_PVAL, "LABEL" },
356 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
357 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
358 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
359 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
360 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
361 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
362 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
363 { MY, TOKENTYPE_IVAL, "MY" },
364 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
365 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
366 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
367 { OROP, TOKENTYPE_IVAL, "OROP" },
368 { OROR, TOKENTYPE_NONE, "OROR" },
369 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
370 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
371 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
372 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
373 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
374 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
375 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
376 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
377 { PREINC, TOKENTYPE_NONE, "PREINC" },
378 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
379 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
380 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
381 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
382 { SUB, TOKENTYPE_NONE, "SUB" },
383 { THING, TOKENTYPE_OPVAL, "THING" },
384 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
385 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
386 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
387 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
388 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
389 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 390 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
391 { WHILE, TOKENTYPE_IVAL, "WHILE" },
392 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 393 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 394 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
395};
396
6154021b 397/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 398
bbf60fe6 399STATIC int
704d4215 400S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 401{
97aff369 402 dVAR;
7918f24d
NC
403
404 PERL_ARGS_ASSERT_TOKEREPORT;
405
bbf60fe6 406 if (DEBUG_T_TEST) {
bd61b366 407 const char *name = NULL;
bbf60fe6 408 enum token_type type = TOKENTYPE_NONE;
f54cb97a 409 const struct debug_tokens *p;
396482e1 410 SV* const report = newSVpvs("<== ");
bbf60fe6 411
f54cb97a 412 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
413 if (p->token == (int)rv) {
414 name = p->name;
415 type = p->type;
416 break;
417 }
418 }
419 if (name)
54667de8 420 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
421 else if ((char)rv > ' ' && (char)rv < '~')
422 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
423 else if (!rv)
396482e1 424 sv_catpvs(report, "EOF");
bbf60fe6
DM
425 else
426 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
427 switch (type) {
428 case TOKENTYPE_NONE:
429 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
430 break;
431 case TOKENTYPE_IVAL:
704d4215 432 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
433 break;
434 case TOKENTYPE_OPNUM:
435 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 436 PL_op_name[lvalp->ival]);
bbf60fe6
DM
437 break;
438 case TOKENTYPE_PVAL:
704d4215 439 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
440 break;
441 case TOKENTYPE_OPVAL:
704d4215 442 if (lvalp->opval) {
401441c0 443 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
444 PL_op_name[lvalp->opval->op_type]);
445 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 446 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 447 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
448 }
449
450 }
401441c0 451 else
396482e1 452 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
453 break;
454 }
b6007c36 455 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
456 };
457 return (int)rv;
998054bd
SC
458}
459
b6007c36
DM
460
461/* print the buffer with suitable escapes */
462
463STATIC void
15f169a1 464S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 465{
396482e1 466 SV* const tmp = newSVpvs("");
7918f24d
NC
467
468 PERL_ARGS_ASSERT_PRINTBUF;
469
b6007c36
DM
470 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
471 SvREFCNT_dec(tmp);
472}
473
8fa7f367
JH
474#endif
475
8290c323
NC
476static int
477S_deprecate_commaless_var_list(pTHX) {
478 PL_expect = XTERM;
479 deprecate("comma-less variable list");
480 return REPORT(','); /* grandfather non-comma-format format */
481}
482
ffb4593c
NT
483/*
484 * S_ao
485 *
c963b151
BD
486 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
487 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
488 */
489
76e3520e 490STATIC int
cea2e8a9 491S_ao(pTHX_ int toketype)
a0d0e21e 492{
97aff369 493 dVAR;
3280af22
NIS
494 if (*PL_bufptr == '=') {
495 PL_bufptr++;
a0d0e21e 496 if (toketype == ANDAND)
6154021b 497 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 498 else if (toketype == OROR)
6154021b 499 pl_yylval.ival = OP_ORASSIGN;
c963b151 500 else if (toketype == DORDOR)
6154021b 501 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
502 toketype = ASSIGNOP;
503 }
504 return toketype;
505}
506
ffb4593c
NT
507/*
508 * S_no_op
509 * When Perl expects an operator and finds something else, no_op
510 * prints the warning. It always prints "<something> found where
511 * operator expected. It prints "Missing semicolon on previous line?"
512 * if the surprise occurs at the start of the line. "do you need to
513 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
514 * where the compiler doesn't know if foo is a method call or a function.
515 * It prints "Missing operator before end of line" if there's nothing
516 * after the missing operator, or "... before <...>" if there is something
517 * after the missing operator.
518 */
519
76e3520e 520STATIC void
15f169a1 521S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 522{
97aff369 523 dVAR;
9d4ba2ae
AL
524 char * const oldbp = PL_bufptr;
525 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 526
7918f24d
NC
527 PERL_ARGS_ASSERT_NO_OP;
528
1189a94a
GS
529 if (!s)
530 s = oldbp;
07c798fb 531 else
1189a94a 532 PL_bufptr = s;
cea2e8a9 533 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
534 if (ckWARN_d(WARN_SYNTAX)) {
535 if (is_first)
536 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
537 "\t(Missing semicolon on previous line?)\n");
538 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 539 const char *t;
c35e046a
AL
540 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
541 NOOP;
56da5a46
RGS
542 if (t < PL_bufptr && isSPACE(*t))
543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
544 "\t(Do you need to predeclare %.*s?)\n",
551405c4 545 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
546 }
547 else {
548 assert(s >= oldbp);
549 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 550 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 551 }
07c798fb 552 }
3280af22 553 PL_bufptr = oldbp;
8990e307
LW
554}
555
ffb4593c
NT
556/*
557 * S_missingterm
558 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 559 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
560 * If we're in a delimited string and the delimiter is a control
561 * character, it's reformatted into a two-char sequence like ^C.
562 * This is fatal.
563 */
564
76e3520e 565STATIC void
cea2e8a9 566S_missingterm(pTHX_ char *s)
8990e307 567{
97aff369 568 dVAR;
8990e307
LW
569 char tmpbuf[3];
570 char q;
571 if (s) {
9d4ba2ae 572 char * const nl = strrchr(s,'\n');
d2719217 573 if (nl)
8990e307
LW
574 *nl = '\0';
575 }
463559e7 576 else if (isCNTRL(PL_multi_close)) {
8990e307 577 *tmpbuf = '^';
585ec06d 578 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
579 tmpbuf[2] = '\0';
580 s = tmpbuf;
581 }
582 else {
eb160463 583 *tmpbuf = (char)PL_multi_close;
8990e307
LW
584 tmpbuf[1] = '\0';
585 s = tmpbuf;
586 }
587 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 588 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 589}
79072805 590
ef89dcc3 591#define FEATURE_IS_ENABLED(name) \
0d863452 592 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 593 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b 594/* The longest string we pass in. */
1863b879 595#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
4a731d7b 596
0d863452
RH
597/*
598 * S_feature_is_enabled
599 * Check whether the named feature is enabled.
600 */
601STATIC bool
15f169a1 602S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 603{
97aff369 604 dVAR;
0d863452 605 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 606 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
607
608 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
609
4a731d7b
NC
610 assert(namelen <= MAX_FEATURE_LEN);
611 memcpy(&he_name[8], name, namelen);
d4c19fe8 612
7b9ef140 613 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
614}
615
ffb4593c 616/*
9cbb5ea2
GS
617 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
618 * utf16-to-utf8-reversed.
ffb4593c
NT
619 */
620
c39cd008
GS
621#ifdef PERL_CR_FILTER
622static void
623strip_return(SV *sv)
624{
95a20fc0 625 register const char *s = SvPVX_const(sv);
9d4ba2ae 626 register const char * const e = s + SvCUR(sv);
7918f24d
NC
627
628 PERL_ARGS_ASSERT_STRIP_RETURN;
629
c39cd008
GS
630 /* outer loop optimized to do nothing if there are no CR-LFs */
631 while (s < e) {
632 if (*s++ == '\r' && *s == '\n') {
633 /* hit a CR-LF, need to copy the rest */
634 register char *d = s - 1;
635 *d++ = *s++;
636 while (s < e) {
637 if (*s == '\r' && s[1] == '\n')
638 s++;
639 *d++ = *s++;
640 }
641 SvCUR(sv) -= s - d;
642 return;
643 }
644 }
645}
a868473f 646
76e3520e 647STATIC I32
c39cd008 648S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 649{
f54cb97a 650 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
651 if (count > 0 && !maxlen)
652 strip_return(sv);
653 return count;
a868473f
NIS
654}
655#endif
656
ffb4593c 657/*
8eaa0acf
Z
658=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
659
660Creates and initialises a new lexer/parser state object, supplying
661a context in which to lex and parse from a new source of Perl code.
662A pointer to the new state object is placed in L</PL_parser>. An entry
663is made on the save stack so that upon unwinding the new state object
664will be destroyed and the former value of L</PL_parser> will be restored.
665Nothing else need be done to clean up the parsing context.
666
667The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
668non-null, provides a string (in SV form) containing code to be parsed.
669A copy of the string is made, so subsequent modification of I<line>
670does not affect parsing. I<rsfp>, if non-null, provides an input stream
671from which code will be read to be parsed. If both are non-null, the
672code in I<line> comes first and must consist of complete lines of input,
673and I<rsfp> supplies the remainder of the source.
674
675The I<flags> parameter is reserved for future use, and must always
676be zero.
677
678=cut
679*/
ffb4593c 680
a0d0e21e 681void
8eaa0acf 682Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 683{
97aff369 684 dVAR;
6ef55633 685 const char *s = NULL;
8990e307 686 STRLEN len;
5486870f 687 yy_parser *parser, *oparser;
8eaa0acf
Z
688 if (flags)
689 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
690
691 /* create and initialise a parser */
692
199e78b7 693 Newxz(parser, 1, yy_parser);
5486870f 694 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
695 PL_parser = parser;
696
28ac2b49
Z
697 parser->stack = NULL;
698 parser->ps = NULL;
699 parser->stack_size = 0;
acdf0a21 700
e3abe207
DM
701 /* on scope exit, free this parser and restore any outer one */
702 SAVEPARSER(parser);
7c4baf47 703 parser->saved_curcop = PL_curcop;
e3abe207 704
acdf0a21 705 /* initialise lexer state */
8990e307 706
fb205e7a
DM
707#ifdef PERL_MAD
708 parser->curforce = -1;
709#else
710 parser->nexttoke = 0;
711#endif
ca4cfd28 712 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 713 parser->copline = NOLINE;
5afb0a62 714 parser->lex_state = LEX_NORMAL;
c2598295 715 parser->expect = XSTATE;
2f9285f8 716 parser->rsfp = rsfp;
f07ec6dd 717 parser->rsfp_filters = newAV();
2f9285f8 718
199e78b7
DM
719 Newx(parser->lex_brackstack, 120, char);
720 Newx(parser->lex_casestack, 12, char);
721 *parser->lex_casestack = '\0';
02b34bbe 722
10efb74f
NC
723 if (line) {
724 s = SvPV_const(line, len);
725 } else {
726 len = 0;
727 }
bdc0bf6f 728
10efb74f 729 if (!len) {
bdc0bf6f 730 parser->linestr = newSVpvs("\n;");
805700c1 731 } else {
719a9bb0 732 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 733 if (s[len-1] != ';')
bdc0bf6f 734 sv_catpvs(parser->linestr, "\n;");
8990e307 735 }
f06b5848
DM
736 parser->oldoldbufptr =
737 parser->oldbufptr =
738 parser->bufptr =
739 parser->linestart = SvPVX(parser->linestr);
740 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
741 parser->last_lop = parser->last_uni = NULL;
737c24fc
Z
742
743 parser->in_pod = 0;
79072805 744}
a687059c 745
e3abe207
DM
746
747/* delete a parser object */
748
749void
750Perl_parser_free(pTHX_ const yy_parser *parser)
751{
7918f24d
NC
752 PERL_ARGS_ASSERT_PARSER_FREE;
753
7c4baf47 754 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
755 SvREFCNT_dec(parser->linestr);
756
2f9285f8
DM
757 if (parser->rsfp == PerlIO_stdin())
758 PerlIO_clearerr(parser->rsfp);
799361c3
SH
759 else if (parser->rsfp && (!parser->old_parser ||
760 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 761 PerlIO_close(parser->rsfp);
5486870f 762 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 763
e3abe207
DM
764 Safefree(parser->lex_brackstack);
765 Safefree(parser->lex_casestack);
766 PL_parser = parser->old_parser;
767 Safefree(parser);
768}
769
770
ffb4593c 771/*
f0e67a1d
Z
772=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
773
774Buffer scalar containing the chunk currently under consideration of the
775text currently being lexed. This is always a plain string scalar (for
776which C<SvPOK> is true). It is not intended to be used as a scalar by
777normal scalar means; instead refer to the buffer directly by the pointer
778variables described below.
779
780The lexer maintains various C<char*> pointers to things in the
781C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
782reallocated, all of these pointers must be updated. Don't attempt to
783do this manually, but rather use L</lex_grow_linestr> if you need to
784reallocate the buffer.
785
786The content of the text chunk in the buffer is commonly exactly one
787complete line of input, up to and including a newline terminator,
788but there are situations where it is otherwise. The octets of the
789buffer may be intended to be interpreted as either UTF-8 or Latin-1.
790The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
791flag on this scalar, which may disagree with it.
792
793For direct examination of the buffer, the variable
794L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
795lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
796of these pointers is usually preferable to examination of the scalar
797through normal scalar means.
798
799=for apidoc AmxU|char *|PL_parser-E<gt>bufend
800
801Direct pointer to the end of the chunk of text currently being lexed, the
802end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
803+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
804always located at the end of the buffer, and does not count as part of
805the buffer's contents.
806
807=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
808
809Points to the current position of lexing inside the lexer buffer.
810Characters around this point may be freely examined, within
811the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
812L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
813interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
814
815Lexing code (whether in the Perl core or not) moves this pointer past
816the characters that it consumes. It is also expected to perform some
817bookkeeping whenever a newline character is consumed. This movement
818can be more conveniently performed by the function L</lex_read_to>,
819which handles newlines appropriately.
820
821Interpretation of the buffer's octets can be abstracted out by
822using the slightly higher-level functions L</lex_peek_unichar> and
823L</lex_read_unichar>.
824
825=for apidoc AmxU|char *|PL_parser-E<gt>linestart
826
827Points to the start of the current line inside the lexer buffer.
828This is useful for indicating at which column an error occurred, and
829not much else. This must be updated by any lexing code that consumes
830a newline; the function L</lex_read_to> handles this detail.
831
832=cut
833*/
834
835/*
836=for apidoc Amx|bool|lex_bufutf8
837
838Indicates whether the octets in the lexer buffer
839(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
840of Unicode characters. If not, they should be interpreted as Latin-1
841characters. This is analogous to the C<SvUTF8> flag for scalars.
842
843In UTF-8 mode, it is not guaranteed that the lexer buffer actually
844contains valid UTF-8. Lexing code must be robust in the face of invalid
845encoding.
846
847The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
848is significant, but not the whole story regarding the input character
849encoding. Normally, when a file is being read, the scalar contains octets
850and its C<SvUTF8> flag is off, but the octets should be interpreted as
851UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
852however, the scalar may have the C<SvUTF8> flag on, and in this case its
853octets should be interpreted as UTF-8 unless the C<use bytes> pragma
854is in effect. This logic may change in the future; use this function
855instead of implementing the logic yourself.
856
857=cut
858*/
859
860bool
861Perl_lex_bufutf8(pTHX)
862{
863 return UTF;
864}
865
866/*
867=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
868
869Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
870at least I<len> octets (including terminating NUL). Returns a
871pointer to the reallocated buffer. This is necessary before making
872any direct modification of the buffer that would increase its length.
873L</lex_stuff_pvn> provides a more convenient way to insert text into
874the buffer.
875
876Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
877this function updates all of the lexer's variables that point directly
878into the buffer.
879
880=cut
881*/
882
883char *
884Perl_lex_grow_linestr(pTHX_ STRLEN len)
885{
886 SV *linestr;
887 char *buf;
888 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
889 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
890 linestr = PL_parser->linestr;
891 buf = SvPVX(linestr);
892 if (len <= SvLEN(linestr))
893 return buf;
894 bufend_pos = PL_parser->bufend - buf;
895 bufptr_pos = PL_parser->bufptr - buf;
896 oldbufptr_pos = PL_parser->oldbufptr - buf;
897 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
898 linestart_pos = PL_parser->linestart - buf;
899 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
900 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
901 buf = sv_grow(linestr, len);
902 PL_parser->bufend = buf + bufend_pos;
903 PL_parser->bufptr = buf + bufptr_pos;
904 PL_parser->oldbufptr = buf + oldbufptr_pos;
905 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
906 PL_parser->linestart = buf + linestart_pos;
907 if (PL_parser->last_uni)
908 PL_parser->last_uni = buf + last_uni_pos;
909 if (PL_parser->last_lop)
910 PL_parser->last_lop = buf + last_lop_pos;
911 return buf;
912}
913
914/*
83aa740e 915=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
916
917Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
918immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
919reallocating the buffer if necessary. This means that lexing code that
920runs later will see the characters as if they had appeared in the input.
921It is not recommended to do this as part of normal parsing, and most
922uses of this facility run the risk of the inserted characters being
923interpreted in an unintended manner.
924
925The string to be inserted is represented by I<len> octets starting
926at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
927according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
928The characters are recoded for the lexer buffer, according to how the
929buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 930to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
931function is more convenient.
932
933=cut
934*/
935
936void
83aa740e 937Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 938{
749123ff 939 dVAR;
f0e67a1d
Z
940 char *bufptr;
941 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
942 if (flags & ~(LEX_STUFF_UTF8))
943 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
944 if (UTF) {
945 if (flags & LEX_STUFF_UTF8) {
946 goto plain_copy;
947 } else {
948 STRLEN highhalf = 0;
83aa740e 949 const char *p, *e = pv+len;
f0e67a1d
Z
950 for (p = pv; p != e; p++)
951 highhalf += !!(((U8)*p) & 0x80);
952 if (!highhalf)
953 goto plain_copy;
954 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
955 bufptr = PL_parser->bufptr;
956 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
957 SvCUR_set(PL_parser->linestr,
958 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
959 PL_parser->bufend += len+highhalf;
960 for (p = pv; p != e; p++) {
961 U8 c = (U8)*p;
962 if (c & 0x80) {
963 *bufptr++ = (char)(0xc0 | (c >> 6));
964 *bufptr++ = (char)(0x80 | (c & 0x3f));
965 } else {
966 *bufptr++ = (char)c;
967 }
968 }
969 }
970 } else {
971 if (flags & LEX_STUFF_UTF8) {
972 STRLEN highhalf = 0;
83aa740e 973 const char *p, *e = pv+len;
f0e67a1d
Z
974 for (p = pv; p != e; p++) {
975 U8 c = (U8)*p;
976 if (c >= 0xc4) {
977 Perl_croak(aTHX_ "Lexing code attempted to stuff "
978 "non-Latin-1 character into Latin-1 input");
979 } else if (c >= 0xc2 && p+1 != e &&
980 (((U8)p[1]) & 0xc0) == 0x80) {
981 p++;
982 highhalf++;
983 } else if (c >= 0x80) {
984 /* malformed UTF-8 */
985 ENTER;
986 SAVESPTR(PL_warnhook);
987 PL_warnhook = PERL_WARNHOOK_FATAL;
988 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
989 LEAVE;
990 }
991 }
992 if (!highhalf)
993 goto plain_copy;
994 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
995 bufptr = PL_parser->bufptr;
996 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
997 SvCUR_set(PL_parser->linestr,
998 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
999 PL_parser->bufend += len-highhalf;
1000 for (p = pv; p != e; p++) {
1001 U8 c = (U8)*p;
1002 if (c & 0x80) {
1003 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1004 p++;
1005 } else {
1006 *bufptr++ = (char)c;
1007 }
1008 }
1009 } else {
1010 plain_copy:
1011 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1012 bufptr = PL_parser->bufptr;
1013 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1014 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1015 PL_parser->bufend += len;
1016 Copy(pv, bufptr, len, char);
1017 }
1018 }
1019}
1020
1021/*
9dcc53ea
Z
1022=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1023
1024Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1025immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1026reallocating the buffer if necessary. This means that lexing code that
1027runs later will see the characters as if they had appeared in the input.
1028It is not recommended to do this as part of normal parsing, and most
1029uses of this facility run the risk of the inserted characters being
1030interpreted in an unintended manner.
1031
1032The string to be inserted is represented by octets starting at I<pv>
1033and continuing to the first nul. These octets are interpreted as either
1034UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1035in I<flags>. The characters are recoded for the lexer buffer, according
1036to how the buffer is currently being interpreted (L</lex_bufutf8>).
1037If it is not convenient to nul-terminate a string to be inserted, the
1038L</lex_stuff_pvn> function is more appropriate.
1039
1040=cut
1041*/
1042
1043void
1044Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1045{
1046 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1047 lex_stuff_pvn(pv, strlen(pv), flags);
1048}
1049
1050/*
f0e67a1d
Z
1051=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1052
1053Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1054immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1055reallocating the buffer if necessary. This means that lexing code that
1056runs later will see the characters as if they had appeared in the input.
1057It is not recommended to do this as part of normal parsing, and most
1058uses of this facility run the risk of the inserted characters being
1059interpreted in an unintended manner.
1060
1061The string to be inserted is the string value of I<sv>. The characters
1062are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1063being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1064not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1065need to construct a scalar.
1066
1067=cut
1068*/
1069
1070void
1071Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1072{
1073 char *pv;
1074 STRLEN len;
1075 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1076 if (flags)
1077 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1078 pv = SvPV(sv, len);
1079 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1080}
1081
1082/*
1083=for apidoc Amx|void|lex_unstuff|char *ptr
1084
1085Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1086I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1087This hides the discarded text from any lexing code that runs later,
1088as if the text had never appeared.
1089
1090This is not the normal way to consume lexed text. For that, use
1091L</lex_read_to>.
1092
1093=cut
1094*/
1095
1096void
1097Perl_lex_unstuff(pTHX_ char *ptr)
1098{
1099 char *buf, *bufend;
1100 STRLEN unstuff_len;
1101 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1102 buf = PL_parser->bufptr;
1103 if (ptr < buf)
1104 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1105 if (ptr == buf)
1106 return;
1107 bufend = PL_parser->bufend;
1108 if (ptr > bufend)
1109 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1110 unstuff_len = ptr - buf;
1111 Move(ptr, buf, bufend+1-ptr, char);
1112 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1113 PL_parser->bufend = bufend - unstuff_len;
1114}
1115
1116/*
1117=for apidoc Amx|void|lex_read_to|char *ptr
1118
1119Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1120to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1121performing the correct bookkeeping whenever a newline character is passed.
1122This is the normal way to consume lexed text.
1123
1124Interpretation of the buffer's octets can be abstracted out by
1125using the slightly higher-level functions L</lex_peek_unichar> and
1126L</lex_read_unichar>.
1127
1128=cut
1129*/
1130
1131void
1132Perl_lex_read_to(pTHX_ char *ptr)
1133{
1134 char *s;
1135 PERL_ARGS_ASSERT_LEX_READ_TO;
1136 s = PL_parser->bufptr;
1137 if (ptr < s || ptr > PL_parser->bufend)
1138 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1139 for (; s != ptr; s++)
1140 if (*s == '\n') {
1141 CopLINE_inc(PL_curcop);
1142 PL_parser->linestart = s+1;
1143 }
1144 PL_parser->bufptr = ptr;
1145}
1146
1147/*
1148=for apidoc Amx|void|lex_discard_to|char *ptr
1149
1150Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1151up to I<ptr>. The remaining content of the buffer will be moved, and
1152all pointers into the buffer updated appropriately. I<ptr> must not
1153be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1154it is not permitted to discard text that has yet to be lexed.
1155
1156Normally it is not necessarily to do this directly, because it suffices to
1157use the implicit discarding behaviour of L</lex_next_chunk> and things
1158based on it. However, if a token stretches across multiple lines,
1f317c95 1159and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1160that purpose, then after completion of the token it would be wise to
1161explicitly discard the now-unneeded earlier lines, to avoid future
1162multi-line tokens growing the buffer without bound.
1163
1164=cut
1165*/
1166
1167void
1168Perl_lex_discard_to(pTHX_ char *ptr)
1169{
1170 char *buf;
1171 STRLEN discard_len;
1172 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1173 buf = SvPVX(PL_parser->linestr);
1174 if (ptr < buf)
1175 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1176 if (ptr == buf)
1177 return;
1178 if (ptr > PL_parser->bufptr)
1179 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1180 discard_len = ptr - buf;
1181 if (PL_parser->oldbufptr < ptr)
1182 PL_parser->oldbufptr = ptr;
1183 if (PL_parser->oldoldbufptr < ptr)
1184 PL_parser->oldoldbufptr = ptr;
1185 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1186 PL_parser->last_uni = NULL;
1187 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1188 PL_parser->last_lop = NULL;
1189 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1190 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1191 PL_parser->bufend -= discard_len;
1192 PL_parser->bufptr -= discard_len;
1193 PL_parser->oldbufptr -= discard_len;
1194 PL_parser->oldoldbufptr -= discard_len;
1195 if (PL_parser->last_uni)
1196 PL_parser->last_uni -= discard_len;
1197 if (PL_parser->last_lop)
1198 PL_parser->last_lop -= discard_len;
1199}
1200
1201/*
1202=for apidoc Amx|bool|lex_next_chunk|U32 flags
1203
1204Reads in the next chunk of text to be lexed, appending it to
1205L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1206looked to the end of the current chunk and wants to know more. It is
1207usual, but not necessary, for lexing to have consumed the entirety of
1208the current chunk at this time.
1209
1210If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1211chunk (i.e., the current chunk has been entirely consumed), normally the
1212current chunk will be discarded at the same time that the new chunk is
1213read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1214will not be discarded. If the current chunk has not been entirely
1215consumed, then it will not be discarded regardless of the flag.
1216
1217Returns true if some new text was added to the buffer, or false if the
1218buffer has reached the end of the input text.
1219
1220=cut
1221*/
1222
1223#define LEX_FAKE_EOF 0x80000000
1224
1225bool
1226Perl_lex_next_chunk(pTHX_ U32 flags)
1227{
1228 SV *linestr;
1229 char *buf;
1230 STRLEN old_bufend_pos, new_bufend_pos;
1231 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1232 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1233 bool got_some_for_debugger = 0;
f0e67a1d
Z
1234 bool got_some;
1235 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1236 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1237 linestr = PL_parser->linestr;
1238 buf = SvPVX(linestr);
1239 if (!(flags & LEX_KEEP_PREVIOUS) &&
1240 PL_parser->bufptr == PL_parser->bufend) {
1241 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1242 linestart_pos = 0;
1243 if (PL_parser->last_uni != PL_parser->bufend)
1244 PL_parser->last_uni = NULL;
1245 if (PL_parser->last_lop != PL_parser->bufend)
1246 PL_parser->last_lop = NULL;
1247 last_uni_pos = last_lop_pos = 0;
1248 *buf = 0;
1249 SvCUR(linestr) = 0;
1250 } else {
1251 old_bufend_pos = PL_parser->bufend - buf;
1252 bufptr_pos = PL_parser->bufptr - buf;
1253 oldbufptr_pos = PL_parser->oldbufptr - buf;
1254 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1255 linestart_pos = PL_parser->linestart - buf;
1256 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1257 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1258 }
1259 if (flags & LEX_FAKE_EOF) {
1260 goto eof;
1261 } else if (!PL_parser->rsfp) {
1262 got_some = 0;
1263 } else if (filter_gets(linestr, old_bufend_pos)) {
1264 got_some = 1;
17cc9359 1265 got_some_for_debugger = 1;
f0e67a1d 1266 } else {
580561a3
Z
1267 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1268 sv_setpvs(linestr, "");
f0e67a1d
Z
1269 eof:
1270 /* End of real input. Close filehandle (unless it was STDIN),
1271 * then add implicit termination.
1272 */
1273 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1274 PerlIO_clearerr(PL_parser->rsfp);
1275 else if (PL_parser->rsfp)
1276 (void)PerlIO_close(PL_parser->rsfp);
1277 PL_parser->rsfp = NULL;
737c24fc 1278 PL_parser->in_pod = 0;
f0e67a1d
Z
1279#ifdef PERL_MAD
1280 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1281 PL_faketokens = 1;
1282#endif
1283 if (!PL_in_eval && PL_minus_p) {
1284 sv_catpvs(linestr,
1285 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1286 PL_minus_n = PL_minus_p = 0;
1287 } else if (!PL_in_eval && PL_minus_n) {
1288 sv_catpvs(linestr, /*{*/";}");
1289 PL_minus_n = 0;
1290 } else
1291 sv_catpvs(linestr, ";");
1292 got_some = 1;
1293 }
1294 buf = SvPVX(linestr);
1295 new_bufend_pos = SvCUR(linestr);
1296 PL_parser->bufend = buf + new_bufend_pos;
1297 PL_parser->bufptr = buf + bufptr_pos;
1298 PL_parser->oldbufptr = buf + oldbufptr_pos;
1299 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1300 PL_parser->linestart = buf + linestart_pos;
1301 if (PL_parser->last_uni)
1302 PL_parser->last_uni = buf + last_uni_pos;
1303 if (PL_parser->last_lop)
1304 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1305 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1306 PL_curstash != PL_debstash) {
1307 /* debugger active and we're not compiling the debugger code,
1308 * so store the line into the debugger's array of lines
1309 */
1310 update_debugger_info(NULL, buf+old_bufend_pos,
1311 new_bufend_pos-old_bufend_pos);
1312 }
1313 return got_some;
1314}
1315
1316/*
1317=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1318
1319Looks ahead one (Unicode) character in the text currently being lexed.
1320Returns the codepoint (unsigned integer value) of the next character,
1321or -1 if lexing has reached the end of the input text. To consume the
1322peeked character, use L</lex_read_unichar>.
1323
1324If the next character is in (or extends into) the next chunk of input
1325text, the next chunk will be read in. Normally the current chunk will be
1326discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1327then the current chunk will not be discarded.
1328
1329If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1330is encountered, an exception is generated.
1331
1332=cut
1333*/
1334
1335I32
1336Perl_lex_peek_unichar(pTHX_ U32 flags)
1337{
749123ff 1338 dVAR;
f0e67a1d
Z
1339 char *s, *bufend;
1340 if (flags & ~(LEX_KEEP_PREVIOUS))
1341 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1342 s = PL_parser->bufptr;
1343 bufend = PL_parser->bufend;
1344 if (UTF) {
1345 U8 head;
1346 I32 unichar;
1347 STRLEN len, retlen;
1348 if (s == bufend) {
1349 if (!lex_next_chunk(flags))
1350 return -1;
1351 s = PL_parser->bufptr;
1352 bufend = PL_parser->bufend;
1353 }
1354 head = (U8)*s;
1355 if (!(head & 0x80))
1356 return head;
1357 if (head & 0x40) {
1358 len = PL_utf8skip[head];
1359 while ((STRLEN)(bufend-s) < len) {
1360 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1361 break;
1362 s = PL_parser->bufptr;
1363 bufend = PL_parser->bufend;
1364 }
1365 }
1366 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1367 if (retlen == (STRLEN)-1) {
1368 /* malformed UTF-8 */
1369 ENTER;
1370 SAVESPTR(PL_warnhook);
1371 PL_warnhook = PERL_WARNHOOK_FATAL;
1372 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1373 LEAVE;
1374 }
1375 return unichar;
1376 } else {
1377 if (s == bufend) {
1378 if (!lex_next_chunk(flags))
1379 return -1;
1380 s = PL_parser->bufptr;
1381 }
1382 return (U8)*s;
1383 }
1384}
1385
1386/*
1387=for apidoc Amx|I32|lex_read_unichar|U32 flags
1388
1389Reads the next (Unicode) character in the text currently being lexed.
1390Returns the codepoint (unsigned integer value) of the character read,
1391and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1392if lexing has reached the end of the input text. To non-destructively
1393examine the next character, use L</lex_peek_unichar> instead.
1394
1395If the next character is in (or extends into) the next chunk of input
1396text, the next chunk will be read in. Normally the current chunk will be
1397discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1398then the current chunk will not be discarded.
1399
1400If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1401is encountered, an exception is generated.
1402
1403=cut
1404*/
1405
1406I32
1407Perl_lex_read_unichar(pTHX_ U32 flags)
1408{
1409 I32 c;
1410 if (flags & ~(LEX_KEEP_PREVIOUS))
1411 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1412 c = lex_peek_unichar(flags);
1413 if (c != -1) {
1414 if (c == '\n')
1415 CopLINE_inc(PL_curcop);
1416 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1417 }
1418 return c;
1419}
1420
1421/*
1422=for apidoc Amx|void|lex_read_space|U32 flags
1423
1424Reads optional spaces, in Perl style, in the text currently being
1425lexed. The spaces may include ordinary whitespace characters and
1426Perl-style comments. C<#line> directives are processed if encountered.
1427L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1428at a non-space character (or the end of the input text).
1429
1430If spaces extend into the next chunk of input text, the next chunk will
1431be read in. Normally the current chunk will be discarded at the same
1432time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1433chunk will not be discarded.
1434
1435=cut
1436*/
1437
f0998909
Z
1438#define LEX_NO_NEXT_CHUNK 0x80000000
1439
f0e67a1d
Z
1440void
1441Perl_lex_read_space(pTHX_ U32 flags)
1442{
1443 char *s, *bufend;
1444 bool need_incline = 0;
f0998909 1445 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1446 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1447#ifdef PERL_MAD
1448 if (PL_skipwhite) {
1449 sv_free(PL_skipwhite);
1450 PL_skipwhite = NULL;
1451 }
1452 if (PL_madskills)
1453 PL_skipwhite = newSVpvs("");
1454#endif /* PERL_MAD */
1455 s = PL_parser->bufptr;
1456 bufend = PL_parser->bufend;
1457 while (1) {
1458 char c = *s;
1459 if (c == '#') {
1460 do {
1461 c = *++s;
1462 } while (!(c == '\n' || (c == 0 && s == bufend)));
1463 } else if (c == '\n') {
1464 s++;
1465 PL_parser->linestart = s;
1466 if (s == bufend)
1467 need_incline = 1;
1468 else
1469 incline(s);
1470 } else if (isSPACE(c)) {
1471 s++;
1472 } else if (c == 0 && s == bufend) {
1473 bool got_more;
1474#ifdef PERL_MAD
1475 if (PL_madskills)
1476 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1477#endif /* PERL_MAD */
f0998909
Z
1478 if (flags & LEX_NO_NEXT_CHUNK)
1479 break;
f0e67a1d
Z
1480 PL_parser->bufptr = s;
1481 CopLINE_inc(PL_curcop);
1482 got_more = lex_next_chunk(flags);
1483 CopLINE_dec(PL_curcop);
1484 s = PL_parser->bufptr;
1485 bufend = PL_parser->bufend;
1486 if (!got_more)
1487 break;
1488 if (need_incline && PL_parser->rsfp) {
1489 incline(s);
1490 need_incline = 0;
1491 }
1492 } else {
1493 break;
1494 }
1495 }
1496#ifdef PERL_MAD
1497 if (PL_madskills)
1498 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1499#endif /* PERL_MAD */
1500 PL_parser->bufptr = s;
1501}
1502
1503/*
ffb4593c
NT
1504 * S_incline
1505 * This subroutine has nothing to do with tilting, whether at windmills
1506 * or pinball tables. Its name is short for "increment line". It
57843af0 1507 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1508 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1509 * # line 500 "foo.pm"
1510 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1511 */
1512
76e3520e 1513STATIC void
d9095cec 1514S_incline(pTHX_ const char *s)
463ee0b2 1515{
97aff369 1516 dVAR;
d9095cec
NC
1517 const char *t;
1518 const char *n;
1519 const char *e;
463ee0b2 1520
7918f24d
NC
1521 PERL_ARGS_ASSERT_INCLINE;
1522
57843af0 1523 CopLINE_inc(PL_curcop);
463ee0b2
LW
1524 if (*s++ != '#')
1525 return;
d4c19fe8
AL
1526 while (SPACE_OR_TAB(*s))
1527 s++;
73659bf1
GS
1528 if (strnEQ(s, "line", 4))
1529 s += 4;
1530 else
1531 return;
084592ab 1532 if (SPACE_OR_TAB(*s))
73659bf1 1533 s++;
4e553d73 1534 else
73659bf1 1535 return;
d4c19fe8
AL
1536 while (SPACE_OR_TAB(*s))
1537 s++;
463ee0b2
LW
1538 if (!isDIGIT(*s))
1539 return;
d4c19fe8 1540
463ee0b2
LW
1541 n = s;
1542 while (isDIGIT(*s))
1543 s++;
07714eb4 1544 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1545 return;
bf4acbe4 1546 while (SPACE_OR_TAB(*s))
463ee0b2 1547 s++;
73659bf1 1548 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1549 s++;
73659bf1
GS
1550 e = t + 1;
1551 }
463ee0b2 1552 else {
c35e046a
AL
1553 t = s;
1554 while (!isSPACE(*t))
1555 t++;
73659bf1 1556 e = t;
463ee0b2 1557 }
bf4acbe4 1558 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1559 e++;
1560 if (*e != '\n' && *e != '\0')
1561 return; /* false alarm */
1562
f4dd75d9 1563 if (t - s > 0) {
d9095cec 1564 const STRLEN len = t - s;
8a5ee598 1565#ifndef USE_ITHREADS
19bad673
NC
1566 SV *const temp_sv = CopFILESV(PL_curcop);
1567 const char *cf;
1568 STRLEN tmplen;
1569
1570 if (temp_sv) {
1571 cf = SvPVX(temp_sv);
1572 tmplen = SvCUR(temp_sv);
1573 } else {
1574 cf = NULL;
1575 tmplen = 0;
1576 }
1577
42d9b98d 1578 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1579 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1580 * to *{"::_<newfilename"} */
44867030
NC
1581 /* However, the long form of evals is only turned on by the
1582 debugger - usually they're "(eval %lu)" */
1583 char smallbuf[128];
1584 char *tmpbuf;
1585 GV **gvp;
d9095cec 1586 STRLEN tmplen2 = len;
798b63bc 1587 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1588 tmpbuf = smallbuf;
1589 else
2ae0db35 1590 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1591 tmpbuf[0] = '_';
1592 tmpbuf[1] = '<';
2ae0db35 1593 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1594 tmplen += 2;
8a5ee598
RGS
1595 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1596 if (gvp) {
44867030
NC
1597 char *tmpbuf2;
1598 GV *gv2;
1599
1600 if (tmplen2 + 2 <= sizeof smallbuf)
1601 tmpbuf2 = smallbuf;
1602 else
1603 Newx(tmpbuf2, tmplen2 + 2, char);
1604
1605 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1606 /* Either they malloc'd it, or we malloc'd it,
1607 so no prefix is present in ours. */
1608 tmpbuf2[0] = '_';
1609 tmpbuf2[1] = '<';
1610 }
1611
1612 memcpy(tmpbuf2 + 2, s, tmplen2);
1613 tmplen2 += 2;
1614
8a5ee598 1615 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1616 if (!isGV(gv2)) {
8a5ee598 1617 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1618 /* adjust ${"::_<newfilename"} to store the new file name */
1619 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1620 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1621 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1622 }
44867030
NC
1623
1624 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1625 }
e66cf94c 1626 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1627 }
8a5ee598 1628#endif
05ec9bb3 1629 CopFILE_free(PL_curcop);
d9095cec 1630 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1631 }
57843af0 1632 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1633}
1634
29595ff2 1635#ifdef PERL_MAD
cd81e915 1636/* skip space before PL_thistoken */
29595ff2
NC
1637
1638STATIC char *
1639S_skipspace0(pTHX_ register char *s)
1640{
7918f24d
NC
1641 PERL_ARGS_ASSERT_SKIPSPACE0;
1642
29595ff2
NC
1643 s = skipspace(s);
1644 if (!PL_madskills)
1645 return s;
cd81e915
NC
1646 if (PL_skipwhite) {
1647 if (!PL_thiswhite)
6b29d1f5 1648 PL_thiswhite = newSVpvs("");
cd81e915
NC
1649 sv_catsv(PL_thiswhite, PL_skipwhite);
1650 sv_free(PL_skipwhite);
1651 PL_skipwhite = 0;
1652 }
1653 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1654 return s;
1655}
1656
cd81e915 1657/* skip space after PL_thistoken */
29595ff2
NC
1658
1659STATIC char *
1660S_skipspace1(pTHX_ register char *s)
1661{
d4c19fe8 1662 const char *start = s;
29595ff2
NC
1663 I32 startoff = start - SvPVX(PL_linestr);
1664
7918f24d
NC
1665 PERL_ARGS_ASSERT_SKIPSPACE1;
1666
29595ff2
NC
1667 s = skipspace(s);
1668 if (!PL_madskills)
1669 return s;
1670 start = SvPVX(PL_linestr) + startoff;
cd81e915 1671 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1672 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1673 PL_thistoken = newSVpvn(tstart, start - tstart);
1674 }
1675 PL_realtokenstart = -1;
1676 if (PL_skipwhite) {
1677 if (!PL_nextwhite)
6b29d1f5 1678 PL_nextwhite = newSVpvs("");
cd81e915
NC
1679 sv_catsv(PL_nextwhite, PL_skipwhite);
1680 sv_free(PL_skipwhite);
1681 PL_skipwhite = 0;
29595ff2
NC
1682 }
1683 return s;
1684}
1685
1686STATIC char *
1687S_skipspace2(pTHX_ register char *s, SV **svp)
1688{
c35e046a
AL
1689 char *start;
1690 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1691 const I32 startoff = s - SvPVX(PL_linestr);
1692
7918f24d
NC
1693 PERL_ARGS_ASSERT_SKIPSPACE2;
1694
29595ff2
NC
1695 s = skipspace(s);
1696 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1697 if (!PL_madskills || !svp)
1698 return s;
1699 start = SvPVX(PL_linestr) + startoff;
cd81e915 1700 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1701 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1702 PL_thistoken = newSVpvn(tstart, start - tstart);
1703 PL_realtokenstart = -1;
29595ff2 1704 }
cd81e915 1705 if (PL_skipwhite) {
29595ff2 1706 if (!*svp)
6b29d1f5 1707 *svp = newSVpvs("");
cd81e915
NC
1708 sv_setsv(*svp, PL_skipwhite);
1709 sv_free(PL_skipwhite);
1710 PL_skipwhite = 0;
29595ff2
NC
1711 }
1712
1713 return s;
1714}
1715#endif
1716
80a702cd 1717STATIC void
15f169a1 1718S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1719{
1720 AV *av = CopFILEAVx(PL_curcop);
1721 if (av) {
b9f83d2f 1722 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1723 if (orig_sv)
1724 sv_setsv(sv, orig_sv);
1725 else
1726 sv_setpvn(sv, buf, len);
80a702cd
RGS
1727 (void)SvIOK_on(sv);
1728 SvIV_set(sv, 0);
1729 av_store(av, (I32)CopLINE(PL_curcop), sv);
1730 }
1731}
1732
ffb4593c
NT
1733/*
1734 * S_skipspace
1735 * Called to gobble the appropriate amount and type of whitespace.
1736 * Skips comments as well.
1737 */
1738
76e3520e 1739STATIC char *
cea2e8a9 1740S_skipspace(pTHX_ register char *s)
a687059c 1741{
5db06880 1742#ifdef PERL_MAD
f0e67a1d
Z
1743 char *start = s;
1744#endif /* PERL_MAD */
7918f24d 1745 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1746#ifdef PERL_MAD
cd81e915
NC
1747 if (PL_skipwhite) {
1748 sv_free(PL_skipwhite);
f0e67a1d 1749 PL_skipwhite = NULL;
5db06880 1750 }
f0e67a1d 1751#endif /* PERL_MAD */
3280af22 1752 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1753 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1754 s++;
f0e67a1d
Z
1755 } else {
1756 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1757 PL_bufptr = s;
f0998909
Z
1758 lex_read_space(LEX_KEEP_PREVIOUS |
1759 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1760 LEX_NO_NEXT_CHUNK : 0));
3280af22 1761 s = PL_bufptr;
f0e67a1d
Z
1762 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1763 if (PL_linestart > PL_bufptr)
1764 PL_bufptr = PL_linestart;
1765 return s;
463ee0b2 1766 }
5db06880 1767#ifdef PERL_MAD
f0e67a1d
Z
1768 if (PL_madskills)
1769 PL_skipwhite = newSVpvn(start, s-start);
1770#endif /* PERL_MAD */
5db06880 1771 return s;
a687059c 1772}
378cc40b 1773
ffb4593c
NT
1774/*
1775 * S_check_uni
1776 * Check the unary operators to ensure there's no ambiguity in how they're
1777 * used. An ambiguous piece of code would be:
1778 * rand + 5
1779 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1780 * the +5 is its argument.
1781 */
1782
76e3520e 1783STATIC void
cea2e8a9 1784S_check_uni(pTHX)
ba106d47 1785{
97aff369 1786 dVAR;
d4c19fe8
AL
1787 const char *s;
1788 const char *t;
2f3197b3 1789
3280af22 1790 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1791 return;
3280af22
NIS
1792 while (isSPACE(*PL_last_uni))
1793 PL_last_uni++;
c35e046a
AL
1794 s = PL_last_uni;
1795 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1796 s++;
3280af22 1797 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1798 return;
6136c704 1799
9b387841
NC
1800 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1801 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1802 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1803}
1804
ffb4593c
NT
1805/*
1806 * LOP : macro to build a list operator. Its behaviour has been replaced
1807 * with a subroutine, S_lop() for which LOP is just another name.
1808 */
1809
a0d0e21e
LW
1810#define LOP(f,x) return lop(f,x,s)
1811
ffb4593c
NT
1812/*
1813 * S_lop
1814 * Build a list operator (or something that might be one). The rules:
1815 * - if we have a next token, then it's a list operator [why?]
1816 * - if the next thing is an opening paren, then it's a function
1817 * - else it's a list operator
1818 */
1819
76e3520e 1820STATIC I32
a0be28da 1821S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1822{
97aff369 1823 dVAR;
7918f24d
NC
1824
1825 PERL_ARGS_ASSERT_LOP;
1826
6154021b 1827 pl_yylval.ival = f;
35c8bce7 1828 CLINE;
3280af22
NIS
1829 PL_expect = x;
1830 PL_bufptr = s;
1831 PL_last_lop = PL_oldbufptr;
eb160463 1832 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1833#ifdef PERL_MAD
1834 if (PL_lasttoke)
78cdf107 1835 goto lstop;
5db06880 1836#else
3280af22 1837 if (PL_nexttoke)
78cdf107 1838 goto lstop;
5db06880 1839#endif
79072805 1840 if (*s == '(')
bbf60fe6 1841 return REPORT(FUNC);
29595ff2 1842 s = PEEKSPACE(s);
79072805 1843 if (*s == '(')
bbf60fe6 1844 return REPORT(FUNC);
78cdf107
Z
1845 else {
1846 lstop:
1847 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1848 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 1849 return REPORT(LSTOP);
78cdf107 1850 }
79072805
LW
1851}
1852
5db06880
NC
1853#ifdef PERL_MAD
1854 /*
1855 * S_start_force
1856 * Sets up for an eventual force_next(). start_force(0) basically does
1857 * an unshift, while start_force(-1) does a push. yylex removes items
1858 * on the "pop" end.
1859 */
1860
1861STATIC void
1862S_start_force(pTHX_ int where)
1863{
1864 int i;
1865
cd81e915 1866 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1867 where = PL_lasttoke;
cd81e915
NC
1868 assert(PL_curforce < 0 || PL_curforce == where);
1869 if (PL_curforce != where) {
5db06880
NC
1870 for (i = PL_lasttoke; i > where; --i) {
1871 PL_nexttoke[i] = PL_nexttoke[i-1];
1872 }
1873 PL_lasttoke++;
1874 }
cd81e915 1875 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1876 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1877 PL_curforce = where;
1878 if (PL_nextwhite) {
5db06880 1879 if (PL_madskills)
6b29d1f5 1880 curmad('^', newSVpvs(""));
cd81e915 1881 CURMAD('_', PL_nextwhite);
5db06880
NC
1882 }
1883}
1884
1885STATIC void
1886S_curmad(pTHX_ char slot, SV *sv)
1887{
1888 MADPROP **where;
1889
1890 if (!sv)
1891 return;
cd81e915
NC
1892 if (PL_curforce < 0)
1893 where = &PL_thismad;
5db06880 1894 else
cd81e915 1895 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1896
cd81e915 1897 if (PL_faketokens)
76f68e9b 1898 sv_setpvs(sv, "");
5db06880
NC
1899 else {
1900 if (!IN_BYTES) {
1901 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1902 SvUTF8_on(sv);
1903 else if (PL_encoding) {
1904 sv_recode_to_utf8(sv, PL_encoding);
1905 }
1906 }
1907 }
1908
1909 /* keep a slot open for the head of the list? */
1910 if (slot != '_' && *where && (*where)->mad_key == '^') {
1911 (*where)->mad_key = slot;
daba3364 1912 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1913 (*where)->mad_val = (void*)sv;
1914 }
1915 else
1916 addmad(newMADsv(slot, sv), where, 0);
1917}
1918#else
b3f24c00
MHM
1919# define start_force(where) NOOP
1920# define curmad(slot, sv) NOOP
5db06880
NC
1921#endif
1922
ffb4593c
NT
1923/*
1924 * S_force_next
9cbb5ea2 1925 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1926 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1927 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1928 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1929 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1930 */
1931
4e553d73 1932STATIC void
cea2e8a9 1933S_force_next(pTHX_ I32 type)
79072805 1934{
97aff369 1935 dVAR;
704d4215
GG
1936#ifdef DEBUGGING
1937 if (DEBUG_T_TEST) {
1938 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1939 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1940 }
1941#endif
5db06880 1942#ifdef PERL_MAD
cd81e915 1943 if (PL_curforce < 0)
5db06880 1944 start_force(PL_lasttoke);
cd81e915 1945 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1946 if (PL_lex_state != LEX_KNOWNEXT)
1947 PL_lex_defer = PL_lex_state;
1948 PL_lex_state = LEX_KNOWNEXT;
1949 PL_lex_expect = PL_expect;
cd81e915 1950 PL_curforce = -1;
5db06880 1951#else
3280af22
NIS
1952 PL_nexttype[PL_nexttoke] = type;
1953 PL_nexttoke++;
1954 if (PL_lex_state != LEX_KNOWNEXT) {
1955 PL_lex_defer = PL_lex_state;
1956 PL_lex_expect = PL_expect;
1957 PL_lex_state = LEX_KNOWNEXT;
79072805 1958 }
5db06880 1959#endif
79072805
LW
1960}
1961
28ac2b49
Z
1962void
1963Perl_yyunlex(pTHX)
1964{
a7aaec61
Z
1965 int yyc = PL_parser->yychar;
1966 if (yyc != YYEMPTY) {
1967 if (yyc) {
1968 start_force(-1);
1969 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1970 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 1971 PL_lex_allbrackets--;
a7aaec61 1972 PL_lex_brackets--;
78cdf107
Z
1973 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1974 } else if (yyc == '('/*)*/) {
1975 PL_lex_allbrackets--;
1976 yyc |= (2<<24);
a7aaec61
Z
1977 }
1978 force_next(yyc);
1979 }
28ac2b49
Z
1980 PL_parser->yychar = YYEMPTY;
1981 }
1982}
1983
d0a148a6 1984STATIC SV *
15f169a1 1985S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1986{
97aff369 1987 dVAR;
740cce10 1988 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1989 !IN_BYTES
1990 && UTF
1991 && !is_ascii_string((const U8*)start, len)
740cce10 1992 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1993 return sv;
1994}
1995
ffb4593c
NT
1996/*
1997 * S_force_word
1998 * When the lexer knows the next thing is a word (for instance, it has
1999 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
2000 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2001 * lookahead.
ffb4593c
NT
2002 *
2003 * Arguments:
b1b65b59 2004 * char *start : buffer position (must be within PL_linestr)
02b34bbe 2005 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2006 * int check_keyword : if true, Perl checks to make sure the word isn't
2007 * a keyword (do this if the word is a label, e.g. goto FOO)
2008 * int allow_pack : if true, : characters will also be allowed (require,
2009 * use, etc. do this)
9cbb5ea2 2010 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2011 */
2012
76e3520e 2013STATIC char *
cea2e8a9 2014S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 2015{
97aff369 2016 dVAR;
463ee0b2
LW
2017 register char *s;
2018 STRLEN len;
4e553d73 2019
7918f24d
NC
2020 PERL_ARGS_ASSERT_FORCE_WORD;
2021
29595ff2 2022 start = SKIPSPACE1(start);
463ee0b2 2023 s = start;
7e2040f0 2024 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2025 (allow_pack && *s == ':') ||
15f0808c 2026 (allow_initial_tick && *s == '\'') )
a0d0e21e 2027 {
3280af22 2028 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2029 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2030 return start;
cd81e915 2031 start_force(PL_curforce);
5db06880
NC
2032 if (PL_madskills)
2033 curmad('X', newSVpvn(start,s-start));
463ee0b2 2034 if (token == METHOD) {
29595ff2 2035 s = SKIPSPACE1(s);
463ee0b2 2036 if (*s == '(')
3280af22 2037 PL_expect = XTERM;
463ee0b2 2038 else {
3280af22 2039 PL_expect = XOPERATOR;
463ee0b2 2040 }
79072805 2041 }
e74e6b3d 2042 if (PL_madskills)
63575281 2043 curmad('g', newSVpvs( "forced" ));
9ded7720 2044 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2045 = (OP*)newSVOP(OP_CONST,0,
2046 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2047 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2048 force_next(token);
2049 }
2050 return s;
2051}
2052
ffb4593c
NT
2053/*
2054 * S_force_ident
9cbb5ea2 2055 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2056 * text only contains the "foo" portion. The first argument is a pointer
2057 * to the "foo", and the second argument is the type symbol to prefix.
2058 * Forces the next token to be a "WORD".
9cbb5ea2 2059 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2060 */
2061
76e3520e 2062STATIC void
bfed75c6 2063S_force_ident(pTHX_ register const char *s, int kind)
79072805 2064{
97aff369 2065 dVAR;
7918f24d
NC
2066
2067 PERL_ARGS_ASSERT_FORCE_IDENT;
2068
c35e046a 2069 if (*s) {
90e5519e
NC
2070 const STRLEN len = strlen(s);
2071 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2072 start_force(PL_curforce);
9ded7720 2073 NEXTVAL_NEXTTOKE.opval = o;
79072805 2074 force_next(WORD);
748a9306 2075 if (kind) {
11343788 2076 o->op_private = OPpCONST_ENTERED;
55497cff 2077 /* XXX see note in pp_entereval() for why we forgo typo
2078 warnings if the symbol must be introduced in an eval.
2079 GSAR 96-10-12 */
90e5519e
NC
2080 gv_fetchpvn_flags(s, len,
2081 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2082 : GV_ADD,
2083 kind == '$' ? SVt_PV :
2084 kind == '@' ? SVt_PVAV :
2085 kind == '%' ? SVt_PVHV :
a0d0e21e 2086 SVt_PVGV
90e5519e 2087 );
748a9306 2088 }
79072805
LW
2089 }
2090}
2091
1571675a
GS
2092NV
2093Perl_str_to_version(pTHX_ SV *sv)
2094{
2095 NV retval = 0.0;
2096 NV nshift = 1.0;
2097 STRLEN len;
cfd0369c 2098 const char *start = SvPV_const(sv,len);
9d4ba2ae 2099 const char * const end = start + len;
504618e9 2100 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2101
2102 PERL_ARGS_ASSERT_STR_TO_VERSION;
2103
1571675a 2104 while (start < end) {
ba210ebe 2105 STRLEN skip;
1571675a
GS
2106 UV n;
2107 if (utf)
9041c2e3 2108 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2109 else {
2110 n = *(U8*)start;
2111 skip = 1;
2112 }
2113 retval += ((NV)n)/nshift;
2114 start += skip;
2115 nshift *= 1000;
2116 }
2117 return retval;
2118}
2119
4e553d73 2120/*
ffb4593c
NT
2121 * S_force_version
2122 * Forces the next token to be a version number.
e759cc13
RGS
2123 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2124 * and if "guessing" is TRUE, then no new token is created (and the caller
2125 * must use an alternative parsing method).
ffb4593c
NT
2126 */
2127
76e3520e 2128STATIC char *
e759cc13 2129S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2130{
97aff369 2131 dVAR;
5f66b61c 2132 OP *version = NULL;
44dcb63b 2133 char *d;
5db06880
NC
2134#ifdef PERL_MAD
2135 I32 startoff = s - SvPVX(PL_linestr);
2136#endif
89bfa8cd 2137
7918f24d
NC
2138 PERL_ARGS_ASSERT_FORCE_VERSION;
2139
29595ff2 2140 s = SKIPSPACE1(s);
89bfa8cd 2141
44dcb63b 2142 d = s;
dd629d5b 2143 if (*d == 'v')
44dcb63b 2144 d++;
44dcb63b 2145 if (isDIGIT(*d)) {
e759cc13
RGS
2146 while (isDIGIT(*d) || *d == '_' || *d == '.')
2147 d++;
5db06880
NC
2148#ifdef PERL_MAD
2149 if (PL_madskills) {
cd81e915 2150 start_force(PL_curforce);
5db06880
NC
2151 curmad('X', newSVpvn(s,d-s));
2152 }
2153#endif
4e4da3ac 2154 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2155 SV *ver;
8d08d9ba
DG
2156#ifdef USE_LOCALE_NUMERIC
2157 char *loc = setlocale(LC_NUMERIC, "C");
2158#endif
6154021b 2159 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2160#ifdef USE_LOCALE_NUMERIC
2161 setlocale(LC_NUMERIC, loc);
2162#endif
6154021b 2163 version = pl_yylval.opval;
dd629d5b
GS
2164 ver = cSVOPx(version)->op_sv;
2165 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2166 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2167 SvNV_set(ver, str_to_version(ver));
1571675a 2168 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2169 }
89bfa8cd 2170 }
5db06880
NC
2171 else if (guessing) {
2172#ifdef PERL_MAD
2173 if (PL_madskills) {
cd81e915
NC
2174 sv_free(PL_nextwhite); /* let next token collect whitespace */
2175 PL_nextwhite = 0;
5db06880
NC
2176 s = SvPVX(PL_linestr) + startoff;
2177 }
2178#endif
e759cc13 2179 return s;
5db06880 2180 }
89bfa8cd 2181 }
2182
5db06880
NC
2183#ifdef PERL_MAD
2184 if (PL_madskills && !version) {
cd81e915
NC
2185 sv_free(PL_nextwhite); /* let next token collect whitespace */
2186 PL_nextwhite = 0;
5db06880
NC
2187 s = SvPVX(PL_linestr) + startoff;
2188 }
2189#endif
89bfa8cd 2190 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2191 start_force(PL_curforce);
9ded7720 2192 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2193 force_next(WORD);
89bfa8cd 2194
e759cc13 2195 return s;
89bfa8cd 2196}
2197
ffb4593c 2198/*
91152fc1
DG
2199 * S_force_strict_version
2200 * Forces the next token to be a version number using strict syntax rules.
2201 */
2202
2203STATIC char *
2204S_force_strict_version(pTHX_ char *s)
2205{
2206 dVAR;
2207 OP *version = NULL;
2208#ifdef PERL_MAD
2209 I32 startoff = s - SvPVX(PL_linestr);
2210#endif
2211 const char *errstr = NULL;
2212
2213 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2214
2215 while (isSPACE(*s)) /* leading whitespace */
2216 s++;
2217
2218 if (is_STRICT_VERSION(s,&errstr)) {
2219 SV *ver = newSV(0);
2220 s = (char *)scan_version(s, ver, 0);
2221 version = newSVOP(OP_CONST, 0, ver);
2222 }
4e4da3ac
Z
2223 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2224 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2225 {
91152fc1
DG
2226 PL_bufptr = s;
2227 if (errstr)
2228 yyerror(errstr); /* version required */
2229 return s;
2230 }
2231
2232#ifdef PERL_MAD
2233 if (PL_madskills && !version) {
2234 sv_free(PL_nextwhite); /* let next token collect whitespace */
2235 PL_nextwhite = 0;
2236 s = SvPVX(PL_linestr) + startoff;
2237 }
2238#endif
2239 /* NOTE: The parser sees the package name and the VERSION swapped */
2240 start_force(PL_curforce);
2241 NEXTVAL_NEXTTOKE.opval = version;
2242 force_next(WORD);
2243
2244 return s;
2245}
2246
2247/*
ffb4593c
NT
2248 * S_tokeq
2249 * Tokenize a quoted string passed in as an SV. It finds the next
2250 * chunk, up to end of string or a backslash. It may make a new
2251 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2252 * turns \\ into \.
2253 */
2254
76e3520e 2255STATIC SV *
cea2e8a9 2256S_tokeq(pTHX_ SV *sv)
79072805 2257{
97aff369 2258 dVAR;
79072805
LW
2259 register char *s;
2260 register char *send;
2261 register char *d;
b3ac6de7
IZ
2262 STRLEN len = 0;
2263 SV *pv = sv;
79072805 2264
7918f24d
NC
2265 PERL_ARGS_ASSERT_TOKEQ;
2266
79072805 2267 if (!SvLEN(sv))
b3ac6de7 2268 goto finish;
79072805 2269
a0d0e21e 2270 s = SvPV_force(sv, len);
21a311ee 2271 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2272 goto finish;
463ee0b2 2273 send = s + len;
dcb21ed6
NC
2274 /* This is relying on the SV being "well formed" with a trailing '\0' */
2275 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2276 s++;
2277 if (s == send)
b3ac6de7 2278 goto finish;
79072805 2279 d = s;
be4731d2 2280 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2281 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2282 }
79072805
LW
2283 while (s < send) {
2284 if (*s == '\\') {
a0d0e21e 2285 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2286 s++; /* all that, just for this */
2287 }
2288 *d++ = *s++;
2289 }
2290 *d = '\0';
95a20fc0 2291 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2292 finish:
3280af22 2293 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2294 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2295 return sv;
2296}
2297
ffb4593c
NT
2298/*
2299 * Now come three functions related to double-quote context,
2300 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2301 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2302 * interact with PL_lex_state, and create fake ( ... ) argument lists
2303 * to handle functions and concatenation.
2304 * They assume that whoever calls them will be setting up a fake
2305 * join call, because each subthing puts a ',' after it. This lets
2306 * "lower \luPpEr"
2307 * become
2308 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2309 *
2310 * (I'm not sure whether the spurious commas at the end of lcfirst's
2311 * arguments and join's arguments are created or not).
2312 */
2313
2314/*
2315 * S_sublex_start
6154021b 2316 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2317 *
2318 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2319 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2320 *
2321 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2322 *
2323 * Everything else becomes a FUNC.
2324 *
2325 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2326 * had an OP_CONST or OP_READLINE). This just sets us up for a
2327 * call to S_sublex_push().
2328 */
2329
76e3520e 2330STATIC I32
cea2e8a9 2331S_sublex_start(pTHX)
79072805 2332{
97aff369 2333 dVAR;
6154021b 2334 register const I32 op_type = pl_yylval.ival;
79072805
LW
2335
2336 if (op_type == OP_NULL) {
6154021b 2337 pl_yylval.opval = PL_lex_op;
5f66b61c 2338 PL_lex_op = NULL;
79072805
LW
2339 return THING;
2340 }
2341 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2342 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2343
2344 if (SvTYPE(sv) == SVt_PVIV) {
2345 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2346 STRLEN len;
96a5add6 2347 const char * const p = SvPV_const(sv, len);
740cce10 2348 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2349 SvREFCNT_dec(sv);
2350 sv = nsv;
4e553d73 2351 }
6154021b 2352 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2353 PL_lex_stuff = NULL;
6f33ba73
RGS
2354 /* Allow <FH> // "foo" */
2355 if (op_type == OP_READLINE)
2356 PL_expect = XTERMORDORDOR;
79072805
LW
2357 return THING;
2358 }
e3f73d4e
RGS
2359 else if (op_type == OP_BACKTICK && PL_lex_op) {
2360 /* readpipe() vas overriden */
2361 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2362 pl_yylval.opval = PL_lex_op;
9b201d7d 2363 PL_lex_op = NULL;
e3f73d4e
RGS
2364 PL_lex_stuff = NULL;
2365 return THING;
2366 }
79072805 2367
3280af22 2368 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2369 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2370 PL_sublex_info.sub_op = PL_lex_op;
2371 PL_lex_state = LEX_INTERPPUSH;
55497cff 2372
3280af22
NIS
2373 PL_expect = XTERM;
2374 if (PL_lex_op) {
6154021b 2375 pl_yylval.opval = PL_lex_op;
5f66b61c 2376 PL_lex_op = NULL;
55497cff 2377 return PMFUNC;
2378 }
2379 else
2380 return FUNC;
2381}
2382
ffb4593c
NT
2383/*
2384 * S_sublex_push
2385 * Create a new scope to save the lexing state. The scope will be
2386 * ended in S_sublex_done. Returns a '(', starting the function arguments
2387 * to the uc, lc, etc. found before.
2388 * Sets PL_lex_state to LEX_INTERPCONCAT.
2389 */
2390
76e3520e 2391STATIC I32
cea2e8a9 2392S_sublex_push(pTHX)
55497cff 2393{
27da23d5 2394 dVAR;
f46d017c 2395 ENTER;
55497cff 2396
3280af22 2397 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2398 SAVEBOOL(PL_lex_dojoin);
3280af22 2399 SAVEI32(PL_lex_brackets);
78cdf107
Z
2400 SAVEI32(PL_lex_allbrackets);
2401 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2402 SAVEI32(PL_lex_casemods);
2403 SAVEI32(PL_lex_starts);
651b5b28 2404 SAVEI8(PL_lex_state);
7766f137 2405 SAVEVPTR(PL_lex_inpat);
98246f1e 2406 SAVEI16(PL_lex_inwhat);
57843af0 2407 SAVECOPLINE(PL_curcop);
3280af22 2408 SAVEPPTR(PL_bufptr);
8452ff4b 2409 SAVEPPTR(PL_bufend);
3280af22
NIS
2410 SAVEPPTR(PL_oldbufptr);
2411 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2412 SAVEPPTR(PL_last_lop);
2413 SAVEPPTR(PL_last_uni);
3280af22
NIS
2414 SAVEPPTR(PL_linestart);
2415 SAVESPTR(PL_linestr);
8edd5f42
RGS
2416 SAVEGENERICPV(PL_lex_brackstack);
2417 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2418
2419 PL_linestr = PL_lex_stuff;
a0714e2c 2420 PL_lex_stuff = NULL;
3280af22 2421
9cbb5ea2
GS
2422 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2423 = SvPVX(PL_linestr);
3280af22 2424 PL_bufend += SvCUR(PL_linestr);
bd61b366 2425 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2426 SAVEFREESV(PL_linestr);
2427
2428 PL_lex_dojoin = FALSE;
2429 PL_lex_brackets = 0;
78cdf107
Z
2430 PL_lex_allbrackets = 0;
2431 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2432 Newx(PL_lex_brackstack, 120, char);
2433 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2434 PL_lex_casemods = 0;
2435 *PL_lex_casestack = '\0';
2436 PL_lex_starts = 0;
2437 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2438 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2439
2440 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2441 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2442 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2443 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2444 else
5f66b61c 2445 PL_lex_inpat = NULL;
79072805 2446
55497cff 2447 return '(';
79072805
LW
2448}
2449
ffb4593c
NT
2450/*
2451 * S_sublex_done
2452 * Restores lexer state after a S_sublex_push.
2453 */
2454
76e3520e 2455STATIC I32
cea2e8a9 2456S_sublex_done(pTHX)
79072805 2457{
27da23d5 2458 dVAR;
3280af22 2459 if (!PL_lex_starts++) {
396482e1 2460 SV * const sv = newSVpvs("");
9aa983d2
JH
2461 if (SvUTF8(PL_linestr))
2462 SvUTF8_on(sv);
3280af22 2463 PL_expect = XOPERATOR;
6154021b 2464 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2465 return THING;
2466 }
2467
3280af22
NIS
2468 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2469 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2470 return yylex();
79072805
LW
2471 }
2472
ffb4593c 2473 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2474 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2475 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2476 PL_linestr = PL_lex_repl;
2477 PL_lex_inpat = 0;
2478 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2479 PL_bufend += SvCUR(PL_linestr);
bd61b366 2480 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2481 SAVEFREESV(PL_linestr);
2482 PL_lex_dojoin = FALSE;
2483 PL_lex_brackets = 0;
78cdf107
Z
2484 PL_lex_allbrackets = 0;
2485 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2486 PL_lex_casemods = 0;
2487 *PL_lex_casestack = '\0';
2488 PL_lex_starts = 0;
25da4f38 2489 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2490 PL_lex_state = LEX_INTERPNORMAL;
2491 PL_lex_starts++;
e9fa98b2
HS
2492 /* we don't clear PL_lex_repl here, so that we can check later
2493 whether this is an evalled subst; that means we rely on the
2494 logic to ensure sublex_done() is called again only via the
2495 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2496 }
e9fa98b2 2497 else {
3280af22 2498 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2499 PL_lex_repl = NULL;
e9fa98b2 2500 }
79072805 2501 return ',';
ffed7fef
LW
2502 }
2503 else {
5db06880
NC
2504#ifdef PERL_MAD
2505 if (PL_madskills) {
cd81e915
NC
2506 if (PL_thiswhite) {
2507 if (!PL_endwhite)
6b29d1f5 2508 PL_endwhite = newSVpvs("");
cd81e915
NC
2509 sv_catsv(PL_endwhite, PL_thiswhite);
2510 PL_thiswhite = 0;
2511 }
2512 if (PL_thistoken)
76f68e9b 2513 sv_setpvs(PL_thistoken,"");
5db06880 2514 else
cd81e915 2515 PL_realtokenstart = -1;
5db06880
NC
2516 }
2517#endif
f46d017c 2518 LEAVE;
3280af22
NIS
2519 PL_bufend = SvPVX(PL_linestr);
2520 PL_bufend += SvCUR(PL_linestr);
2521 PL_expect = XOPERATOR;
09bef843 2522 PL_sublex_info.sub_inwhat = 0;
79072805 2523 return ')';
ffed7fef
LW
2524 }
2525}
2526
02aa26ce
NT
2527/*
2528 scan_const
2529
2530 Extracts a pattern, double-quoted string, or transliteration. This
2531 is terrifying code.
2532
94def140 2533 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2534 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2535 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2536
94def140
TS
2537 Returns a pointer to the character scanned up to. If this is
2538 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2539 successfully parsed), will leave an OP for the substring scanned
6154021b 2540 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2541 by looking at the next characters herself.
2542
02aa26ce
NT
2543 In patterns:
2544 backslashes:
ff3f963a 2545 constants: \N{NAME} only
02aa26ce
NT
2546 case and quoting: \U \Q \E
2547 stops on @ and $, but not for $ as tail anchor
2548
2549 In transliterations:
2550 characters are VERY literal, except for - not at the start or end
94def140
TS
2551 of the string, which indicates a range. If the range is in bytes,
2552 scan_const expands the range to the full set of intermediate
2553 characters. If the range is in utf8, the hyphen is replaced with
2554 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2555
2556 In double-quoted strings:
2557 backslashes:
2558 double-quoted style: \r and \n
ff3f963a 2559 constants: \x31, etc.
94def140 2560 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2561 case and quoting: \U \Q \E
2562 stops on @ and $
2563
2564 scan_const does *not* construct ops to handle interpolated strings.
2565 It stops processing as soon as it finds an embedded $ or @ variable
2566 and leaves it to the caller to work out what's going on.
2567
94def140
TS
2568 embedded arrays (whether in pattern or not) could be:
2569 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2570
2571 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2572
2573 $ in pattern could be $foo or could be tail anchor. Assumption:
2574 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2575 followed by one of "()| \r\n\t"
02aa26ce
NT
2576
2577 \1 (backreferences) are turned into $1
2578
2579 The structure of the code is
2580 while (there's a character to process) {
94def140
TS
2581 handle transliteration ranges
2582 skip regexp comments /(?#comment)/ and codes /(?{code})/
2583 skip #-initiated comments in //x patterns
2584 check for embedded arrays
02aa26ce
NT
2585 check for embedded scalars
2586 if (backslash) {
94def140 2587 deprecate \1 in substitution replacements
02aa26ce
NT
2588 handle string-changing backslashes \l \U \Q \E, etc.
2589 switch (what was escaped) {
94def140 2590 handle \- in a transliteration (becomes a literal -)
ff3f963a 2591 if a pattern and not \N{, go treat as regular character
94def140
TS
2592 handle \132 (octal characters)
2593 handle \x15 and \x{1234} (hex characters)
ff3f963a 2594 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2595 handle \cV (control characters)
2596 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2597 } (end switch)
77a135fe 2598 continue
02aa26ce 2599 } (end if backslash)
77a135fe 2600 handle regular character
02aa26ce 2601 } (end while character to read)
4e553d73 2602
02aa26ce
NT
2603*/
2604
76e3520e 2605STATIC char *
cea2e8a9 2606S_scan_const(pTHX_ char *start)
79072805 2607{
97aff369 2608 dVAR;
3280af22 2609 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2610 SV *sv = newSV(send - start); /* sv for the constant. See
2611 note below on sizing. */
02aa26ce
NT
2612 register char *s = start; /* start of the constant */
2613 register char *d = SvPVX(sv); /* destination for copies */
2614 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2615 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2616 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2617 I32 this_utf8 = UTF; /* Is the source string assumed
2618 to be UTF8? But, this can
2619 show as true when the source
2620 isn't utf8, as for example
2621 when it is entirely composed
2622 of hex constants */
2623
2624 /* Note on sizing: The scanned constant is placed into sv, which is
2625 * initialized by newSV() assuming one byte of output for every byte of
2626 * input. This routine expects newSV() to allocate an extra byte for a
2627 * trailing NUL, which this routine will append if it gets to the end of
2628 * the input. There may be more bytes of input than output (eg., \N{LATIN
2629 * CAPITAL LETTER A}), or more output than input if the constant ends up
2630 * recoded to utf8, but each time a construct is found that might increase
2631 * the needed size, SvGROW() is called. Its size parameter each time is
2632 * based on the best guess estimate at the time, namely the length used so
2633 * far, plus the length the current construct will occupy, plus room for
2634 * the trailing NUL, plus one byte for every input byte still unscanned */
2635
012bcf8d 2636 UV uv;
4c3a8340
TS
2637#ifdef EBCDIC
2638 UV literal_endpoint = 0;
e294cc5d 2639 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2640#endif
012bcf8d 2641
7918f24d
NC
2642 PERL_ARGS_ASSERT_SCAN_CONST;
2643
bb16bae8 2644 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2645 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2646 /* If we are doing a trans and we know we want UTF8 set expectation */
2647 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2648 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2649 }
2650
2651
79072805 2652 while (s < send || dorange) {
ff3f963a 2653
02aa26ce 2654 /* get transliterations out of the way (they're most literal) */
3280af22 2655 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2656 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2657 if (dorange) {
1ba5c669
JH
2658 I32 i; /* current expanded character */
2659 I32 min; /* first character in range */
2660 I32 max; /* last character in range */
02aa26ce 2661
e294cc5d
JH
2662#ifdef EBCDIC
2663 UV uvmax = 0;
2664#endif
2665
2666 if (has_utf8
2667#ifdef EBCDIC
2668 && !native_range
2669#endif
2670 ) {
9d4ba2ae 2671 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2672 char *e = d++;
2673 while (e-- > c)
2674 *(e + 1) = *e;
25716404 2675 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2676 /* mark the range as done, and continue */
2677 dorange = FALSE;
2678 didrange = TRUE;
2679 continue;
2680 }
2b9d42f0 2681
95a20fc0 2682 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2683#ifdef EBCDIC
2684 SvGROW(sv,
2685 SvLEN(sv) + (has_utf8 ?
2686 (512 - UTF_CONTINUATION_MARK +
2687 UNISKIP(0x100))
2688 : 256));
2689 /* How many two-byte within 0..255: 128 in UTF-8,
2690 * 96 in UTF-8-mod. */
2691#else
9cbb5ea2 2692 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2693#endif
9cbb5ea2 2694 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2695#ifdef EBCDIC
2696 if (has_utf8) {
2697 int j;
2698 for (j = 0; j <= 1; j++) {
2699 char * const c = (char*)utf8_hop((U8*)d, -1);
2700 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2701 if (j)
2702 min = (U8)uv;
2703 else if (uv < 256)
2704 max = (U8)uv;
2705 else {
2706 max = (U8)0xff; /* only to \xff */
2707 uvmax = uv; /* \x{100} to uvmax */
2708 }
2709 d = c; /* eat endpoint chars */
2710 }
2711 }
2712 else {
2713#endif
2714 d -= 2; /* eat the first char and the - */
2715 min = (U8)*d; /* first char in range */
2716 max = (U8)d[1]; /* last char in range */
2717#ifdef EBCDIC
2718 }
2719#endif
8ada0baa 2720
c2e66d9e 2721 if (min > max) {
01ec43d0 2722 Perl_croak(aTHX_
d1573ac7 2723 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2724 (char)min, (char)max);
c2e66d9e
GS
2725 }
2726
c7f1f016 2727#ifdef EBCDIC
4c3a8340
TS
2728 if (literal_endpoint == 2 &&
2729 ((isLOWER(min) && isLOWER(max)) ||
2730 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2731 if (isLOWER(min)) {
2732 for (i = min; i <= max; i++)
2733 if (isLOWER(i))
db42d148 2734 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2735 } else {
2736 for (i = min; i <= max; i++)
2737 if (isUPPER(i))
db42d148 2738 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2739 }
2740 }
2741 else
2742#endif
2743 for (i = min; i <= max; i++)
e294cc5d
JH
2744#ifdef EBCDIC
2745 if (has_utf8) {
2746 const U8 ch = (U8)NATIVE_TO_UTF(i);
2747 if (UNI_IS_INVARIANT(ch))
2748 *d++ = (U8)i;
2749 else {
2750 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2751 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2752 }
2753 }
2754 else
2755#endif
2756 *d++ = (char)i;
2757
2758#ifdef EBCDIC
2759 if (uvmax) {
2760 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2761 if (uvmax > 0x101)
2762 *d++ = (char)UTF_TO_NATIVE(0xff);
2763 if (uvmax > 0x100)
2764 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2765 }
2766#endif
02aa26ce
NT
2767
2768 /* mark the range as done, and continue */
79072805 2769 dorange = FALSE;
01ec43d0 2770 didrange = TRUE;
4c3a8340
TS
2771#ifdef EBCDIC
2772 literal_endpoint = 0;
2773#endif
79072805 2774 continue;
4e553d73 2775 }
02aa26ce
NT
2776
2777 /* range begins (ignore - as first or last char) */
79072805 2778 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2779 if (didrange) {
1fafa243 2780 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2781 }
e294cc5d
JH
2782 if (has_utf8
2783#ifdef EBCDIC
2784 && !native_range
2785#endif
2786 ) {
25716404 2787 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2788 s++;
2789 continue;
2790 }
79072805
LW
2791 dorange = TRUE;
2792 s++;
01ec43d0
GS
2793 }
2794 else {
2795 didrange = FALSE;
4c3a8340
TS
2796#ifdef EBCDIC
2797 literal_endpoint = 0;
e294cc5d 2798 native_range = TRUE;
4c3a8340 2799#endif
01ec43d0 2800 }
79072805 2801 }
02aa26ce
NT
2802
2803 /* if we get here, we're not doing a transliteration */
2804
0f5d15d6
IZ
2805 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2806 except for the last char, which will be done separately. */
3280af22 2807 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2808 if (s[2] == '#') {
e994fd66 2809 while (s+1 < send && *s != ')')
db42d148 2810 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2811 }
2812 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2813 || (s[2] == '?' && s[3] == '{'))
155aba94 2814 {
cc6b7395 2815 I32 count = 1;
0f5d15d6 2816 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2817 char c;
2818
d9f97599
GS
2819 while (count && (c = *regparse)) {
2820 if (c == '\\' && regparse[1])
2821 regparse++;
4e553d73 2822 else if (c == '{')
cc6b7395 2823 count++;
4e553d73 2824 else if (c == '}')
cc6b7395 2825 count--;
d9f97599 2826 regparse++;
cc6b7395 2827 }
e994fd66 2828 if (*regparse != ')')
5bdf89e7 2829 regparse--; /* Leave one char for continuation. */
0f5d15d6 2830 while (s < regparse)
db42d148 2831 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2832 }
748a9306 2833 }
02aa26ce
NT
2834
2835 /* likewise skip #-initiated comments in //x patterns */
3280af22 2836 else if (*s == '#' && PL_lex_inpat &&
73134a2e 2837 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 2838 while (s+1 < send && *s != '\n')
db42d148 2839 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2840 }
02aa26ce 2841
5d1d4326 2842 /* check for embedded arrays
da6eedaa 2843 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2844 */
1749ea0d
TS
2845 else if (*s == '@' && s[1]) {
2846 if (isALNUM_lazy_if(s+1,UTF))
2847 break;
2848 if (strchr(":'{$", s[1]))
2849 break;
2850 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2851 break; /* in regexp, neither @+ nor @- are interpolated */
2852 }
02aa26ce
NT
2853
2854 /* check for embedded scalars. only stop if we're sure it's a
2855 variable.
2856 */
79072805 2857 else if (*s == '$') {
3280af22 2858 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2859 break;
77772344 2860 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2861 if (s[1] == '\\') {
2862 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2863 "Possible unintended interpolation of $\\ in regex");
77772344 2864 }
79072805 2865 break; /* in regexp, $ might be tail anchor */
77772344 2866 }
79072805 2867 }
02aa26ce 2868
2b9d42f0
NIS
2869 /* End of else if chain - OP_TRANS rejoin rest */
2870
02aa26ce 2871 /* backslashes */
79072805 2872 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2873 char* e; /* Can be used for ending '}', etc. */
2874
79072805 2875 s++;
02aa26ce 2876
7d0fc23c
KW
2877 /* warn on \1 - \9 in substitution replacements, but note that \11
2878 * is an octal; and \19 is \1 followed by '9' */
3280af22 2879 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2880 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2881 {
a2a5de95 2882 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2883 *--s = '$';
2884 break;
2885 }
02aa26ce
NT
2886
2887 /* string-change backslash escapes */
3280af22 2888 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2889 --s;
2890 break;
2891 }
ff3f963a
KW
2892 /* In a pattern, process \N, but skip any other backslash escapes.
2893 * This is because we don't want to translate an escape sequence
2894 * into a meta symbol and have the regex compiler use the meta
2895 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2896 * in spite of this, we do have to process \N here while the proper
2897 * charnames handler is in scope. See bugs #56444 and #62056.
2898 * There is a complication because \N in a pattern may also stand
2899 * for 'match a non-nl', and not mean a charname, in which case its
2900 * processing should be deferred to the regex compiler. To be a
2901 * charname it must be followed immediately by a '{', and not look
2902 * like \N followed by a curly quantifier, i.e., not something like
2903 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2904 * quantifier */
2905 else if (PL_lex_inpat
2906 && (*s != 'N'
2907 || s[1] != '{'
2908 || regcurly(s + 1)))
2909 {
cc74c5bd
TS
2910 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2911 goto default_action;
2912 }
02aa26ce 2913
79072805 2914 switch (*s) {
02aa26ce
NT
2915
2916 /* quoted - in transliterations */
79072805 2917 case '-':
3280af22 2918 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2919 *d++ = *s++;
2920 continue;
2921 }
2922 /* FALL THROUGH */
2923 default:
11b8faa4 2924 {
a2a5de95
NC
2925 if ((isALPHA(*s) || isDIGIT(*s)))
2926 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2927 "Unrecognized escape \\%c passed through",
2928 *s);
11b8faa4 2929 /* default action is to copy the quoted character */
f9a63242 2930 goto default_action;
11b8faa4 2931 }
02aa26ce 2932
632403cc 2933 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2934 case '0': case '1': case '2': case '3':
2935 case '4': case '5': case '6': case '7':
ba210ebe 2936 {
53305cf1
NC
2937 I32 flags = 0;
2938 STRLEN len = 3;
77a135fe 2939 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2940 s += len;
2941 }
012bcf8d 2942 goto NUM_ESCAPE_INSERT;
02aa26ce 2943
f0a2b745
KW
2944 /* eg. \o{24} indicates the octal constant \024 */
2945 case 'o':
2946 {
2947 STRLEN len;
454155d9 2948 const char* error;
f0a2b745 2949
454155d9 2950 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2951 s += len;
454155d9 2952 if (! valid) {
f0a2b745
KW
2953 yyerror(error);
2954 continue;
2955 }
2956 goto NUM_ESCAPE_INSERT;
2957 }
2958
77a135fe 2959 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2960 case 'x':
a0ed51b3
LW
2961 ++s;
2962 if (*s == '{') {
9d4ba2ae 2963 char* const e = strchr(s, '}');
a4c04bdc
NC
2964 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2965 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2966 STRLEN len;
355860ce 2967
53305cf1 2968 ++s;
adaeee49 2969 if (!e) {
a0ed51b3 2970 yyerror("Missing right brace on \\x{}");
355860ce 2971 continue;
ba210ebe 2972 }
53305cf1 2973 len = e - s;
77a135fe 2974 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2975 s = e + 1;
a0ed51b3
LW
2976 }
2977 else {
ba210ebe 2978 {
53305cf1 2979 STRLEN len = 2;
a4c04bdc 2980 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2981 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2982 s += len;
2983 }
012bcf8d
GS
2984 }
2985
2986 NUM_ESCAPE_INSERT:
ff3f963a
KW
2987 /* Insert oct or hex escaped character. There will always be
2988 * enough room in sv since such escapes will be longer than any
2989 * UTF-8 sequence they can end up as, except if they force us
2990 * to recode the rest of the string into utf8 */
ba7cea30 2991
77a135fe 2992 /* Here uv is the ordinal of the next character being added in
ff3f963a 2993 * unicode (converted from native). */
77a135fe 2994 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2995 if (!has_utf8 && uv > 255) {
77a135fe
KW
2996 /* Might need to recode whatever we have accumulated so
2997 * far if it contains any chars variant in utf8 or
2998 * utf-ebcdic. */
2999
3000 SvCUR_set(sv, d - SvPVX_const(sv));
3001 SvPOK_on(sv);
3002 *d = '\0';
77a135fe 3003 /* See Note on sizing above. */
7bf79863
KW
3004 sv_utf8_upgrade_flags_grow(sv,
3005 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3006 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3007 d = SvPVX(sv) + SvCUR(sv);
3008 has_utf8 = TRUE;
012bcf8d
GS
3009 }
3010
77a135fe
KW
3011 if (has_utf8) {
3012 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3013 if (PL_lex_inwhat == OP_TRANS &&
3014 PL_sublex_info.sub_op) {
3015 PL_sublex_info.sub_op->op_private |=
3016 (PL_lex_repl ? OPpTRANS_FROM_UTF
3017 : OPpTRANS_TO_UTF);
f9a63242 3018 }
e294cc5d
JH
3019#ifdef EBCDIC
3020 if (uv > 255 && !dorange)
3021 native_range = FALSE;
3022#endif
012bcf8d 3023 }
a0ed51b3 3024 else {
012bcf8d 3025 *d++ = (char)uv;
a0ed51b3 3026 }
012bcf8d
GS
3027 }
3028 else {
c4d5f83a 3029 *d++ = (char) uv;
a0ed51b3 3030 }
79072805 3031 continue;
02aa26ce 3032
4a2d328f 3033 case 'N':
ff3f963a
KW
3034 /* In a non-pattern \N must be a named character, like \N{LATIN
3035 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3036 * mean to match a non-newline. For non-patterns, named
3037 * characters are converted to their string equivalents. In
3038 * patterns, named characters are not converted to their
3039 * ultimate forms for the same reasons that other escapes
3040 * aren't. Instead, they are converted to the \N{U+...} form
3041 * to get the value from the charnames that is in effect right
3042 * now, while preserving the fact that it was a named character
3043 * so that the regex compiler knows this */
3044
3045 /* This section of code doesn't generally use the
3046 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3047 * a close examination of this macro and determined it is a
3048 * no-op except on utfebcdic variant characters. Every
3049 * character generated by this that would normally need to be
3050 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3051 * needed, and would complicate use of copy(). XXX There are
3052 * other parts of this file where the macro is used
3053 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3054
3055 /* The structure of this section of code (besides checking for
3056 * errors and upgrading to utf8) is:
3057 * Further disambiguate between the two meanings of \N, and if
3058 * not a charname, go process it elsewhere
0a96133f
KW
3059 * If of form \N{U+...}, pass it through if a pattern;
3060 * otherwise convert to utf8
3061 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3062 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3063
3064 /* Here, s points to the 'N'; the test below is guaranteed to
3065 * succeed if we are being called on a pattern as we already
3066 * know from a test above that the next character is a '{'.
3067 * On a non-pattern \N must mean 'named sequence, which
3068 * requires braces */
3069 s++;
3070 if (*s != '{') {
3071 yyerror("Missing braces on \\N{}");
3072 continue;
3073 }
3074 s++;
3075
0a96133f 3076 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3077 if (! (e = strchr(s, '}'))) {
3078 if (! PL_lex_inpat) {
5777a3f7 3079 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3080 } else {
3081 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3082 }
0a96133f 3083 continue;
ff3f963a 3084 }
cddc7ef4 3085
ff3f963a 3086 /* Here it looks like a named character */
cddc7ef4 3087
ff3f963a
KW
3088 if (PL_lex_inpat) {
3089
3090 /* XXX This block is temporary code. \N{} implies that the
3091 * pattern is to have Unicode semantics, and therefore
3092 * currently has to be encoded in utf8. By putting it in
3093 * utf8 now, we save a whole pass in the regular expression
3094 * compiler. Once that code is changed so Unicode
3095 * semantics doesn't necessarily have to be in utf8, this
3096 * block should be removed */
3097 if (!has_utf8) {
77a135fe 3098 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3099 SvPOK_on(sv);
e4f3eed8 3100 *d = '\0';
77a135fe 3101 /* See Note on sizing above. */
7bf79863 3102 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3103 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3104 /* 5 = '\N{' + cur char + NUL */
3105 (STRLEN)(send - s) + 5);
f08d6ad9 3106 d = SvPVX(sv) + SvCUR(sv);
89491803 3107 has_utf8 = TRUE;
ff3f963a
KW
3108 }
3109 }
423cee85 3110
ff3f963a
KW
3111 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3112 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3113 | PERL_SCAN_DISALLOW_PREFIX;
3114 STRLEN len;
3115
3116 /* For \N{U+...}, the '...' is a unicode value even on
3117 * EBCDIC machines */
3118 s += 2; /* Skip to next char after the 'U+' */
3119 len = e - s;
3120 uv = grok_hex(s, &len, &flags, NULL);
3121 if (len == 0 || len != (STRLEN)(e - s)) {
3122 yyerror("Invalid hexadecimal number in \\N{U+...}");
3123 s = e + 1;
3124 continue;
3125 }
3126
3127 if (PL_lex_inpat) {
3128
3129 /* Pass through to the regex compiler unchanged. The
3130 * reason we evaluated the number above is to make sure
0a96133f 3131 * there wasn't a syntax error. */
ff3f963a
KW
3132 s -= 5; /* Include the '\N{U+' */
3133 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3134 d += e - s + 1;
3135 }
3136 else { /* Not a pattern: convert the hex to string */
3137
3138 /* If destination is not in utf8, unconditionally
3139 * recode it to be so. This is because \N{} implies
3140 * Unicode semantics, and scalars have to be in utf8
3141 * to guarantee those semantics */
3142 if (! has_utf8) {
3143 SvCUR_set(sv, d - SvPVX_const(sv));
3144 SvPOK_on(sv);
3145 *d = '\0';
3146 /* See Note on sizing above. */
3147 sv_utf8_upgrade_flags_grow(
3148 sv,
3149 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3150 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3151 d = SvPVX(sv) + SvCUR(sv);
3152 has_utf8 = TRUE;
3153 }
3154
3155 /* Add the string to the output */
3156 if (UNI_IS_INVARIANT(uv)) {
3157 *d++ = (char) uv;
3158 }
3159 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3160 }
3161 }
3162 else { /* Here is \N{NAME} but not \N{U+...}. */
3163
3164 SV *res; /* result from charnames */
3165 const char *str; /* the string in 'res' */
3166 STRLEN len; /* its length */
3167
3168 /* Get the value for NAME */
3169 res = newSVpvn(s, e - s);
3170 res = new_constant( NULL, 0, "charnames",
3171 /* includes all of: \N{...} */
3172 res, NULL, s - 3, e - s + 4 );
3173
3174 /* Most likely res will be in utf8 already since the
3175 * standard charnames uses pack U, but a custom translator
3176 * can leave it otherwise, so make sure. XXX This can be
3177 * revisited to not have charnames use utf8 for characters
3178 * that don't need it when regexes don't have to be in utf8
3179 * for Unicode semantics. If doing so, remember EBCDIC */
3180 sv_utf8_upgrade(res);
3181 str = SvPV_const(res, len);
3182
3183 /* Don't accept malformed input */
3184 if (! is_utf8_string((U8 *) str, len)) {
3185 yyerror("Malformed UTF-8 returned by \\N");
3186 }
3187 else if (PL_lex_inpat) {
3188
3189 if (! len) { /* The name resolved to an empty string */
3190 Copy("\\N{}", d, 4, char);
3191 d += 4;
3192 }
3193 else {
3194 /* In order to not lose information for the regex
3195 * compiler, pass the result in the specially made
3196 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3197 * the code points in hex of each character
3198 * returned by charnames */
3199
3200 const char *str_end = str + len;
3201 STRLEN char_length; /* cur char's byte length */
3202 STRLEN output_length; /* and the number of bytes
3203 after this is translated
3204 into hex digits */
3205 const STRLEN off = d - SvPVX_const(sv);
3206
3207 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3208 * max('U+', '.'); and 1 for NUL */
3209 char hex_string[2 * UTF8_MAXBYTES + 5];
3210
3211 /* Get the first character of the result. */
3212 U32 uv = utf8n_to_uvuni((U8 *) str,
3213 len,
3214 &char_length,
3215 UTF8_ALLOW_ANYUV);
3216
3217 /* The call to is_utf8_string() above hopefully
3218 * guarantees that there won't be an error. But
3219 * it's easy here to make sure. The function just
3220 * above warns and returns 0 if invalid utf8, but
3221 * it can also return 0 if the input is validly a
3222 * NUL. Disambiguate */
3223 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3224 uv = UNICODE_REPLACEMENT;
3225 }
3226
3227 /* Convert first code point to hex, including the
3228 * boiler plate before it */
78c35590 3229 output_length =
3353de27
NC
3230 my_snprintf(hex_string, sizeof(hex_string),
3231 "\\N{U+%X", (unsigned int) uv);
ff3f963a
KW
3232
3233 /* Make sure there is enough space to hold it */
3234 d = off + SvGROW(sv, off
3235 + output_length
3236 + (STRLEN)(send - e)
3237 + 2); /* '}' + NUL */
3238 /* And output it */
3239 Copy(hex_string, d, output_length, char);
3240 d += output_length;
3241
3242 /* For each subsequent character, append dot and
3243 * its ordinal in hex */
3244 while ((str += char_length) < str_end) {
3245 const STRLEN off = d - SvPVX_const(sv);
3246 U32 uv = utf8n_to_uvuni((U8 *) str,
3247 str_end - str,
3248 &char_length,
3249 UTF8_ALLOW_ANYUV);
3250 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3251 uv = UNICODE_REPLACEMENT;
3252 }
3253
78c35590 3254 output_length =
3353de27
NC
3255 my_snprintf(hex_string, sizeof(hex_string),
3256 ".%X", (unsigned int) uv);
ff3f963a
KW
3257
3258 d = off + SvGROW(sv, off
3259 + output_length
3260 + (STRLEN)(send - e)
3261 + 2); /* '}' + NUL */
3262 Copy(hex_string, d, output_length, char);
3263 d += output_length;
3264 }
3265
3266 *d++ = '}'; /* Done. Add the trailing brace */
3267 }
3268 }
3269 else { /* Here, not in a pattern. Convert the name to a
3270 * string. */
3271
3272 /* If destination is not in utf8, unconditionally
3273 * recode it to be so. This is because \N{} implies
3274 * Unicode semantics, and scalars have to be in utf8
3275 * to guarantee those semantics */
3276 if (! has_utf8) {
3277 SvCUR_set(sv, d - SvPVX_const(sv));
3278 SvPOK_on(sv);
3279 *d = '\0';
3280 /* See Note on sizing above. */
3281 sv_utf8_upgrade_flags_grow(sv,
3282 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3283 len + (STRLEN)(send - s) + 1);
3284 d = SvPVX(sv) + SvCUR(sv);
3285 has_utf8 = TRUE;
3286 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3287
3288 /* See Note on sizing above. (NOTE: SvCUR() is not
3289 * set correctly here). */
3290 const STRLEN off = d - SvPVX_const(sv);
3291 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3292 }
3293 Copy(str, d, len, char);
3294 d += len;
423cee85 3295 }
423cee85 3296 SvREFCNT_dec(res);
cb233ae3
KW
3297
3298 /* Deprecate non-approved name syntax */
3299 if (ckWARN_d(WARN_DEPRECATED)) {
3300 bool problematic = FALSE;
3301 char* i = s;
3302
3303 /* For non-ut8 input, look to see that the first
3304 * character is an alpha, then loop through the rest
3305 * checking that each is a continuation */
3306 if (! this_utf8) {
3307 if (! isALPHAU(*i)) problematic = TRUE;
3308 else for (i = s + 1; i < e; i++) {
3309 if (isCHARNAME_CONT(*i)) continue;
3310 problematic = TRUE;
3311 break;
3312 }
3313 }
3314 else {
3315 /* Similarly for utf8. For invariants can check
3316 * directly. We accept anything above the latin1
3317 * range because it is immaterial to Perl if it is
3318 * correct or not, and is expensive to check. But
3319 * it is fairly easy in the latin1 range to convert
3320 * the variants into a single character and check
3321 * those */
3322 if (UTF8_IS_INVARIANT(*i)) {
3323 if (! isALPHAU(*i)) problematic = TRUE;
3324 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
81c14aa2 3325 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
cb233ae3
KW
3326 *(i+1)))))
3327 {
3328 problematic = TRUE;
3329 }
3330 }
3331 if (! problematic) for (i = s + UTF8SKIP(s);
3332 i < e;
3333 i+= UTF8SKIP(i))
3334 {
3335 if (UTF8_IS_INVARIANT(*i)) {
3336 if (isCHARNAME_CONT(*i)) continue;
3337 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3338 continue;
3339 } else if (isCHARNAME_CONT(
3340 UNI_TO_NATIVE(
81c14aa2 3341 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
cb233ae3
KW
3342 {
3343 continue;
3344 }
3345 problematic = TRUE;
3346 break;
3347 }
3348 }
3349 if (problematic) {
6e1bad6c
KW
3350 /* The e-i passed to the final %.*s makes sure that
3351 * should the trailing NUL be missing that this
3352 * print won't run off the end of the string */
cb233ae3 3353 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3354 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3355 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3356 }
3357 }
3358 } /* End \N{NAME} */
ff3f963a
KW
3359#ifdef EBCDIC
3360 if (!dorange)
3361 native_range = FALSE; /* \N{} is defined to be Unicode */
3362#endif
3363 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3364 continue;
3365
02aa26ce 3366 /* \c is a control character */
79072805
LW
3367 case 'c':
3368 s++;
961ce445 3369 if (s < send) {
f9d13529 3370 *d++ = grok_bslash_c(*s++, 1);
ba210ebe 3371 }
961ce445
RGS
3372 else {
3373 yyerror("Missing control char name in \\c");
3374 }
79072805 3375 continue;
02aa26ce
NT
3376
3377 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3378 case 'b':
db42d148 3379 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3380 break;
3381 case 'n':
db42d148 3382 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3383 break;
3384 case 'r':
db42d148 3385 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3386 break;
3387 case 'f':
db42d148 3388 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3389 break;
3390 case 't':
db42d148 3391 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3392 break;
34a3fe2a 3393 case 'e':
db42d148 3394 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3395 break;
3396 case 'a':
db42d148 3397 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3398 break;
02aa26ce
NT
3399 } /* end switch */
3400
79072805
LW
3401 s++;
3402 continue;
02aa26ce 3403 } /* end if (backslash) */
4c3a8340
TS
3404#ifdef EBCDIC
3405 else
3406 literal_endpoint++;
3407#endif
02aa26ce 3408
f9a63242 3409 default_action:
77a135fe
KW
3410 /* If we started with encoded form, or already know we want it,
3411 then encode the next character */
3412 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3413 STRLEN len = 1;
77a135fe
KW
3414
3415
3416 /* One might think that it is wasted effort in the case of the
3417 * source being utf8 (this_utf8 == TRUE) to take the next character
3418 * in the source, convert it to an unsigned value, and then convert
3419 * it back again. But the source has not been validated here. The
3420 * routine that does the conversion checks for errors like
3421 * malformed utf8 */
3422
5f66b61c
AL
3423 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3424 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3425 if (!has_utf8) {
3426 SvCUR_set(sv, d - SvPVX_const(sv));
3427 SvPOK_on(sv);
3428 *d = '\0';
77a135fe 3429 /* See Note on sizing above. */
7bf79863
KW
3430 sv_utf8_upgrade_flags_grow(sv,
3431 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3432 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3433 d = SvPVX(sv) + SvCUR(sv);
3434 has_utf8 = TRUE;
3435 } else if (need > len) {
3436 /* encoded value larger than old, may need extra space (NOTE:
3437 * SvCUR() is not set correctly here). See Note on sizing
3438 * above. */
9d4ba2ae 3439 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3440 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3441 }
77a135fe
KW
3442 s += len;
3443
5f66b61c 3444 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3445#ifdef EBCDIC
3446 if (uv > 255 && !dorange)
3447 native_range = FALSE;
3448#endif
2b9d42f0
NIS
3449 }
3450 else {
3451 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3452 }
02aa26ce
NT
3453 } /* while loop to process each character */
3454
3455 /* terminate the string and set up the sv */
79072805 3456 *d = '\0';
95a20fc0 3457 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3458 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3459 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3460
79072805 3461 SvPOK_on(sv);
9f4817db 3462 if (PL_encoding && !has_utf8) {
d0063567
DK
3463 sv_recode_to_utf8(sv, PL_encoding);
3464 if (SvUTF8(sv))
3465 has_utf8 = TRUE;
9f4817db 3466 }
2b9d42f0 3467 if (has_utf8) {
7e2040f0 3468 SvUTF8_on(sv);
2b9d42f0 3469 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3470 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3471 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3472 }
3473 }
79072805 3474
02aa26ce 3475 /* shrink the sv if we allocated more than we used */
79072805 3476 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3477 SvPV_shrink_to_cur(sv);
79072805 3478 }
02aa26ce 3479
6154021b 3480 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3481 if (s > PL_bufptr) {
eb0d8d16
NC
3482 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3483 const char *const key = PL_lex_inpat ? "qr" : "q";
3484 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3485 const char *type;
3486 STRLEN typelen;
3487
3488 if (PL_lex_inwhat == OP_TRANS) {
3489 type = "tr";
3490 typelen = 2;
3491 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3492 type = "s";
3493 typelen = 1;
3494 } else {
3495 type = "qq";
3496 typelen = 2;
3497 }
3498
3499 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3500 type, typelen);
3501 }
6154021b 3502 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3503 } else
8990e307 3504 SvREFCNT_dec(sv);
79072805
LW
3505 return s;
3506}
3507
ffb4593c
NT
3508/* S_intuit_more
3509 * Returns TRUE if there's more to the expression (e.g., a subscript),
3510 * FALSE otherwise.
ffb4593c
NT
3511 *
3512 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3513 *
3514 * ->[ and ->{ return TRUE
3515 * { and [ outside a pattern are always subscripts, so return TRUE
3516 * if we're outside a pattern and it's not { or [, then return FALSE
3517 * if we're in a pattern and the first char is a {
3518 * {4,5} (any digits around the comma) returns FALSE
3519 * if we're in a pattern and the first char is a [
3520 * [] returns FALSE
3521 * [SOMETHING] has a funky algorithm to decide whether it's a
3522 * character class or not. It has to deal with things like
3523 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3524 * anything else returns TRUE
3525 */
3526
9cbb5ea2
GS
3527/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3528
76e3520e 3529STATIC int
cea2e8a9 3530S_intuit_more(pTHX_ register char *s)
79072805 3531{
97aff369 3532 dVAR;
7918f24d
NC
3533
3534 PERL_ARGS_ASSERT_INTUIT_MORE;
3535
3280af22 3536 if (PL_lex_brackets)
79072805
LW
3537 return TRUE;
3538 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3539 return TRUE;
3540 if (*s != '{' && *s != '[')
3541 return FALSE;
3280af22 3542 if (!PL_lex_inpat)
79072805
LW
3543 return TRUE;
3544
3545 /* In a pattern, so maybe we have {n,m}. */
3546 if (*s == '{') {
b3155d95 3547 if (regcurly(s)) {
79072805 3548 return FALSE;
b3155d95 3549 }
79072805 3550 return TRUE;
79072805
LW
3551 }
3552
3553 /* On the other hand, maybe we have a character class */
3554
3555 s++;
3556 if (*s == ']' || *s == '^')
3557 return FALSE;
3558 else {
ffb4593c 3559 /* this is terrifying, and it works */
79072805
LW
3560 int weight = 2; /* let's weigh the evidence */
3561 char seen[256];
f27ffc4a 3562 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3563 const char * const send = strchr(s,']');
3280af22 3564 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3565
3566 if (!send) /* has to be an expression */
3567 return TRUE;
3568
3569 Zero(seen,256,char);
3570 if (*s == '$')
3571 weight -= 3;
3572 else if (isDIGIT(*s)) {
3573 if (s[1] != ']') {
3574 if (isDIGIT(s[1]) && s[2] == ']')
3575 weight -= 10;
3576 }
3577 else
3578 weight -= 100;
3579 }
3580 for (; s < send; s++) {
3581 last_un_char = un_char;
3582 un_char = (unsigned char)*s;
3583 switch (*s) {
3584 case '@':
3585 case '&':
3586 case '$':
3587 weight -= seen[un_char] * 10;
7e2040f0 3588 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3589 int len;
8903cb82 3590 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3591 len = (int)strlen(tmpbuf);
3592 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3593 weight -= 100;
3594 else
3595 weight -= 10;
3596 }
3597 else if (*s == '$' && s[1] &&
93a17b20
LW
3598 strchr("[#!%*<>()-=",s[1])) {
3599 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3600 weight -= 10;
3601 else
3602 weight -= 1;
3603 }
3604 break;
3605 case '\\':
3606 un_char = 254;
3607 if (s[1]) {
93a17b20 3608 if (strchr("wds]",s[1]))
79072805 3609 weight += 100;
10edeb5d 3610 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3611 weight += 1;
93a17b20 3612 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3613 weight += 40;
3614 else if (isDIGIT(s[1])) {
3615 weight += 40;
3616 while (s[1] && isDIGIT(s[1]))
3617 s++;
3618 }
3619 }
3620 else
3621 weight += 100;
3622 break;
3623 case '-':
3624 if (s[1] == '\\')
3625 weight += 50;
93a17b20 3626 if (strchr("aA01! ",last_un_char))
79072805 3627 weight += 30;
93a17b20 3628 if (strchr("zZ79~",s[1]))
79072805 3629 weight += 30;
f27ffc4a
GS
3630 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3631 weight -= 5; /* cope with negative subscript */
79072805
LW
3632 break;
3633 default:
3792a11b
NC
3634 if (!isALNUM(last_un_char)
3635 && !(last_un_char == '$' || last_un_char == '@'
3636 || last_un_char == '&')
3637 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3638 char *d = tmpbuf;
3639 while (isALPHA(*s))
3640 *d++ = *s++;
3641 *d = '\0';
5458a98a 3642 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3643 weight -= 150;
3644 }
3645 if (un_char == last_un_char + 1)
3646 weight += 5;
3647 weight -= seen[un_char];
3648 break;
3649 }
3650 seen[un_char]++;
3651 }
3652 if (weight >= 0) /* probably a character class */
3653 return FALSE;
3654 }
3655
3656 return TRUE;
3657}
ffed7fef 3658
ffb4593c
NT
3659/*
3660 * S_intuit_method
3661 *
3662 * Does all the checking to disambiguate
3663 * foo bar
3664 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3665 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3666 *
3667 * First argument is the stuff after the first token, e.g. "bar".
3668 *
3669 * Not a method if bar is a filehandle.
3670 * Not a method if foo is a subroutine prototyped to take a filehandle.
3671 * Not a method if it's really "Foo $bar"
3672 * Method if it's "foo $bar"
3673 * Not a method if it's really "print foo $bar"
3674 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3675 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3676 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3677 * =>
3678 */
3679
76e3520e 3680STATIC int
62d55b22 3681S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3682{
97aff369 3683 dVAR;
a0d0e21e 3684 char *s = start + (*start == '$');
3280af22 3685 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3686 STRLEN len;
3687 GV* indirgv;
5db06880
NC
3688#ifdef PERL_MAD
3689 int soff;
3690#endif
a0d0e21e 3691
7918f24d
NC
3692 PERL_ARGS_ASSERT_INTUIT_METHOD;
3693
a0d0e21e 3694 if (gv) {
62d55b22 3695 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3696 return 0;
62d55b22
NC
3697 if (cv) {
3698 if (SvPOK(cv)) {
3699 const char *proto = SvPVX_const(cv);
3700 if (proto) {
3701 if (*proto == ';')
3702 proto++;
3703 if (*proto == '*')
3704 return 0;
3705 }
b6c543e3
IZ
3706 }
3707 } else
c35e046a 3708 gv = NULL;
a0d0e21e 3709 }
8903cb82 3710 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3711 /* start is the beginning of the possible filehandle/object,
3712 * and s is the end of it
3713 * tmpbuf is a copy of it
3714 */
3715
a0d0e21e 3716 if (*start == '$') {
3ef1310e
RGS
3717 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3718 isUPPER(*PL_tokenbuf))
a0d0e21e 3719 return 0;
5db06880
NC
3720#ifdef PERL_MAD
3721 len = start - SvPVX(PL_linestr);
3722#endif
29595ff2 3723 s = PEEKSPACE(s);
f0092767 3724#ifdef PERL_MAD
5db06880
NC
3725 start = SvPVX(PL_linestr) + len;
3726#endif
3280af22
NIS
3727 PL_bufptr = start;
3728 PL_expect = XREF;
a0d0e21e
LW
3729 return *s == '(' ? FUNCMETH : METHOD;
3730 }
5458a98a 3731 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3732 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3733 len -= 2;
3734 tmpbuf[len] = '\0';
5db06880
NC
3735#ifdef PERL_MAD
3736 soff = s - SvPVX(PL_linestr);
3737#endif
c3e0f903
GS
3738 goto bare_package;
3739 }
90e5519e 3740 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3741 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3742 return 0;
3743 /* filehandle or package name makes it a method */
da51bb9b 3744 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3745#ifdef PERL_MAD
3746 soff = s - SvPVX(PL_linestr);
3747#endif
29595ff2 3748 s = PEEKSPACE(s);
3280af22 3749 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 3750 return 0; /* no assumptions -- "=>" quotes bareword */
c3e0f903 3751 bare_package:
cd81e915 3752 start_force(PL_curforce);
9ded7720 3753 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3754 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3755 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3756 if (PL_madskills)
3757 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3758 PL_expect = XTERM;
a0d0e21e 3759 force_next(WORD);
3280af22 3760 PL_bufptr = s;
5db06880
NC
3761#ifdef PERL_MAD
3762 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3763#endif
a0d0e21e
LW
3764 return *s == '(' ? FUNCMETH : METHOD;
3765 }
3766 }
3767 return 0;
3768}
3769
16d20bd9 3770/* Encoded script support. filter_add() effectively inserts a
4e553d73 3771 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3772 * Note that the filter function only applies to the current source file