This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
In S_pending_ident(), only call gv_fetchpvn_flags() if the warning is enabled.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
378cc40b 24#include "EXTERN.h"
864dbfa3 25#define PERL_IN_TOKE_C
378cc40b 26#include "perl.h"
378cc40b 27
eb0d8d16
NC
28#define new_constant(a,b,c,d,e,f,g) \
29 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
30
6154021b 31#define pl_yylval (PL_parser->yylval)
d3b6f988 32
acdf0a21
DM
33/* YYINITDEPTH -- initial size of the parser's stacks. */
34#define YYINITDEPTH 200
35
199e78b7
DM
36/* XXX temporary backwards compatibility */
37#define PL_lex_brackets (PL_parser->lex_brackets)
38#define PL_lex_brackstack (PL_parser->lex_brackstack)
39#define PL_lex_casemods (PL_parser->lex_casemods)
40#define PL_lex_casestack (PL_parser->lex_casestack)
41#define PL_lex_defer (PL_parser->lex_defer)
42#define PL_lex_dojoin (PL_parser->lex_dojoin)
43#define PL_lex_expect (PL_parser->lex_expect)
44#define PL_lex_formbrack (PL_parser->lex_formbrack)
45#define PL_lex_inpat (PL_parser->lex_inpat)
46#define PL_lex_inwhat (PL_parser->lex_inwhat)
47#define PL_lex_op (PL_parser->lex_op)
48#define PL_lex_repl (PL_parser->lex_repl)
49#define PL_lex_starts (PL_parser->lex_starts)
50#define PL_lex_stuff (PL_parser->lex_stuff)
51#define PL_multi_start (PL_parser->multi_start)
52#define PL_multi_open (PL_parser->multi_open)
53#define PL_multi_close (PL_parser->multi_close)
54#define PL_pending_ident (PL_parser->pending_ident)
55#define PL_preambled (PL_parser->preambled)
56#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 57#define PL_linestr (PL_parser->linestr)
c2598295
DM
58#define PL_expect (PL_parser->expect)
59#define PL_copline (PL_parser->copline)
f06b5848
DM
60#define PL_bufptr (PL_parser->bufptr)
61#define PL_oldbufptr (PL_parser->oldbufptr)
62#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
63#define PL_linestart (PL_parser->linestart)
64#define PL_bufend (PL_parser->bufend)
65#define PL_last_uni (PL_parser->last_uni)
66#define PL_last_lop (PL_parser->last_lop)
67#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 68#define PL_lex_state (PL_parser->lex_state)
2f9285f8 69#define PL_rsfp (PL_parser->rsfp)
5486870f 70#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
71#define PL_in_my (PL_parser->in_my)
72#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 73#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 74#define PL_multi_end (PL_parser->multi_end)
13765c85 75#define PL_error_count (PL_parser->error_count)
199e78b7
DM
76
77#ifdef PERL_MAD
78# define PL_endwhite (PL_parser->endwhite)
79# define PL_faketokens (PL_parser->faketokens)
80# define PL_lasttoke (PL_parser->lasttoke)
81# define PL_nextwhite (PL_parser->nextwhite)
82# define PL_realtokenstart (PL_parser->realtokenstart)
83# define PL_skipwhite (PL_parser->skipwhite)
84# define PL_thisclose (PL_parser->thisclose)
85# define PL_thismad (PL_parser->thismad)
86# define PL_thisopen (PL_parser->thisopen)
87# define PL_thisstuff (PL_parser->thisstuff)
88# define PL_thistoken (PL_parser->thistoken)
89# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
90# define PL_thiswhite (PL_parser->thiswhite)
91# define PL_nexttoke (PL_parser->nexttoke)
92# define PL_curforce (PL_parser->curforce)
93#else
94# define PL_nexttoke (PL_parser->nexttoke)
95# define PL_nexttype (PL_parser->nexttype)
96# define PL_nextval (PL_parser->nextval)
199e78b7
DM
97#endif
98
16173588
NC
99/* This can't be done with embed.fnc, because struct yy_parser contains a
100 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
101static int
102S_pending_ident(pTHX);
199e78b7 103
0bd48802 104static const char ident_too_long[] = "Identifier too long";
8903cb82 105
29595ff2 106#ifdef PERL_MAD
29595ff2 107# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 108# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 109#else
5db06880 110# define CURMAD(slot,sv)
9ded7720 111# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
112#endif
113
9059aa12
LW
114#define XFAKEBRACK 128
115#define XENUMMASK 127
116
39e02b42
JH
117#ifdef USE_UTF8_SCRIPTS
118# define UTF (!IN_BYTES)
2b9d42f0 119#else
746b446a 120# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 121#endif
a0ed51b3 122
b1fc3636
CJ
123/* The maximum number of characters preceding the unrecognized one to display */
124#define UNRECOGNIZED_PRECEDE_COUNT 10
125
61f0cdd9 126/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
127 * 1999-02-27 mjd-perl-patch@plover.com */
128#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
129
bf4acbe4 130#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 131
ffb4593c
NT
132/* LEX_* are values for PL_lex_state, the state of the lexer.
133 * They are arranged oddly so that the guard on the switch statement
79072805
LW
134 * can get by with a single comparison (if the compiler is smart enough).
135 */
136
fb73857a
PP
137/* #define LEX_NOTPARSING 11 is done in perl.h. */
138
b6007c36
DM
139#define LEX_NORMAL 10 /* normal code (ie not within "...") */
140#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
141#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
142#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
143#define LEX_INTERPSTART 6 /* expecting the start of a $var */
144
145 /* at end of code, eg "$x" followed by: */
146#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
147#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
148
149#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
150 string or after \E, $foo, etc */
151#define LEX_INTERPCONST 2 /* NOT USED */
152#define LEX_FORMLINE 1 /* expecting a format line */
153#define LEX_KNOWNEXT 0 /* next token known; just return it */
154
79072805 155
bbf60fe6 156#ifdef DEBUGGING
27da23d5 157static const char* const lex_state_names[] = {
bbf60fe6
DM
158 "KNOWNEXT",
159 "FORMLINE",
160 "INTERPCONST",
161 "INTERPCONCAT",
162 "INTERPENDMAYBE",
163 "INTERPEND",
164 "INTERPSTART",
165 "INTERPPUSH",
166 "INTERPCASEMOD",
167 "INTERPNORMAL",
168 "NORMAL"
169};
170#endif
171
79072805
LW
172#ifdef ff_next
173#undef ff_next
d48672a2
LW
174#endif
175
79072805 176#include "keywords.h"
fe14fcc3 177
ffb4593c
NT
178/* CLINE is a macro that ensures PL_copline has a sane value */
179
ae986130
LW
180#ifdef CLINE
181#undef CLINE
182#endif
57843af0 183#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 184
5db06880 185#ifdef PERL_MAD
29595ff2
NC
186# define SKIPSPACE0(s) skipspace0(s)
187# define SKIPSPACE1(s) skipspace1(s)
188# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
189# define PEEKSPACE(s) skipspace2(s,0)
190#else
191# define SKIPSPACE0(s) skipspace(s)
192# define SKIPSPACE1(s) skipspace(s)
193# define SKIPSPACE2(s,tsv) skipspace(s)
194# define PEEKSPACE(s) skipspace(s)
195#endif
196
ffb4593c
NT
197/*
198 * Convenience functions to return different tokens and prime the
9cbb5ea2 199 * lexer for the next token. They all take an argument.
ffb4593c
NT
200 *
201 * TOKEN : generic token (used for '(', DOLSHARP, etc)
202 * OPERATOR : generic operator
203 * AOPERATOR : assignment operator
204 * PREBLOCK : beginning the block after an if, while, foreach, ...
205 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
206 * PREREF : *EXPR where EXPR is not a simple identifier
207 * TERM : expression term
208 * LOOPX : loop exiting command (goto, last, dump, etc)
209 * FTST : file test operator
210 * FUN0 : zero-argument function
2d2e263d 211 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
212 * BOop : bitwise or or xor
213 * BAop : bitwise and
214 * SHop : shift operator
215 * PWop : power operator
9cbb5ea2 216 * PMop : pattern-matching operator
ffb4593c
NT
217 * Aop : addition-level operator
218 * Mop : multiplication-level operator
219 * Eop : equality-testing operator
e5edeb50 220 * Rop : relational operator <= != gt
ffb4593c
NT
221 *
222 * Also see LOP and lop() below.
223 */
224
998054bd 225#ifdef DEBUGGING /* Serve -DT. */
704d4215 226# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 227#else
bbf60fe6 228# define REPORT(retval) (retval)
998054bd
SC
229#endif
230
bbf60fe6
DM
231#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
232#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
233#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
234#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
235#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
236#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
237#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
238#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
239#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
240#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
241#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
242#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
243#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
244#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
245#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
246#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
247#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
248#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
249#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
250#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 251
a687059c
LW
252/* This bit of chicanery makes a unary function followed by
253 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
254 * The UNIDOR macro is for unary functions that can be followed by the //
255 * operator (such as C<shift // 0>).
a687059c 256 */
376fcdbf 257#define UNI2(f,x) { \
6154021b 258 pl_yylval.ival = f; \
376fcdbf
AL
259 PL_expect = x; \
260 PL_bufptr = s; \
261 PL_last_uni = PL_oldbufptr; \
262 PL_last_lop_op = f; \
263 if (*s == '(') \
264 return REPORT( (int)FUNC1 ); \
29595ff2 265 s = PEEKSPACE(s); \
376fcdbf
AL
266 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
267 }
6f33ba73
RGS
268#define UNI(f) UNI2(f,XTERM)
269#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 270
376fcdbf 271#define UNIBRACK(f) { \
6154021b 272 pl_yylval.ival = f; \
376fcdbf
AL
273 PL_bufptr = s; \
274 PL_last_uni = PL_oldbufptr; \
275 if (*s == '(') \
276 return REPORT( (int)FUNC1 ); \
29595ff2 277 s = PEEKSPACE(s); \
376fcdbf
AL
278 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
279 }
79072805 280
9f68db38 281/* grandfather return to old style */
6154021b 282#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 283
8fa7f367
JH
284#ifdef DEBUGGING
285
6154021b 286/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
287enum token_type {
288 TOKENTYPE_NONE,
289 TOKENTYPE_IVAL,
6154021b 290 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
291 TOKENTYPE_PVAL,
292 TOKENTYPE_OPVAL,
293 TOKENTYPE_GVVAL
294};
295
6d4a66ac
NC
296static struct debug_tokens {
297 const int token;
298 enum token_type type;
299 const char *name;
300} const debug_tokens[] =
9041c2e3 301{
bbf60fe6
DM
302 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
303 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
304 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
305 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
306 { ARROW, TOKENTYPE_NONE, "ARROW" },
307 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
308 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
309 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
310 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
311 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 312 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
313 { DO, TOKENTYPE_NONE, "DO" },
314 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
315 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
316 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
317 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
318 { ELSE, TOKENTYPE_NONE, "ELSE" },
319 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
320 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
321 { FOR, TOKENTYPE_IVAL, "FOR" },
322 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
323 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
324 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
325 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
326 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
327 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 328 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
329 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
330 { IF, TOKENTYPE_IVAL, "IF" },
331 { LABEL, TOKENTYPE_PVAL, "LABEL" },
332 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
333 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
334 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
335 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
336 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
337 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
338 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
339 { MY, TOKENTYPE_IVAL, "MY" },
340 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
341 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
342 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
343 { OROP, TOKENTYPE_IVAL, "OROP" },
344 { OROR, TOKENTYPE_NONE, "OROR" },
345 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
346 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
347 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
348 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
349 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
350 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
351 { PREINC, TOKENTYPE_NONE, "PREINC" },
352 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
353 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
354 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
355 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
356 { SUB, TOKENTYPE_NONE, "SUB" },
357 { THING, TOKENTYPE_OPVAL, "THING" },
358 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
359 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
360 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
361 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
362 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
363 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 364 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
365 { WHILE, TOKENTYPE_IVAL, "WHILE" },
366 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 367 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 368 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
369};
370
6154021b 371/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 372
bbf60fe6 373STATIC int
704d4215 374S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 375{
97aff369 376 dVAR;
7918f24d
NC
377
378 PERL_ARGS_ASSERT_TOKEREPORT;
379
bbf60fe6 380 if (DEBUG_T_TEST) {
bd61b366 381 const char *name = NULL;
bbf60fe6 382 enum token_type type = TOKENTYPE_NONE;
f54cb97a 383 const struct debug_tokens *p;
396482e1 384 SV* const report = newSVpvs("<== ");
bbf60fe6 385
f54cb97a 386 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
387 if (p->token == (int)rv) {
388 name = p->name;
389 type = p->type;
390 break;
391 }
392 }
393 if (name)
54667de8 394 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
395 else if ((char)rv > ' ' && (char)rv < '~')
396 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
397 else if (!rv)
396482e1 398 sv_catpvs(report, "EOF");
bbf60fe6
DM
399 else
400 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
401 switch (type) {
402 case TOKENTYPE_NONE:
403 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
404 break;
405 case TOKENTYPE_IVAL:
704d4215 406 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
407 break;
408 case TOKENTYPE_OPNUM:
409 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 410 PL_op_name[lvalp->ival]);
bbf60fe6
DM
411 break;
412 case TOKENTYPE_PVAL:
704d4215 413 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
414 break;
415 case TOKENTYPE_OPVAL:
704d4215 416 if (lvalp->opval) {
401441c0 417 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
418 PL_op_name[lvalp->opval->op_type]);
419 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 420 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 421 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
422 }
423
424 }
401441c0 425 else
396482e1 426 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
427 break;
428 }
b6007c36 429 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
430 };
431 return (int)rv;
998054bd
SC
432}
433
b6007c36
DM
434
435/* print the buffer with suitable escapes */
436
437STATIC void
15f169a1 438S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 439{
396482e1 440 SV* const tmp = newSVpvs("");
7918f24d
NC
441
442 PERL_ARGS_ASSERT_PRINTBUF;
443
b6007c36
DM
444 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
445 SvREFCNT_dec(tmp);
446}
447
8fa7f367
JH
448#endif
449
8290c323
NC
450static int
451S_deprecate_commaless_var_list(pTHX) {
452 PL_expect = XTERM;
453 deprecate("comma-less variable list");
454 return REPORT(','); /* grandfather non-comma-format format */
455}
456
ffb4593c
NT
457/*
458 * S_ao
459 *
c963b151
BD
460 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
461 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
462 */
463
76e3520e 464STATIC int
cea2e8a9 465S_ao(pTHX_ int toketype)
a0d0e21e 466{
97aff369 467 dVAR;
3280af22
NIS
468 if (*PL_bufptr == '=') {
469 PL_bufptr++;
a0d0e21e 470 if (toketype == ANDAND)
6154021b 471 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 472 else if (toketype == OROR)
6154021b 473 pl_yylval.ival = OP_ORASSIGN;
c963b151 474 else if (toketype == DORDOR)
6154021b 475 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
476 toketype = ASSIGNOP;
477 }
478 return toketype;
479}
480
ffb4593c
NT
481/*
482 * S_no_op
483 * When Perl expects an operator and finds something else, no_op
484 * prints the warning. It always prints "<something> found where
485 * operator expected. It prints "Missing semicolon on previous line?"
486 * if the surprise occurs at the start of the line. "do you need to
487 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
488 * where the compiler doesn't know if foo is a method call or a function.
489 * It prints "Missing operator before end of line" if there's nothing
490 * after the missing operator, or "... before <...>" if there is something
491 * after the missing operator.
492 */
493
76e3520e 494STATIC void
15f169a1 495S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 496{
97aff369 497 dVAR;
9d4ba2ae
AL
498 char * const oldbp = PL_bufptr;
499 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 500
7918f24d
NC
501 PERL_ARGS_ASSERT_NO_OP;
502
1189a94a
GS
503 if (!s)
504 s = oldbp;
07c798fb 505 else
1189a94a 506 PL_bufptr = s;
cea2e8a9 507 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
508 if (ckWARN_d(WARN_SYNTAX)) {
509 if (is_first)
510 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
511 "\t(Missing semicolon on previous line?)\n");
512 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 513 const char *t;
c35e046a
AL
514 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
515 NOOP;
56da5a46
RGS
516 if (t < PL_bufptr && isSPACE(*t))
517 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
518 "\t(Do you need to predeclare %.*s?)\n",
551405c4 519 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
520 }
521 else {
522 assert(s >= oldbp);
523 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 524 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 525 }
07c798fb 526 }
3280af22 527 PL_bufptr = oldbp;
8990e307
LW
528}
529
ffb4593c
NT
530/*
531 * S_missingterm
532 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 533 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
534 * If we're in a delimited string and the delimiter is a control
535 * character, it's reformatted into a two-char sequence like ^C.
536 * This is fatal.
537 */
538
76e3520e 539STATIC void
cea2e8a9 540S_missingterm(pTHX_ char *s)
8990e307 541{
97aff369 542 dVAR;
8990e307
LW
543 char tmpbuf[3];
544 char q;
545 if (s) {
9d4ba2ae 546 char * const nl = strrchr(s,'\n');
d2719217 547 if (nl)
8990e307
LW
548 *nl = '\0';
549 }
463559e7 550 else if (isCNTRL(PL_multi_close)) {
8990e307 551 *tmpbuf = '^';
585ec06d 552 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
553 tmpbuf[2] = '\0';
554 s = tmpbuf;
555 }
556 else {
eb160463 557 *tmpbuf = (char)PL_multi_close;
8990e307
LW
558 tmpbuf[1] = '\0';
559 s = tmpbuf;
560 }
561 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 562 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 563}
79072805 564
ef89dcc3 565#define FEATURE_IS_ENABLED(name) \
0d863452 566 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 567 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b
NC
568/* The longest string we pass in. */
569#define MAX_FEATURE_LEN (sizeof("switch")-1)
570
0d863452
RH
571/*
572 * S_feature_is_enabled
573 * Check whether the named feature is enabled.
574 */
575STATIC bool
15f169a1 576S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 577{
97aff369 578 dVAR;
0d863452 579 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 580 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
581
582 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
583
4a731d7b
NC
584 assert(namelen <= MAX_FEATURE_LEN);
585 memcpy(&he_name[8], name, namelen);
d4c19fe8 586
7b9ef140 587 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
588}
589
ffb4593c 590/*
9cbb5ea2
GS
591 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
592 * utf16-to-utf8-reversed.
ffb4593c
NT
593 */
594
c39cd008
GS
595#ifdef PERL_CR_FILTER
596static void
597strip_return(SV *sv)
598{
95a20fc0 599 register const char *s = SvPVX_const(sv);
9d4ba2ae 600 register const char * const e = s + SvCUR(sv);
7918f24d
NC
601
602 PERL_ARGS_ASSERT_STRIP_RETURN;
603
c39cd008
GS
604 /* outer loop optimized to do nothing if there are no CR-LFs */
605 while (s < e) {
606 if (*s++ == '\r' && *s == '\n') {
607 /* hit a CR-LF, need to copy the rest */
608 register char *d = s - 1;
609 *d++ = *s++;
610 while (s < e) {
611 if (*s == '\r' && s[1] == '\n')
612 s++;
613 *d++ = *s++;
614 }
615 SvCUR(sv) -= s - d;
616 return;
617 }
618 }
619}
a868473f 620
76e3520e 621STATIC I32
c39cd008 622S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 623{
f54cb97a 624 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
625 if (count > 0 && !maxlen)
626 strip_return(sv);
627 return count;
a868473f
NIS
628}
629#endif
630
199e78b7
DM
631
632
ffb4593c
NT
633/*
634 * Perl_lex_start
5486870f 635 *
e3abe207 636 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
637 *
638 * rsfp is the opened file handle to read from (if any),
639 *
640 * line holds any initial content already read from the file (or in
641 * the case of no file, such as an eval, the whole contents);
642 *
643 * new_filter indicates that this is a new file and it shouldn't inherit
644 * the filters from the current parser (ie require).
ffb4593c
NT
645 */
646
a0d0e21e 647void
5486870f 648Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 649{
97aff369 650 dVAR;
6ef55633 651 const char *s = NULL;
8990e307 652 STRLEN len;
5486870f 653 yy_parser *parser, *oparser;
acdf0a21
DM
654
655 /* create and initialise a parser */
656
199e78b7 657 Newxz(parser, 1, yy_parser);
5486870f 658 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
659 PL_parser = parser;
660
661 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
662 parser->ps = parser->stack;
663 parser->stack_size = YYINITDEPTH;
664
665 parser->stack->state = 0;
666 parser->yyerrstatus = 0;
667 parser->yychar = YYEMPTY; /* Cause a token to be read. */
668
e3abe207
DM
669 /* on scope exit, free this parser and restore any outer one */
670 SAVEPARSER(parser);
7c4baf47 671 parser->saved_curcop = PL_curcop;
e3abe207 672
acdf0a21 673 /* initialise lexer state */
8990e307 674
fb205e7a
DM
675#ifdef PERL_MAD
676 parser->curforce = -1;
677#else
678 parser->nexttoke = 0;
679#endif
ca4cfd28 680 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 681 parser->copline = NOLINE;
5afb0a62 682 parser->lex_state = LEX_NORMAL;
c2598295 683 parser->expect = XSTATE;
2f9285f8 684 parser->rsfp = rsfp;
56b27c9a 685 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 686 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 687
199e78b7
DM
688 Newx(parser->lex_brackstack, 120, char);
689 Newx(parser->lex_casestack, 12, char);
690 *parser->lex_casestack = '\0';
02b34bbe 691
10efb74f
NC
692 if (line) {
693 s = SvPV_const(line, len);
694 } else {
695 len = 0;
696 }
bdc0bf6f 697
10efb74f 698 if (!len) {
bdc0bf6f 699 parser->linestr = newSVpvs("\n;");
10efb74f 700 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 701 parser->linestr = newSVsv(line);
10efb74f 702 if (s[len-1] != ';')
bdc0bf6f 703 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
704 } else {
705 SvTEMP_off(line);
706 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 707 parser->linestr = line;
8990e307 708 }
f06b5848
DM
709 parser->oldoldbufptr =
710 parser->oldbufptr =
711 parser->bufptr =
712 parser->linestart = SvPVX(parser->linestr);
713 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
714 parser->last_lop = parser->last_uni = NULL;
79072805 715}
a687059c 716
e3abe207
DM
717
718/* delete a parser object */
719
720void
721Perl_parser_free(pTHX_ const yy_parser *parser)
722{
7918f24d
NC
723 PERL_ARGS_ASSERT_PARSER_FREE;
724
7c4baf47 725 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
726 SvREFCNT_dec(parser->linestr);
727
2f9285f8
DM
728 if (parser->rsfp == PerlIO_stdin())
729 PerlIO_clearerr(parser->rsfp);
799361c3
SH
730 else if (parser->rsfp && (!parser->old_parser ||
731 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 732 PerlIO_close(parser->rsfp);
5486870f 733 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 734
e3abe207
DM
735 Safefree(parser->stack);
736 Safefree(parser->lex_brackstack);
737 Safefree(parser->lex_casestack);
738 PL_parser = parser->old_parser;
739 Safefree(parser);
740}
741
742
ffb4593c
NT
743/*
744 * Perl_lex_end
9cbb5ea2
GS
745 * Finalizer for lexing operations. Must be called when the parser is
746 * done with the lexer.
ffb4593c
NT
747 */
748
463ee0b2 749void
864dbfa3 750Perl_lex_end(pTHX)
463ee0b2 751{
97aff369 752 dVAR;
3280af22 753 PL_doextract = FALSE;
463ee0b2
LW
754}
755
ffb4593c
NT
756/*
757 * S_incline
758 * This subroutine has nothing to do with tilting, whether at windmills
759 * or pinball tables. Its name is short for "increment line". It
57843af0 760 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 761 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
762 * # line 500 "foo.pm"
763 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
764 */
765
76e3520e 766STATIC void
d9095cec 767S_incline(pTHX_ const char *s)
463ee0b2 768{
97aff369 769 dVAR;
d9095cec
NC
770 const char *t;
771 const char *n;
772 const char *e;
463ee0b2 773
7918f24d
NC
774 PERL_ARGS_ASSERT_INCLINE;
775
57843af0 776 CopLINE_inc(PL_curcop);
463ee0b2
LW
777 if (*s++ != '#')
778 return;
d4c19fe8
AL
779 while (SPACE_OR_TAB(*s))
780 s++;
73659bf1
GS
781 if (strnEQ(s, "line", 4))
782 s += 4;
783 else
784 return;
084592ab 785 if (SPACE_OR_TAB(*s))
73659bf1 786 s++;
4e553d73 787 else
73659bf1 788 return;
d4c19fe8
AL
789 while (SPACE_OR_TAB(*s))
790 s++;
463ee0b2
LW
791 if (!isDIGIT(*s))
792 return;
d4c19fe8 793
463ee0b2
LW
794 n = s;
795 while (isDIGIT(*s))
796 s++;
07714eb4 797 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 798 return;
bf4acbe4 799 while (SPACE_OR_TAB(*s))
463ee0b2 800 s++;
73659bf1 801 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 802 s++;
73659bf1
GS
803 e = t + 1;
804 }
463ee0b2 805 else {
c35e046a
AL
806 t = s;
807 while (!isSPACE(*t))
808 t++;
73659bf1 809 e = t;
463ee0b2 810 }
bf4acbe4 811 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
812 e++;
813 if (*e != '\n' && *e != '\0')
814 return; /* false alarm */
815
f4dd75d9 816 if (t - s > 0) {
d9095cec 817 const STRLEN len = t - s;
8a5ee598 818#ifndef USE_ITHREADS
19bad673
NC
819 SV *const temp_sv = CopFILESV(PL_curcop);
820 const char *cf;
821 STRLEN tmplen;
822
823 if (temp_sv) {
824 cf = SvPVX(temp_sv);
825 tmplen = SvCUR(temp_sv);
826 } else {
827 cf = NULL;
828 tmplen = 0;
829 }
830
42d9b98d 831 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
832 /* must copy *{"::_<(eval N)[oldfilename:L]"}
833 * to *{"::_<newfilename"} */
44867030
NC
834 /* However, the long form of evals is only turned on by the
835 debugger - usually they're "(eval %lu)" */
836 char smallbuf[128];
837 char *tmpbuf;
838 GV **gvp;
d9095cec 839 STRLEN tmplen2 = len;
798b63bc 840 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
841 tmpbuf = smallbuf;
842 else
2ae0db35 843 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
844 tmpbuf[0] = '_';
845 tmpbuf[1] = '<';
2ae0db35 846 memcpy(tmpbuf + 2, cf, tmplen);
44867030 847 tmplen += 2;
8a5ee598
RGS
848 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
849 if (gvp) {
44867030
NC
850 char *tmpbuf2;
851 GV *gv2;
852
853 if (tmplen2 + 2 <= sizeof smallbuf)
854 tmpbuf2 = smallbuf;
855 else
856 Newx(tmpbuf2, tmplen2 + 2, char);
857
858 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
859 /* Either they malloc'd it, or we malloc'd it,
860 so no prefix is present in ours. */
861 tmpbuf2[0] = '_';
862 tmpbuf2[1] = '<';
863 }
864
865 memcpy(tmpbuf2 + 2, s, tmplen2);
866 tmplen2 += 2;
867
8a5ee598 868 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 869 if (!isGV(gv2)) {
8a5ee598 870 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
871 /* adjust ${"::_<newfilename"} to store the new file name */
872 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
873 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
874 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 875 }
44867030
NC
876
877 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 878 }
e66cf94c 879 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 880 }
8a5ee598 881#endif
05ec9bb3 882 CopFILE_free(PL_curcop);
d9095cec 883 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 884 }
57843af0 885 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
886}
887
29595ff2 888#ifdef PERL_MAD
cd81e915 889/* skip space before PL_thistoken */
29595ff2
NC
890
891STATIC char *
892S_skipspace0(pTHX_ register char *s)
893{
7918f24d
NC
894 PERL_ARGS_ASSERT_SKIPSPACE0;
895
29595ff2
NC
896 s = skipspace(s);
897 if (!PL_madskills)
898 return s;
cd81e915
NC
899 if (PL_skipwhite) {
900 if (!PL_thiswhite)
6b29d1f5 901 PL_thiswhite = newSVpvs("");
cd81e915
NC
902 sv_catsv(PL_thiswhite, PL_skipwhite);
903 sv_free(PL_skipwhite);
904 PL_skipwhite = 0;
905 }
906 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
907 return s;
908}
909
cd81e915 910/* skip space after PL_thistoken */
29595ff2
NC
911
912STATIC char *
913S_skipspace1(pTHX_ register char *s)
914{
d4c19fe8 915 const char *start = s;
29595ff2
NC
916 I32 startoff = start - SvPVX(PL_linestr);
917
7918f24d
NC
918 PERL_ARGS_ASSERT_SKIPSPACE1;
919
29595ff2
NC
920 s = skipspace(s);
921 if (!PL_madskills)
922 return s;
923 start = SvPVX(PL_linestr) + startoff;
cd81e915 924 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 925 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
926 PL_thistoken = newSVpvn(tstart, start - tstart);
927 }
928 PL_realtokenstart = -1;
929 if (PL_skipwhite) {
930 if (!PL_nextwhite)
6b29d1f5 931 PL_nextwhite = newSVpvs("");
cd81e915
NC
932 sv_catsv(PL_nextwhite, PL_skipwhite);
933 sv_free(PL_skipwhite);
934 PL_skipwhite = 0;
29595ff2
NC
935 }
936 return s;
937}
938
939STATIC char *
940S_skipspace2(pTHX_ register char *s, SV **svp)
941{
c35e046a
AL
942 char *start;
943 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
944 const I32 startoff = s - SvPVX(PL_linestr);
945
7918f24d
NC
946 PERL_ARGS_ASSERT_SKIPSPACE2;
947
29595ff2
NC
948 s = skipspace(s);
949 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
950 if (!PL_madskills || !svp)
951 return s;
952 start = SvPVX(PL_linestr) + startoff;
cd81e915 953 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 954 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
955 PL_thistoken = newSVpvn(tstart, start - tstart);
956 PL_realtokenstart = -1;
29595ff2 957 }
cd81e915 958 if (PL_skipwhite) {
29595ff2 959 if (!*svp)
6b29d1f5 960 *svp = newSVpvs("");
cd81e915
NC
961 sv_setsv(*svp, PL_skipwhite);
962 sv_free(PL_skipwhite);
963 PL_skipwhite = 0;
29595ff2
NC
964 }
965
966 return s;
967}
968#endif
969
80a702cd 970STATIC void
15f169a1 971S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
972{
973 AV *av = CopFILEAVx(PL_curcop);
974 if (av) {
b9f83d2f 975 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
976 if (orig_sv)
977 sv_setsv(sv, orig_sv);
978 else
979 sv_setpvn(sv, buf, len);
80a702cd
RGS
980 (void)SvIOK_on(sv);
981 SvIV_set(sv, 0);
982 av_store(av, (I32)CopLINE(PL_curcop), sv);
983 }
984}
985
ffb4593c
NT
986/*
987 * S_skipspace
988 * Called to gobble the appropriate amount and type of whitespace.
989 * Skips comments as well.
990 */
991
76e3520e 992STATIC char *
cea2e8a9 993S_skipspace(pTHX_ register char *s)
a687059c 994{
97aff369 995 dVAR;
5db06880
NC
996#ifdef PERL_MAD
997 int curoff;
998 int startoff = s - SvPVX(PL_linestr);
999
7918f24d
NC
1000 PERL_ARGS_ASSERT_SKIPSPACE;
1001
cd81e915
NC
1002 if (PL_skipwhite) {
1003 sv_free(PL_skipwhite);
1004 PL_skipwhite = 0;
5db06880
NC
1005 }
1006#endif
7918f24d 1007 PERL_ARGS_ASSERT_SKIPSPACE;
5db06880 1008
3280af22 1009 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1010 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1011 s++;
5db06880
NC
1012#ifdef PERL_MAD
1013 goto done;
1014#else
463ee0b2 1015 return s;
5db06880 1016#endif
463ee0b2
LW
1017 }
1018 for (;;) {
fd049845 1019 STRLEN prevlen;
09bef843 1020 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 1021 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
1022 while (s < PL_bufend && isSPACE(*s)) {
1023 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1024 incline(s);
1025 }
ffb4593c
NT
1026
1027 /* comment */
3280af22
NIS
1028 if (s < PL_bufend && *s == '#') {
1029 while (s < PL_bufend && *s != '\n')
463ee0b2 1030 s++;
60e6418e 1031 if (s < PL_bufend) {
463ee0b2 1032 s++;
60e6418e
GS
1033 if (PL_in_eval && !PL_rsfp) {
1034 incline(s);
1035 continue;
1036 }
1037 }
463ee0b2 1038 }
ffb4593c
NT
1039
1040 /* only continue to recharge the buffer if we're at the end
1041 * of the buffer, we're not reading from a source filter, and
1042 * we're in normal lexing mode
1043 */
09bef843
SB
1044 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1045 PL_lex_state == LEX_FORMLINE)
5db06880
NC
1046#ifdef PERL_MAD
1047 goto done;
1048#else
463ee0b2 1049 return s;
5db06880 1050#endif
ffb4593c
NT
1051
1052 /* try to recharge the buffer */
5db06880
NC
1053#ifdef PERL_MAD
1054 curoff = s - SvPVX(PL_linestr);
1055#endif
1056
5cc814fd
NC
1057 if ((s = filter_gets(PL_linestr, (prevlen = SvCUR(PL_linestr))))
1058 == NULL)
9cbb5ea2 1059 {
5db06880
NC
1060#ifdef PERL_MAD
1061 if (PL_madskills && curoff != startoff) {
cd81e915 1062 if (!PL_skipwhite)
6b29d1f5 1063 PL_skipwhite = newSVpvs("");
cd81e915 1064 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1065 curoff - startoff);
1066 }
1067
1068 /* mustn't throw out old stuff yet if madpropping */
1069 SvCUR(PL_linestr) = curoff;
1070 s = SvPVX(PL_linestr) + curoff;
1071 *s = 0;
1072 if (curoff && s[-1] == '\n')
1073 s[-1] = ' ';
1074#endif
1075
9cbb5ea2 1076 /* end of file. Add on the -p or -n magic */
cd81e915 1077 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1078 if (PL_minus_p) {
5db06880 1079#ifdef PERL_MAD
6502358f 1080 sv_catpvs(PL_linestr,
5db06880
NC
1081 ";}continue{print or die qq(-p destination: $!\\n);}");
1082#else
6502358f 1083 sv_setpvs(PL_linestr,
01a19ab0 1084 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1085#endif
3280af22 1086 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1087 }
01a19ab0 1088 else if (PL_minus_n) {
5db06880 1089#ifdef PERL_MAD
76f68e9b 1090 sv_catpvs(PL_linestr, ";}");
5db06880 1091#else
76f68e9b 1092 sv_setpvs(PL_linestr, ";}");
5db06880 1093#endif
01a19ab0
NC
1094 PL_minus_n = 0;
1095 }
a0d0e21e 1096 else
5db06880 1097#ifdef PERL_MAD
76f68e9b 1098 sv_catpvs(PL_linestr,";");
5db06880 1099#else
76f68e9b 1100 sv_setpvs(PL_linestr,";");
5db06880 1101#endif
ffb4593c
NT
1102
1103 /* reset variables for next time we lex */
9cbb5ea2 1104 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1105 = SvPVX(PL_linestr)
1106#ifdef PERL_MAD
1107 + curoff
1108#endif
1109 ;
3280af22 1110 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1111 PL_last_lop = PL_last_uni = NULL;
ffb4593c 1112
4c84d7f2 1113 /* Close the filehandle. Could be from
ffb4593c
NT
1114 * STDIN, or a regular file. If we were reading code from
1115 * STDIN (because the commandline held no -e or filename)
1116 * then we don't close it, we reset it so the code can
1117 * read from STDIN too.
1118 */
1119
4c84d7f2 1120 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
3280af22 1121 PerlIO_clearerr(PL_rsfp);
8990e307 1122 else
3280af22 1123 (void)PerlIO_close(PL_rsfp);
4608196e 1124 PL_rsfp = NULL;
463ee0b2
LW
1125 return s;
1126 }
ffb4593c
NT
1127
1128 /* not at end of file, so we only read another line */
09bef843
SB
1129 /* make corresponding updates to old pointers, for yyerror() */
1130 oldprevlen = PL_oldbufptr - PL_bufend;
1131 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1132 if (PL_last_uni)
1133 oldunilen = PL_last_uni - PL_bufend;
1134 if (PL_last_lop)
1135 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1136 PL_linestart = PL_bufptr = s + prevlen;
1137 PL_bufend = s + SvCUR(PL_linestr);
1138 s = PL_bufptr;
09bef843
SB
1139 PL_oldbufptr = s + oldprevlen;
1140 PL_oldoldbufptr = s + oldoldprevlen;
1141 if (PL_last_uni)
1142 PL_last_uni = s + oldunilen;
1143 if (PL_last_lop)
1144 PL_last_lop = s + oldloplen;
a0d0e21e 1145 incline(s);
ffb4593c
NT
1146
1147 /* debugger active and we're not compiling the debugger code,
1148 * so store the line into the debugger's array of lines
1149 */
65269a95 1150 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 1151 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1152 }
5db06880
NC
1153
1154#ifdef PERL_MAD
1155 done:
1156 if (PL_madskills) {
cd81e915 1157 if (!PL_skipwhite)
6b29d1f5 1158 PL_skipwhite = newSVpvs("");
5db06880
NC
1159 curoff = s - SvPVX(PL_linestr);
1160 if (curoff - startoff)
cd81e915 1161 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1162 curoff - startoff);
1163 }
1164 return s;
1165#endif
a687059c 1166}
378cc40b 1167
ffb4593c
NT
1168/*
1169 * S_check_uni
1170 * Check the unary operators to ensure there's no ambiguity in how they're
1171 * used. An ambiguous piece of code would be:
1172 * rand + 5
1173 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1174 * the +5 is its argument.
1175 */
1176
76e3520e 1177STATIC void
cea2e8a9 1178S_check_uni(pTHX)
ba106d47 1179{
97aff369 1180 dVAR;
d4c19fe8
AL
1181 const char *s;
1182 const char *t;
2f3197b3 1183
3280af22 1184 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1185 return;
3280af22
NIS
1186 while (isSPACE(*PL_last_uni))
1187 PL_last_uni++;
c35e046a
AL
1188 s = PL_last_uni;
1189 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1190 s++;
3280af22 1191 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1192 return;
6136c704 1193
9b387841
NC
1194 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1195 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1196 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1197}
1198
ffb4593c
NT
1199/*
1200 * LOP : macro to build a list operator. Its behaviour has been replaced
1201 * with a subroutine, S_lop() for which LOP is just another name.
1202 */
1203
a0d0e21e
LW
1204#define LOP(f,x) return lop(f,x,s)
1205
ffb4593c
NT
1206/*
1207 * S_lop
1208 * Build a list operator (or something that might be one). The rules:
1209 * - if we have a next token, then it's a list operator [why?]
1210 * - if the next thing is an opening paren, then it's a function
1211 * - else it's a list operator
1212 */
1213
76e3520e 1214STATIC I32
a0be28da 1215S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1216{
97aff369 1217 dVAR;
7918f24d
NC
1218
1219 PERL_ARGS_ASSERT_LOP;
1220
6154021b 1221 pl_yylval.ival = f;
35c8bce7 1222 CLINE;
3280af22
NIS
1223 PL_expect = x;
1224 PL_bufptr = s;
1225 PL_last_lop = PL_oldbufptr;
eb160463 1226 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1227#ifdef PERL_MAD
1228 if (PL_lasttoke)
1229 return REPORT(LSTOP);
1230#else
3280af22 1231 if (PL_nexttoke)
bbf60fe6 1232 return REPORT(LSTOP);
5db06880 1233#endif
79072805 1234 if (*s == '(')
bbf60fe6 1235 return REPORT(FUNC);
29595ff2 1236 s = PEEKSPACE(s);
79072805 1237 if (*s == '(')
bbf60fe6 1238 return REPORT(FUNC);
79072805 1239 else
bbf60fe6 1240 return REPORT(LSTOP);
79072805
LW
1241}
1242
5db06880
NC
1243#ifdef PERL_MAD
1244 /*
1245 * S_start_force
1246 * Sets up for an eventual force_next(). start_force(0) basically does
1247 * an unshift, while start_force(-1) does a push. yylex removes items
1248 * on the "pop" end.
1249 */
1250
1251STATIC void
1252S_start_force(pTHX_ int where)
1253{
1254 int i;
1255
cd81e915 1256 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1257 where = PL_lasttoke;
cd81e915
NC
1258 assert(PL_curforce < 0 || PL_curforce == where);
1259 if (PL_curforce != where) {
5db06880
NC
1260 for (i = PL_lasttoke; i > where; --i) {
1261 PL_nexttoke[i] = PL_nexttoke[i-1];
1262 }
1263 PL_lasttoke++;
1264 }
cd81e915 1265 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1266 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1267 PL_curforce = where;
1268 if (PL_nextwhite) {
5db06880 1269 if (PL_madskills)
6b29d1f5 1270 curmad('^', newSVpvs(""));
cd81e915 1271 CURMAD('_', PL_nextwhite);
5db06880
NC
1272 }
1273}
1274
1275STATIC void
1276S_curmad(pTHX_ char slot, SV *sv)
1277{
1278 MADPROP **where;
1279
1280 if (!sv)
1281 return;
cd81e915
NC
1282 if (PL_curforce < 0)
1283 where = &PL_thismad;
5db06880 1284 else
cd81e915 1285 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1286
cd81e915 1287 if (PL_faketokens)
76f68e9b 1288 sv_setpvs(sv, "");
5db06880
NC
1289 else {
1290 if (!IN_BYTES) {
1291 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1292 SvUTF8_on(sv);
1293 else if (PL_encoding) {
1294 sv_recode_to_utf8(sv, PL_encoding);
1295 }
1296 }
1297 }
1298
1299 /* keep a slot open for the head of the list? */
1300 if (slot != '_' && *where && (*where)->mad_key == '^') {
1301 (*where)->mad_key = slot;
daba3364 1302 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1303 (*where)->mad_val = (void*)sv;
1304 }
1305 else
1306 addmad(newMADsv(slot, sv), where, 0);
1307}
1308#else
b3f24c00
MHM
1309# define start_force(where) NOOP
1310# define curmad(slot, sv) NOOP
5db06880
NC
1311#endif
1312
ffb4593c
NT
1313/*
1314 * S_force_next
9cbb5ea2 1315 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1316 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1317 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1318 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1319 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1320 */
1321
4e553d73 1322STATIC void
cea2e8a9 1323S_force_next(pTHX_ I32 type)
79072805 1324{
97aff369 1325 dVAR;
704d4215
GG
1326#ifdef DEBUGGING
1327 if (DEBUG_T_TEST) {
1328 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1329 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1330 }
1331#endif
5db06880 1332#ifdef PERL_MAD
cd81e915 1333 if (PL_curforce < 0)
5db06880 1334 start_force(PL_lasttoke);
cd81e915 1335 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1336 if (PL_lex_state != LEX_KNOWNEXT)
1337 PL_lex_defer = PL_lex_state;
1338 PL_lex_state = LEX_KNOWNEXT;
1339 PL_lex_expect = PL_expect;
cd81e915 1340 PL_curforce = -1;
5db06880 1341#else
3280af22
NIS
1342 PL_nexttype[PL_nexttoke] = type;
1343 PL_nexttoke++;
1344 if (PL_lex_state != LEX_KNOWNEXT) {
1345 PL_lex_defer = PL_lex_state;
1346 PL_lex_expect = PL_expect;
1347 PL_lex_state = LEX_KNOWNEXT;
79072805 1348 }
5db06880 1349#endif
79072805
LW
1350}
1351
d0a148a6 1352STATIC SV *
15f169a1 1353S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1354{
97aff369 1355 dVAR;
740cce10 1356 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1357 !IN_BYTES
1358 && UTF
1359 && !is_ascii_string((const U8*)start, len)
740cce10 1360 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1361 return sv;
1362}
1363
ffb4593c
NT
1364/*
1365 * S_force_word
1366 * When the lexer knows the next thing is a word (for instance, it has
1367 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1368 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1369 * lookahead.
ffb4593c
NT
1370 *
1371 * Arguments:
b1b65b59 1372 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1373 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1374 * int check_keyword : if true, Perl checks to make sure the word isn't
1375 * a keyword (do this if the word is a label, e.g. goto FOO)
1376 * int allow_pack : if true, : characters will also be allowed (require,
1377 * use, etc. do this)
9cbb5ea2 1378 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1379 */
1380
76e3520e 1381STATIC char *
cea2e8a9 1382S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1383{
97aff369 1384 dVAR;
463ee0b2
LW
1385 register char *s;
1386 STRLEN len;
4e553d73 1387
7918f24d
NC
1388 PERL_ARGS_ASSERT_FORCE_WORD;
1389
29595ff2 1390 start = SKIPSPACE1(start);
463ee0b2 1391 s = start;
7e2040f0 1392 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1393 (allow_pack && *s == ':') ||
15f0808c 1394 (allow_initial_tick && *s == '\'') )
a0d0e21e 1395 {
3280af22 1396 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1397 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1398 return start;
cd81e915 1399 start_force(PL_curforce);
5db06880
NC
1400 if (PL_madskills)
1401 curmad('X', newSVpvn(start,s-start));
463ee0b2 1402 if (token == METHOD) {
29595ff2 1403 s = SKIPSPACE1(s);
463ee0b2 1404 if (*s == '(')
3280af22 1405 PL_expect = XTERM;
463ee0b2 1406 else {
3280af22 1407 PL_expect = XOPERATOR;
463ee0b2 1408 }
79072805 1409 }
e74e6b3d 1410 if (PL_madskills)
63575281 1411 curmad('g', newSVpvs( "forced" ));
9ded7720 1412 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1413 = (OP*)newSVOP(OP_CONST,0,
1414 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1415 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1416 force_next(token);
1417 }
1418 return s;
1419}
1420
ffb4593c
NT
1421/*
1422 * S_force_ident
9cbb5ea2 1423 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1424 * text only contains the "foo" portion. The first argument is a pointer
1425 * to the "foo", and the second argument is the type symbol to prefix.
1426 * Forces the next token to be a "WORD".
9cbb5ea2 1427 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1428 */
1429
76e3520e 1430STATIC void
bfed75c6 1431S_force_ident(pTHX_ register const char *s, int kind)
79072805 1432{
97aff369 1433 dVAR;
7918f24d
NC
1434
1435 PERL_ARGS_ASSERT_FORCE_IDENT;
1436
c35e046a 1437 if (*s) {
90e5519e
NC
1438 const STRLEN len = strlen(s);
1439 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1440 start_force(PL_curforce);
9ded7720 1441 NEXTVAL_NEXTTOKE.opval = o;
79072805 1442 force_next(WORD);
748a9306 1443 if (kind) {
11343788 1444 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1445 /* XXX see note in pp_entereval() for why we forgo typo
1446 warnings if the symbol must be introduced in an eval.
1447 GSAR 96-10-12 */
90e5519e
NC
1448 gv_fetchpvn_flags(s, len,
1449 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1450 : GV_ADD,
1451 kind == '$' ? SVt_PV :
1452 kind == '@' ? SVt_PVAV :
1453 kind == '%' ? SVt_PVHV :
a0d0e21e 1454 SVt_PVGV
90e5519e 1455 );
748a9306 1456 }
79072805
LW
1457 }
1458}
1459
1571675a
GS
1460NV
1461Perl_str_to_version(pTHX_ SV *sv)
1462{
1463 NV retval = 0.0;
1464 NV nshift = 1.0;
1465 STRLEN len;
cfd0369c 1466 const char *start = SvPV_const(sv,len);
9d4ba2ae 1467 const char * const end = start + len;
504618e9 1468 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
1469
1470 PERL_ARGS_ASSERT_STR_TO_VERSION;
1471
1571675a 1472 while (start < end) {
ba210ebe 1473 STRLEN skip;
1571675a
GS
1474 UV n;
1475 if (utf)
9041c2e3 1476 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1477 else {
1478 n = *(U8*)start;
1479 skip = 1;
1480 }
1481 retval += ((NV)n)/nshift;
1482 start += skip;
1483 nshift *= 1000;
1484 }
1485 return retval;
1486}
1487
4e553d73 1488/*
ffb4593c
NT
1489 * S_force_version
1490 * Forces the next token to be a version number.
e759cc13
RGS
1491 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1492 * and if "guessing" is TRUE, then no new token is created (and the caller
1493 * must use an alternative parsing method).
ffb4593c
NT
1494 */
1495
76e3520e 1496STATIC char *
e759cc13 1497S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1498{
97aff369 1499 dVAR;
5f66b61c 1500 OP *version = NULL;
44dcb63b 1501 char *d;
5db06880
NC
1502#ifdef PERL_MAD
1503 I32 startoff = s - SvPVX(PL_linestr);
1504#endif
89bfa8cd 1505
7918f24d
NC
1506 PERL_ARGS_ASSERT_FORCE_VERSION;
1507
29595ff2 1508 s = SKIPSPACE1(s);
89bfa8cd 1509
44dcb63b 1510 d = s;
dd629d5b 1511 if (*d == 'v')
44dcb63b 1512 d++;
44dcb63b 1513 if (isDIGIT(*d)) {
e759cc13
RGS
1514 while (isDIGIT(*d) || *d == '_' || *d == '.')
1515 d++;
5db06880
NC
1516#ifdef PERL_MAD
1517 if (PL_madskills) {
cd81e915 1518 start_force(PL_curforce);
5db06880
NC
1519 curmad('X', newSVpvn(s,d-s));
1520 }
1521#endif
9f3d182e 1522 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1523 SV *ver;
6154021b
RGS
1524 s = scan_num(s, &pl_yylval);
1525 version = pl_yylval.opval;
dd629d5b
GS
1526 ver = cSVOPx(version)->op_sv;
1527 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1528 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1529 SvNV_set(ver, str_to_version(ver));
1571675a 1530 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1531 }
89bfa8cd 1532 }
5db06880
NC
1533 else if (guessing) {
1534#ifdef PERL_MAD
1535 if (PL_madskills) {
cd81e915
NC
1536 sv_free(PL_nextwhite); /* let next token collect whitespace */
1537 PL_nextwhite = 0;
5db06880
NC
1538 s = SvPVX(PL_linestr) + startoff;
1539 }
1540#endif
e759cc13 1541 return s;
5db06880 1542 }
89bfa8cd
PP
1543 }
1544
5db06880
NC
1545#ifdef PERL_MAD
1546 if (PL_madskills && !version) {
cd81e915
NC
1547 sv_free(PL_nextwhite); /* let next token collect whitespace */
1548 PL_nextwhite = 0;
5db06880
NC
1549 s = SvPVX(PL_linestr) + startoff;
1550 }
1551#endif
89bfa8cd 1552 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1553 start_force(PL_curforce);
9ded7720 1554 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1555 force_next(WORD);
89bfa8cd 1556
e759cc13 1557 return s;
89bfa8cd
PP
1558}
1559
ffb4593c
NT
1560/*
1561 * S_tokeq
1562 * Tokenize a quoted string passed in as an SV. It finds the next
1563 * chunk, up to end of string or a backslash. It may make a new
1564 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1565 * turns \\ into \.
1566 */
1567
76e3520e 1568STATIC SV *
cea2e8a9 1569S_tokeq(pTHX_ SV *sv)
79072805 1570{
97aff369 1571 dVAR;
79072805
LW
1572 register char *s;
1573 register char *send;
1574 register char *d;
b3ac6de7
IZ
1575 STRLEN len = 0;
1576 SV *pv = sv;
79072805 1577
7918f24d
NC
1578 PERL_ARGS_ASSERT_TOKEQ;
1579
79072805 1580 if (!SvLEN(sv))
b3ac6de7 1581 goto finish;
79072805 1582
a0d0e21e 1583 s = SvPV_force(sv, len);
21a311ee 1584 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1585 goto finish;
463ee0b2 1586 send = s + len;
79072805
LW
1587 while (s < send && *s != '\\')
1588 s++;
1589 if (s == send)
b3ac6de7 1590 goto finish;
79072805 1591 d = s;
be4731d2 1592 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 1593 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 1594 }
79072805
LW
1595 while (s < send) {
1596 if (*s == '\\') {
a0d0e21e 1597 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1598 s++; /* all that, just for this */
1599 }
1600 *d++ = *s++;
1601 }
1602 *d = '\0';
95a20fc0 1603 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1604 finish:
3280af22 1605 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 1606 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
1607 return sv;
1608}
1609
ffb4593c
NT
1610/*
1611 * Now come three functions related to double-quote context,
1612 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1613 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1614 * interact with PL_lex_state, and create fake ( ... ) argument lists
1615 * to handle functions and concatenation.
1616 * They assume that whoever calls them will be setting up a fake
1617 * join call, because each subthing puts a ',' after it. This lets
1618 * "lower \luPpEr"
1619 * become
1620 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1621 *
1622 * (I'm not sure whether the spurious commas at the end of lcfirst's
1623 * arguments and join's arguments are created or not).
1624 */
1625
1626/*
1627 * S_sublex_start
6154021b 1628 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
1629 *
1630 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 1631 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
1632 *
1633 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1634 *
1635 * Everything else becomes a FUNC.
1636 *
1637 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1638 * had an OP_CONST or OP_READLINE). This just sets us up for a
1639 * call to S_sublex_push().
1640 */
1641
76e3520e 1642STATIC I32
cea2e8a9 1643S_sublex_start(pTHX)
79072805 1644{
97aff369 1645 dVAR;
6154021b 1646 register const I32 op_type = pl_yylval.ival;
79072805
LW
1647
1648 if (op_type == OP_NULL) {
6154021b 1649 pl_yylval.opval = PL_lex_op;
5f66b61c 1650 PL_lex_op = NULL;
79072805
LW
1651 return THING;
1652 }
1653 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1654 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1655
1656 if (SvTYPE(sv) == SVt_PVIV) {
1657 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1658 STRLEN len;
96a5add6 1659 const char * const p = SvPV_const(sv, len);
740cce10 1660 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
1661 SvREFCNT_dec(sv);
1662 sv = nsv;
4e553d73 1663 }
6154021b 1664 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1665 PL_lex_stuff = NULL;
6f33ba73
RGS
1666 /* Allow <FH> // "foo" */
1667 if (op_type == OP_READLINE)
1668 PL_expect = XTERMORDORDOR;
79072805
LW
1669 return THING;
1670 }
e3f73d4e
RGS
1671 else if (op_type == OP_BACKTICK && PL_lex_op) {
1672 /* readpipe() vas overriden */
1673 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 1674 pl_yylval.opval = PL_lex_op;
9b201d7d 1675 PL_lex_op = NULL;
e3f73d4e
RGS
1676 PL_lex_stuff = NULL;
1677 return THING;
1678 }
79072805 1679
3280af22 1680 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 1681 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
1682 PL_sublex_info.sub_op = PL_lex_op;
1683 PL_lex_state = LEX_INTERPPUSH;
55497cff 1684
3280af22
NIS
1685 PL_expect = XTERM;
1686 if (PL_lex_op) {
6154021b 1687 pl_yylval.opval = PL_lex_op;
5f66b61c 1688 PL_lex_op = NULL;
55497cff
PP
1689 return PMFUNC;
1690 }
1691 else
1692 return FUNC;
1693}
1694
ffb4593c
NT
1695/*
1696 * S_sublex_push
1697 * Create a new scope to save the lexing state. The scope will be
1698 * ended in S_sublex_done. Returns a '(', starting the function arguments
1699 * to the uc, lc, etc. found before.
1700 * Sets PL_lex_state to LEX_INTERPCONCAT.
1701 */
1702
76e3520e 1703STATIC I32
cea2e8a9 1704S_sublex_push(pTHX)
55497cff 1705{
27da23d5 1706 dVAR;
f46d017c 1707 ENTER;
55497cff 1708
3280af22 1709 PL_lex_state = PL_sublex_info.super_state;
651b5b28 1710 SAVEBOOL(PL_lex_dojoin);
3280af22 1711 SAVEI32(PL_lex_brackets);
3280af22
NIS
1712 SAVEI32(PL_lex_casemods);
1713 SAVEI32(PL_lex_starts);
651b5b28 1714 SAVEI8(PL_lex_state);
7766f137 1715 SAVEVPTR(PL_lex_inpat);
98246f1e 1716 SAVEI16(PL_lex_inwhat);
57843af0 1717 SAVECOPLINE(PL_curcop);
3280af22 1718 SAVEPPTR(PL_bufptr);
8452ff4b 1719 SAVEPPTR(PL_bufend);
3280af22
NIS
1720 SAVEPPTR(PL_oldbufptr);
1721 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1722 SAVEPPTR(PL_last_lop);
1723 SAVEPPTR(PL_last_uni);
3280af22
NIS
1724 SAVEPPTR(PL_linestart);
1725 SAVESPTR(PL_linestr);
8edd5f42
RGS
1726 SAVEGENERICPV(PL_lex_brackstack);
1727 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1728
1729 PL_linestr = PL_lex_stuff;
a0714e2c 1730 PL_lex_stuff = NULL;
3280af22 1731
9cbb5ea2
GS
1732 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1733 = SvPVX(PL_linestr);
3280af22 1734 PL_bufend += SvCUR(PL_linestr);
bd61b366 1735 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1736 SAVEFREESV(PL_linestr);
1737
1738 PL_lex_dojoin = FALSE;
1739 PL_lex_brackets = 0;
a02a5408
JC
1740 Newx(PL_lex_brackstack, 120, char);
1741 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1742 PL_lex_casemods = 0;
1743 *PL_lex_casestack = '\0';
1744 PL_lex_starts = 0;
1745 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1746 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1747
1748 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1749 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1750 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1751 else
5f66b61c 1752 PL_lex_inpat = NULL;
79072805 1753
55497cff 1754 return '(';
79072805
LW
1755}
1756
ffb4593c
NT
1757/*
1758 * S_sublex_done
1759 * Restores lexer state after a S_sublex_push.
1760 */
1761
76e3520e 1762STATIC I32
cea2e8a9 1763S_sublex_done(pTHX)
79072805 1764{
27da23d5 1765 dVAR;
3280af22 1766 if (!PL_lex_starts++) {
396482e1 1767 SV * const sv = newSVpvs("");
9aa983d2
JH
1768 if (SvUTF8(PL_linestr))
1769 SvUTF8_on(sv);
3280af22 1770 PL_expect = XOPERATOR;
6154021b 1771 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1772 return THING;
1773 }
1774
3280af22
NIS
1775 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1776 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1777 return yylex();
79072805
LW
1778 }
1779
ffb4593c 1780 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1781 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1782 PL_linestr = PL_lex_repl;
1783 PL_lex_inpat = 0;
1784 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1785 PL_bufend += SvCUR(PL_linestr);
bd61b366 1786 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1787 SAVEFREESV(PL_linestr);
1788 PL_lex_dojoin = FALSE;
1789 PL_lex_brackets = 0;
3280af22
NIS
1790 PL_lex_casemods = 0;
1791 *PL_lex_casestack = '\0';
1792 PL_lex_starts = 0;
25da4f38 1793 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1794 PL_lex_state = LEX_INTERPNORMAL;
1795 PL_lex_starts++;
e9fa98b2
HS
1796 /* we don't clear PL_lex_repl here, so that we can check later
1797 whether this is an evalled subst; that means we rely on the
1798 logic to ensure sublex_done() is called again only via the
1799 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1800 }
e9fa98b2 1801 else {
3280af22 1802 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1803 PL_lex_repl = NULL;
e9fa98b2 1804 }
79072805 1805 return ',';
ffed7fef
LW
1806 }
1807 else {
5db06880
NC
1808#ifdef PERL_MAD
1809 if (PL_madskills) {
cd81e915
NC
1810 if (PL_thiswhite) {
1811 if (!PL_endwhite)
6b29d1f5 1812 PL_endwhite = newSVpvs("");
cd81e915
NC
1813 sv_catsv(PL_endwhite, PL_thiswhite);
1814 PL_thiswhite = 0;
1815 }
1816 if (PL_thistoken)
76f68e9b 1817 sv_setpvs(PL_thistoken,"");
5db06880 1818 else
cd81e915 1819 PL_realtokenstart = -1;
5db06880
NC
1820 }
1821#endif
f46d017c 1822 LEAVE;
3280af22
NIS
1823 PL_bufend = SvPVX(PL_linestr);
1824 PL_bufend += SvCUR(PL_linestr);
1825 PL_expect = XOPERATOR;
09bef843 1826 PL_sublex_info.sub_inwhat = 0;
79072805 1827 return ')';
ffed7fef
LW
1828 }
1829}
1830
02aa26ce
NT
1831/*
1832 scan_const
1833
1834 Extracts a pattern, double-quoted string, or transliteration. This
1835 is terrifying code.
1836
94def140 1837 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1838 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1839 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1840
94def140
ST
1841 Returns a pointer to the character scanned up to. If this is
1842 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 1843 successfully parsed), will leave an OP for the substring scanned
6154021b 1844 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
1845 by looking at the next characters herself.
1846
02aa26ce
NT
1847 In patterns:
1848 backslashes:
1849 double-quoted style: \r and \n
1850 regexp special ones: \D \s
94def140
ST
1851 constants: \x31
1852 backrefs: \1
02aa26ce
NT
1853 case and quoting: \U \Q \E
1854 stops on @ and $, but not for $ as tail anchor
1855
1856 In transliterations:
1857 characters are VERY literal, except for - not at the start or end
94def140
ST
1858 of the string, which indicates a range. If the range is in bytes,
1859 scan_const expands the range to the full set of intermediate
1860 characters. If the range is in utf8, the hyphen is replaced with
1861 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1862
1863 In double-quoted strings:
1864 backslashes:
1865 double-quoted style: \r and \n
94def140
ST
1866 constants: \x31
1867 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1868 case and quoting: \U \Q \E
1869 stops on @ and $
1870
1871 scan_const does *not* construct ops to handle interpolated strings.
1872 It stops processing as soon as it finds an embedded $ or @ variable
1873 and leaves it to the caller to work out what's going on.
1874
94def140
ST
1875 embedded arrays (whether in pattern or not) could be:
1876 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1877
1878 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1879
1880 $ in pattern could be $foo or could be tail anchor. Assumption:
1881 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1882 followed by one of "()| \r\n\t"
02aa26ce
NT
1883
1884 \1 (backreferences) are turned into $1
1885
1886 The structure of the code is
1887 while (there's a character to process) {
94def140
ST
1888 handle transliteration ranges
1889 skip regexp comments /(?#comment)/ and codes /(?{code})/
1890 skip #-initiated comments in //x patterns
1891 check for embedded arrays
02aa26ce
NT
1892 check for embedded scalars
1893 if (backslash) {
94def140
ST
1894 leave intact backslashes from leaveit (below)
1895 deprecate \1 in substitution replacements
02aa26ce
NT
1896 handle string-changing backslashes \l \U \Q \E, etc.
1897 switch (what was escaped) {
94def140
ST
1898 handle \- in a transliteration (becomes a literal -)
1899 handle \132 (octal characters)
1900 handle \x15 and \x{1234} (hex characters)
1901 handle \N{name} (named characters)
1902 handle \cV (control characters)
1903 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 1904 } (end switch)
77a135fe 1905 continue
02aa26ce 1906 } (end if backslash)
77a135fe 1907 handle regular character
02aa26ce 1908 } (end while character to read)
4e553d73 1909
02aa26ce
NT
1910*/
1911
76e3520e 1912STATIC char *
cea2e8a9 1913S_scan_const(pTHX_ char *start)
79072805 1914{
97aff369 1915 dVAR;
3280af22 1916 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
1917 SV *sv = newSV(send - start); /* sv for the constant. See
1918 note below on sizing. */
02aa26ce
NT
1919 register char *s = start; /* start of the constant */
1920 register char *d = SvPVX(sv); /* destination for copies */
1921 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1922 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 1923 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
1924 I32 this_utf8 = UTF; /* Is the source string assumed
1925 to be UTF8? But, this can
1926 show as true when the source
1927 isn't utf8, as for example
1928 when it is entirely composed
1929 of hex constants */
1930
1931 /* Note on sizing: The scanned constant is placed into sv, which is
1932 * initialized by newSV() assuming one byte of output for every byte of
1933 * input. This routine expects newSV() to allocate an extra byte for a
1934 * trailing NUL, which this routine will append if it gets to the end of
1935 * the input. There may be more bytes of input than output (eg., \N{LATIN
1936 * CAPITAL LETTER A}), or more output than input if the constant ends up
1937 * recoded to utf8, but each time a construct is found that might increase
1938 * the needed size, SvGROW() is called. Its size parameter each time is
1939 * based on the best guess estimate at the time, namely the length used so
1940 * far, plus the length the current construct will occupy, plus room for
1941 * the trailing NUL, plus one byte for every input byte still unscanned */
1942
012bcf8d 1943 UV uv;
4c3a8340
ST
1944#ifdef EBCDIC
1945 UV literal_endpoint = 0;
e294cc5d 1946 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1947#endif
012bcf8d 1948
7918f24d
NC
1949 PERL_ARGS_ASSERT_SCAN_CONST;
1950
2b9d42f0
NIS
1951 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1952 /* If we are doing a trans and we know we want UTF8 set expectation */
1953 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1954 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1955 }
1956
1957
79072805 1958 while (s < send || dorange) {
02aa26ce 1959 /* get transliterations out of the way (they're most literal) */
3280af22 1960 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1961 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1962 if (dorange) {
1ba5c669
JH
1963 I32 i; /* current expanded character */
1964 I32 min; /* first character in range */
1965 I32 max; /* last character in range */
02aa26ce 1966
e294cc5d
JH
1967#ifdef EBCDIC
1968 UV uvmax = 0;
1969#endif
1970
1971 if (has_utf8
1972#ifdef EBCDIC
1973 && !native_range
1974#endif
1975 ) {
9d4ba2ae 1976 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1977 char *e = d++;
1978 while (e-- > c)
1979 *(e + 1) = *e;
25716404 1980 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1981 /* mark the range as done, and continue */
1982 dorange = FALSE;
1983 didrange = TRUE;
1984 continue;
1985 }
2b9d42f0 1986
95a20fc0 1987 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1988#ifdef EBCDIC
1989 SvGROW(sv,
1990 SvLEN(sv) + (has_utf8 ?
1991 (512 - UTF_CONTINUATION_MARK +
1992 UNISKIP(0x100))
1993 : 256));
1994 /* How many two-byte within 0..255: 128 in UTF-8,
1995 * 96 in UTF-8-mod. */
1996#else
9cbb5ea2 1997 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1998#endif
9cbb5ea2 1999 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2000#ifdef EBCDIC
2001 if (has_utf8) {
2002 int j;
2003 for (j = 0; j <= 1; j++) {
2004 char * const c = (char*)utf8_hop((U8*)d, -1);
2005 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2006 if (j)
2007 min = (U8)uv;
2008 else if (uv < 256)
2009 max = (U8)uv;
2010 else {
2011 max = (U8)0xff; /* only to \xff */
2012 uvmax = uv; /* \x{100} to uvmax */
2013 }
2014 d = c; /* eat endpoint chars */
2015 }
2016 }
2017 else {
2018#endif
2019 d -= 2; /* eat the first char and the - */
2020 min = (U8)*d; /* first char in range */
2021 max = (U8)d[1]; /* last char in range */
2022#ifdef EBCDIC
2023 }
2024#endif
8ada0baa 2025
c2e66d9e 2026 if (min > max) {
01ec43d0 2027 Perl_croak(aTHX_
d1573ac7 2028 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2029 (char)min, (char)max);
c2e66d9e
GS
2030 }
2031
c7f1f016 2032#ifdef EBCDIC
4c3a8340
ST
2033 if (literal_endpoint == 2 &&
2034 ((isLOWER(min) && isLOWER(max)) ||
2035 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2036 if (isLOWER(min)) {
2037 for (i = min; i <= max; i++)
2038 if (isLOWER(i))
db42d148 2039 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2040 } else {
2041 for (i = min; i <= max; i++)
2042 if (isUPPER(i))
db42d148 2043 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2044 }
2045 }
2046 else
2047#endif
2048 for (i = min; i <= max; i++)
e294cc5d
JH
2049#ifdef EBCDIC
2050 if (has_utf8) {
2051 const U8 ch = (U8)NATIVE_TO_UTF(i);
2052 if (UNI_IS_INVARIANT(ch))
2053 *d++ = (U8)i;
2054 else {
2055 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2056 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2057 }
2058 }
2059 else
2060#endif
2061 *d++ = (char)i;
2062
2063#ifdef EBCDIC
2064 if (uvmax) {
2065 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2066 if (uvmax > 0x101)
2067 *d++ = (char)UTF_TO_NATIVE(0xff);
2068 if (uvmax > 0x100)
2069 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2070 }
2071#endif
02aa26ce
NT
2072
2073 /* mark the range as done, and continue */
79072805 2074 dorange = FALSE;
01ec43d0 2075 didrange = TRUE;
4c3a8340
ST
2076#ifdef EBCDIC
2077 literal_endpoint = 0;
2078#endif
79072805 2079 continue;
4e553d73 2080 }
02aa26ce
NT
2081
2082 /* range begins (ignore - as first or last char) */
79072805 2083 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2084 if (didrange) {
1fafa243 2085 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2086 }
e294cc5d
JH
2087 if (has_utf8
2088#ifdef EBCDIC
2089 && !native_range
2090#endif
2091 ) {
25716404 2092 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2093 s++;
2094 continue;
2095 }
79072805
LW
2096 dorange = TRUE;
2097 s++;
01ec43d0
GS
2098 }
2099 else {
2100 didrange = FALSE;
4c3a8340
ST
2101#ifdef EBCDIC
2102 literal_endpoint = 0;
e294cc5d 2103 native_range = TRUE;
4c3a8340 2104#endif
01ec43d0 2105 }
79072805 2106 }
02aa26ce
NT
2107
2108 /* if we get here, we're not doing a transliteration */
2109
0f5d15d6
IZ
2110 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2111 except for the last char, which will be done separately. */
3280af22 2112 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2113 if (s[2] == '#') {
e994fd66 2114 while (s+1 < send && *s != ')')
db42d148 2115 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2116 }
2117 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2118 || (s[2] == '?' && s[3] == '{'))
155aba94 2119 {
cc6b7395 2120 I32 count = 1;
0f5d15d6 2121 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2122 char c;
2123
d9f97599
GS
2124 while (count && (c = *regparse)) {
2125 if (c == '\\' && regparse[1])
2126 regparse++;
4e553d73 2127 else if (c == '{')
cc6b7395 2128 count++;
4e553d73 2129 else if (c == '}')
cc6b7395 2130 count--;
d9f97599 2131 regparse++;
cc6b7395 2132 }
e994fd66 2133 if (*regparse != ')')
5bdf89e7 2134 regparse--; /* Leave one char for continuation. */
0f5d15d6 2135 while (s < regparse)
db42d148 2136 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2137 }
748a9306 2138 }
02aa26ce
NT
2139
2140 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2141 else if (*s == '#' && PL_lex_inpat &&
2142 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2143 while (s+1 < send && *s != '\n')
db42d148 2144 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2145 }
02aa26ce 2146
5d1d4326 2147 /* check for embedded arrays
da6eedaa 2148 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2149 */
1749ea0d
ST
2150 else if (*s == '@' && s[1]) {
2151 if (isALNUM_lazy_if(s+1,UTF))
2152 break;
2153 if (strchr(":'{$", s[1]))
2154 break;
2155 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2156 break; /* in regexp, neither @+ nor @- are interpolated */
2157 }
02aa26ce
NT
2158
2159 /* check for embedded scalars. only stop if we're sure it's a
2160 variable.
2161 */
79072805 2162 else if (*s == '$') {
3280af22 2163 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2164 break;
77772344 2165 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2166 if (s[1] == '\\') {
2167 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2168 "Possible unintended interpolation of $\\ in regex");
77772344 2169 }
79072805 2170 break; /* in regexp, $ might be tail anchor */
77772344 2171 }
79072805 2172 }
02aa26ce 2173
2b9d42f0
NIS
2174 /* End of else if chain - OP_TRANS rejoin rest */
2175
02aa26ce 2176 /* backslashes */
79072805
LW
2177 if (*s == '\\' && s+1 < send) {
2178 s++;
02aa26ce 2179
02aa26ce 2180 /* deprecate \1 in strings and substitution replacements */
3280af22 2181 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2182 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2183 {
a2a5de95 2184 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2185 *--s = '$';
2186 break;
2187 }
02aa26ce
NT
2188
2189 /* string-change backslash escapes */
3280af22 2190 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2191 --s;
2192 break;
2193 }
cc74c5bd
ST
2194 /* skip any other backslash escapes in a pattern */
2195 else if (PL_lex_inpat) {
2196 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2197 goto default_action;
2198 }
02aa26ce
NT
2199
2200 /* if we get here, it's either a quoted -, or a digit */
79072805 2201 switch (*s) {
02aa26ce
NT
2202
2203 /* quoted - in transliterations */
79072805 2204 case '-':
3280af22 2205 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2206 *d++ = *s++;
2207 continue;
2208 }
2209 /* FALL THROUGH */
2210 default:
11b8faa4 2211 {
a2a5de95
NC
2212 if ((isALPHA(*s) || isDIGIT(*s)))
2213 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2214 "Unrecognized escape \\%c passed through",
2215 *s);
11b8faa4 2216 /* default action is to copy the quoted character */
f9a63242 2217 goto default_action;
11b8faa4 2218 }
02aa26ce 2219
77a135fe 2220 /* eg. \132 indicates the octal constant 0x132 */
79072805
LW
2221 case '0': case '1': case '2': case '3':
2222 case '4': case '5': case '6': case '7':
ba210ebe 2223 {
53305cf1
NC
2224 I32 flags = 0;
2225 STRLEN len = 3;
77a135fe 2226 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2227 s += len;
2228 }
012bcf8d 2229 goto NUM_ESCAPE_INSERT;
02aa26ce 2230
77a135fe 2231 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2232 case 'x':
a0ed51b3
LW
2233 ++s;
2234 if (*s == '{') {
9d4ba2ae 2235 char* const e = strchr(s, '}');
a4c04bdc
NC
2236 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2237 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2238 STRLEN len;
355860ce 2239
53305cf1 2240 ++s;
adaeee49 2241 if (!e) {
a0ed51b3 2242 yyerror("Missing right brace on \\x{}");
355860ce 2243 continue;
ba210ebe 2244 }
53305cf1 2245 len = e - s;
77a135fe 2246 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2247 s = e + 1;
a0ed51b3
LW
2248 }
2249 else {
ba210ebe 2250 {
53305cf1 2251 STRLEN len = 2;
a4c04bdc 2252 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2253 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2254 s += len;
2255 }
012bcf8d
GS
2256 }
2257
2258 NUM_ESCAPE_INSERT:
77a135fe
KW
2259 /* Insert oct, hex, or \N{U+...} escaped character. There will
2260 * always be enough room in sv since such escapes will be
2261 * longer than any UTF-8 sequence they can end up as, except if
2262 * they force us to recode the rest of the string into utf8 */
ba7cea30 2263
77a135fe
KW
2264 /* Here uv is the ordinal of the next character being added in
2265 * unicode (converted from native). (It has to be done before
2266 * here because \N is interpreted as unicode, and oct and hex
2267 * as native.) */
2268 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2269 if (!has_utf8 && uv > 255) {
77a135fe
KW
2270 /* Might need to recode whatever we have accumulated so
2271 * far if it contains any chars variant in utf8 or
2272 * utf-ebcdic. */
2273
2274 SvCUR_set(sv, d - SvPVX_const(sv));
2275 SvPOK_on(sv);
2276 *d = '\0';
77a135fe 2277 /* See Note on sizing above. */
7bf79863
KW
2278 sv_utf8_upgrade_flags_grow(sv,
2279 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2280 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2281 d = SvPVX(sv) + SvCUR(sv);
2282 has_utf8 = TRUE;
012bcf8d
GS
2283 }
2284
77a135fe
KW
2285 if (has_utf8) {
2286 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2287 if (PL_lex_inwhat == OP_TRANS &&
2288 PL_sublex_info.sub_op) {
2289 PL_sublex_info.sub_op->op_private |=
2290 (PL_lex_repl ? OPpTRANS_FROM_UTF
2291 : OPpTRANS_TO_UTF);
f9a63242 2292 }
e294cc5d
JH
2293#ifdef EBCDIC
2294 if (uv > 255 && !dorange)
2295 native_range = FALSE;
2296#endif
012bcf8d 2297 }
a0ed51b3 2298 else {
012bcf8d 2299 *d++ = (char)uv;
a0ed51b3 2300 }
012bcf8d
GS
2301 }
2302 else {
c4d5f83a 2303 *d++ = (char) uv;
a0ed51b3 2304 }
79072805 2305 continue;
02aa26ce 2306
77a135fe
KW
2307 /* \N{LATIN SMALL LETTER A} is a named character, and so is
2308 * \N{U+0041} */
4a2d328f 2309 case 'N':
55eda711 2310 ++s;
423cee85
JH
2311 if (*s == '{') {
2312 char* e = strchr(s, '}');
155aba94 2313 SV *res;
423cee85 2314 STRLEN len;
cfd0369c 2315 const char *str;
4e553d73 2316
423cee85 2317 if (!e) {
5777a3f7 2318 yyerror("Missing right brace on \\N{}");
423cee85
JH
2319 e = s - 1;
2320 goto cont_scan;
2321 }
dbc0d4f2 2322 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
77a135fe
KW
2323 /* \N{U+...} The ... is a unicode value even on EBCDIC
2324 * machines */
dbc0d4f2
JH
2325 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2326 PERL_SCAN_DISALLOW_PREFIX;
2327 s += 3;
2328 len = e - s;
2329 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2330 if ( e > s && len != (STRLEN)(e - s) ) {
2331 uv = 0xFFFD;
fc8cd66c 2332 }
dbc0d4f2
JH
2333 s = e + 1;
2334 goto NUM_ESCAPE_INSERT;
2335 }
55eda711 2336 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2337 res = new_constant( NULL, 0, "charnames",
eb0d8d16 2338 res, NULL, s - 2, e - s + 3 );
f9a63242
JH
2339 if (has_utf8)
2340 sv_utf8_upgrade(res);
cfd0369c 2341 str = SvPV_const(res,len);
1c47067b
JH
2342#ifdef EBCDIC_NEVER_MIND
2343 /* charnames uses pack U and that has been
2344 * recently changed to do the below uni->native
2345 * mapping, so this would be redundant (and wrong,
2346 * the code point would be doubly converted).
2347 * But leave this in just in case the pack U change
2348 * gets revoked, but the semantics is still
2349 * desireable for charnames. --jhi */
cddc7ef4 2350 {
cfd0369c 2351 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2352
2353 if (uv < 0x100) {
89ebb4a3 2354 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2355
2356 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2357 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2358 str = SvPV_const(res, len);
cddc7ef4
JH
2359 }
2360 }
2361#endif
77a135fe
KW
2362 /* If destination is not in utf8 but this new character is,
2363 * recode the dest to utf8 */
89491803 2364 if (!has_utf8 && SvUTF8(res)) {
77a135fe 2365 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 2366 SvPOK_on(sv);
e4f3eed8 2367 *d = '\0';
77a135fe 2368 /* See Note on sizing above. */
7bf79863
KW
2369 sv_utf8_upgrade_flags_grow(sv,
2370 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2371 len + (STRLEN)(send - s) + 1);
f08d6ad9 2372 d = SvPVX(sv) + SvCUR(sv);
89491803 2373 has_utf8 = TRUE;
77a135fe 2374 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85 2375
77a135fe
KW
2376 /* See Note on sizing above. (NOTE: SvCUR() is not set
2377 * correctly here). */
2378 const STRLEN off = d - SvPVX_const(sv);
2379 d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
423cee85 2380 }
e294cc5d
JH
2381#ifdef EBCDIC
2382 if (!dorange)
2383 native_range = FALSE; /* \N{} is guessed to be Unicode */
2384#endif
423cee85
JH
2385 Copy(str, d, len, char);
2386 d += len;
2387 SvREFCNT_dec(res);
2388 cont_scan:
2389 s = e + 1;
2390 }
2391 else
5777a3f7 2392 yyerror("Missing braces on \\N{}");
423cee85
JH
2393 continue;
2394
02aa26ce 2395 /* \c is a control character */
79072805
LW
2396 case 'c':
2397 s++;
961ce445 2398 if (s < send) {
ba210ebe 2399 U8 c = *s++;
c7f1f016
NIS
2400#ifdef EBCDIC
2401 if (isLOWER(c))
2402 c = toUPPER(c);
2403#endif
db42d148 2404 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2405 }
961ce445
RGS
2406 else {
2407 yyerror("Missing control char name in \\c");
2408 }
79072805 2409 continue;
02aa26ce
NT
2410
2411 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2412 case 'b':
db42d148 2413 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2414 break;
2415 case 'n':
db42d148 2416 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2417 break;
2418 case 'r':
db42d148 2419 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2420 break;
2421 case 'f':
db42d148 2422 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2423 break;
2424 case 't':
db42d148 2425 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2426 break;
34a3fe2a 2427 case 'e':
db42d148 2428 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2429 break;
2430 case 'a':
db42d148 2431 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2432 break;
02aa26ce
NT
2433 } /* end switch */
2434
79072805
LW
2435 s++;
2436 continue;
02aa26ce 2437 } /* end if (backslash) */
4c3a8340
ST
2438#ifdef EBCDIC
2439 else
2440 literal_endpoint++;
2441#endif
02aa26ce 2442
f9a63242 2443 default_action:
77a135fe
KW
2444 /* If we started with encoded form, or already know we want it,
2445 then encode the next character */
2446 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 2447 STRLEN len = 1;
77a135fe
KW
2448
2449
2450 /* One might think that it is wasted effort in the case of the
2451 * source being utf8 (this_utf8 == TRUE) to take the next character
2452 * in the source, convert it to an unsigned value, and then convert
2453 * it back again. But the source has not been validated here. The
2454 * routine that does the conversion checks for errors like
2455 * malformed utf8 */
2456
5f66b61c
AL
2457 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2458 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
2459 if (!has_utf8) {
2460 SvCUR_set(sv, d - SvPVX_const(sv));
2461 SvPOK_on(sv);
2462 *d = '\0';
77a135fe 2463 /* See Note on sizing above. */
7bf79863
KW
2464 sv_utf8_upgrade_flags_grow(sv,
2465 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2466 need + (STRLEN)(send - s) + 1);
77a135fe
KW
2467 d = SvPVX(sv) + SvCUR(sv);
2468 has_utf8 = TRUE;
2469 } else if (need > len) {
2470 /* encoded value larger than old, may need extra space (NOTE:
2471 * SvCUR() is not set correctly here). See Note on sizing
2472 * above. */
9d4ba2ae 2473 const STRLEN off = d - SvPVX_const(sv);
77a135fe 2474 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 2475 }
77a135fe
KW
2476 s += len;
2477
5f66b61c 2478 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
2479#ifdef EBCDIC
2480 if (uv > 255 && !dorange)
2481 native_range = FALSE;
2482#endif
2b9d42f0
NIS
2483 }
2484 else {
2485 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2486 }
02aa26ce
NT
2487 } /* while loop to process each character */
2488
2489 /* terminate the string and set up the sv */
79072805 2490 *d = '\0';
95a20fc0 2491 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2492 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2493 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2494
79072805 2495 SvPOK_on(sv);
9f4817db 2496 if (PL_encoding && !has_utf8) {
d0063567
DK
2497 sv_recode_to_utf8(sv, PL_encoding);
2498 if (SvUTF8(sv))
2499 has_utf8 = TRUE;
9f4817db 2500 }
2b9d42f0 2501 if (has_utf8) {
7e2040f0 2502 SvUTF8_on(sv);
2b9d42f0 2503 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2504 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2505 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2506 }
2507 }
79072805 2508
02aa26ce 2509 /* shrink the sv if we allocated more than we used */
79072805 2510 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2511 SvPV_shrink_to_cur(sv);
79072805 2512 }
02aa26ce 2513
6154021b 2514 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 2515 if (s > PL_bufptr) {
eb0d8d16
NC
2516 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2517 const char *const key = PL_lex_inpat ? "qr" : "q";
2518 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2519 const char *type;
2520 STRLEN typelen;
2521
2522 if (PL_lex_inwhat == OP_TRANS) {
2523 type = "tr";
2524 typelen = 2;
2525 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2526 type = "s";
2527 typelen = 1;
2528 } else {
2529 type = "qq";
2530 typelen = 2;
2531 }
2532
2533 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2534 type, typelen);
2535 }
6154021b 2536 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2537 } else
8990e307 2538 SvREFCNT_dec(sv);
79072805
LW
2539 return s;
2540}
2541
ffb4593c
NT
2542/* S_intuit_more
2543 * Returns TRUE if there's more to the expression (e.g., a subscript),
2544 * FALSE otherwise.
ffb4593c
NT
2545 *
2546 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2547 *
2548 * ->[ and ->{ return TRUE
2549 * { and [ outside a pattern are always subscripts, so return TRUE
2550 * if we're outside a pattern and it's not { or [, then return FALSE
2551 * if we're in a pattern and the first char is a {
2552 * {4,5} (any digits around the comma) returns FALSE
2553 * if we're in a pattern and the first char is a [
2554 * [] returns FALSE
2555 * [SOMETHING] has a funky algorithm to decide whether it's a
2556 * character class or not. It has to deal with things like
2557 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2558 * anything else returns TRUE
2559 */
2560
9cbb5ea2
GS
2561/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2562
76e3520e 2563STATIC int
cea2e8a9 2564S_intuit_more(pTHX_ register char *s)
79072805 2565{
97aff369 2566 dVAR;
7918f24d
NC
2567
2568 PERL_ARGS_ASSERT_INTUIT_MORE;
2569
3280af22 2570 if (PL_lex_brackets)
79072805
LW
2571 return TRUE;
2572 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2573 return TRUE;
2574 if (*s != '{' && *s != '[')
2575 return FALSE;
3280af22 2576 if (!PL_lex_inpat)
79072805
LW
2577 return TRUE;
2578
2579 /* In a pattern, so maybe we have {n,m}. */
2580 if (*s == '{') {
2581 s++;
2582 if (!isDIGIT(*s))
2583 return TRUE;
2584 while (isDIGIT(*s))
2585 s++;
2586 if (*s == ',')
2587 s++;
2588 while (isDIGIT(*s))
2589 s++;
2590 if (*s == '}')
2591 return FALSE;
2592 return TRUE;
2593
2594 }
2595
2596 /* On the other hand, maybe we have a character class */
2597
2598 s++;
2599 if (*s == ']' || *s == '^')
2600 return FALSE;
2601 else {
ffb4593c 2602 /* this is terrifying, and it works */
79072805
LW
2603 int weight = 2; /* let's weigh the evidence */
2604 char seen[256];
f27ffc4a 2605 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2606 const char * const send = strchr(s,']');
3280af22 2607 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2608
2609 if (!send) /* has to be an expression */
2610 return TRUE;
2611
2612 Zero(seen,256,char);
2613 if (*s == '$')
2614 weight -= 3;
2615 else if (isDIGIT(*s)) {
2616 if (s[1] != ']') {
2617 if (isDIGIT(s[1]) && s[2] == ']')
2618 weight -= 10;
2619 }
2620 else
2621 weight -= 100;
2622 }
2623 for (; s < send; s++) {
2624 last_un_char = un_char;
2625 un_char = (unsigned char)*s;
2626 switch (*s) {
2627 case '@':
2628 case '&':
2629 case '$':
2630 weight -= seen[un_char] * 10;
7e2040f0 2631 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2632 int len;
8903cb82 2633 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2634 len = (int)strlen(tmpbuf);
2635 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2636 weight -= 100;
2637 else
2638 weight -= 10;
2639 }
2640 else if (*s == '$' && s[1] &&
93a17b20
LW
2641 strchr("[#!%*<>()-=",s[1])) {
2642 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2643 weight -= 10;
2644 else
2645 weight -= 1;
2646 }
2647 break;
2648 case '\\':
2649 un_char = 254;
2650 if (s[1]) {
93a17b20 2651 if (strchr("wds]",s[1]))
79072805 2652 weight += 100;
10edeb5d 2653 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2654 weight += 1;
93a17b20 2655 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2656 weight += 40;
2657 else if (isDIGIT(s[1])) {
2658 weight += 40;
2659 while (s[1] && isDIGIT(s[1]))
2660 s++;
2661 }
2662 }
2663 else
2664 weight += 100;
2665 break;
2666 case '-':
2667 if (s[1] == '\\')
2668 weight += 50;
93a17b20 2669 if (strchr("aA01! ",last_un_char))
79072805 2670 weight += 30;
93a17b20 2671 if (strchr("zZ79~",s[1]))
79072805 2672 weight += 30;
f27ffc4a
GS
2673 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2674 weight -= 5; /* cope with negative subscript */
79072805
LW
2675 break;
2676 default:
3792a11b
NC
2677 if (!isALNUM(last_un_char)
2678 && !(last_un_char == '$' || last_un_char == '@'
2679 || last_un_char == '&')
2680 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2681 char *d = tmpbuf;
2682 while (isALPHA(*s))
2683 *d++ = *s++;
2684 *d = '\0';
5458a98a 2685 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2686 weight -= 150;
2687 }
2688 if (un_char == last_un_char + 1)
2689 weight += 5;
2690 weight -= seen[un_char];
2691 break;
2692 }
2693 seen[un_char]++;
2694 }
2695 if (weight >= 0) /* probably a character class */
2696 return FALSE;
2697 }
2698
2699 return TRUE;
2700}
ffed7fef 2701
ffb4593c
NT
2702/*
2703 * S_intuit_method
2704 *
2705 * Does all the checking to disambiguate
2706 * foo bar
2707 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2708 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2709 *
2710 * First argument is the stuff after the first token, e.g. "bar".
2711 *
2712 * Not a method if bar is a filehandle.
2713 * Not a method if foo is a subroutine prototyped to take a filehandle.
2714 * Not a method if it's really "Foo $bar"
2715 * Method if it's "foo $bar"
2716 * Not a method if it's really "print foo $bar"
2717 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2718 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2719 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2720 * =>
2721 */
2722
76e3520e 2723STATIC int
62d55b22 2724S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2725{
97aff369 2726 dVAR;
a0d0e21e 2727 char *s = start + (*start == '$');
3280af22 2728 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2729 STRLEN len;
2730 GV* indirgv;
5db06880
NC
2731#ifdef PERL_MAD
2732 int soff;
2733#endif
a0d0e21e 2734
7918f24d
NC
2735 PERL_ARGS_ASSERT_INTUIT_METHOD;
2736
a0d0e21e 2737 if (gv) {
62d55b22 2738 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2739 return 0;
62d55b22
NC
2740 if (cv) {
2741 if (SvPOK(cv)) {
2742 const char *proto = SvPVX_const(cv);
2743 if (proto) {
2744 if (*proto == ';')
2745 proto++;
2746 if (*proto == '*')
2747 return 0;
2748 }
b6c543e3
IZ
2749 }
2750 } else
c35e046a 2751 gv = NULL;
a0d0e21e 2752 }
8903cb82 2753 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2754 /* start is the beginning of the possible filehandle/object,
2755 * and s is the end of it
2756 * tmpbuf is a copy of it
2757 */
2758
a0d0e21e 2759 if (*start == '$') {
3ef1310e
RGS
2760 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2761 isUPPER(*PL_tokenbuf))
a0d0e21e 2762 return 0;
5db06880
NC
2763#ifdef PERL_MAD
2764 len = start - SvPVX(PL_linestr);
2765#endif
29595ff2 2766 s = PEEKSPACE(s);
f0092767 2767#ifdef PERL_MAD
5db06880
NC
2768 start = SvPVX(PL_linestr) + len;
2769#endif
3280af22
NIS
2770 PL_bufptr = start;
2771 PL_expect = XREF;
a0d0e21e
LW
2772 return *s == '(' ? FUNCMETH : METHOD;
2773 }
5458a98a 2774 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2775 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2776 len -= 2;
2777 tmpbuf[len] = '\0';
5db06880
NC
2778#ifdef PERL_MAD
2779 soff = s - SvPVX(PL_linestr);
2780#endif
c3e0f903
GS
2781 goto bare_package;
2782 }
90e5519e 2783 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2784 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2785 return 0;
2786 /* filehandle or package name makes it a method */
da51bb9b 2787 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2788#ifdef PERL_MAD
2789 soff = s - SvPVX(PL_linestr);
2790#endif
29595ff2 2791 s = PEEKSPACE(s);
3280af22 2792 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2793 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2794 bare_package:
cd81e915 2795 start_force(PL_curforce);
9ded7720 2796 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 2797 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 2798 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2799 if (PL_madskills)
2800 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2801 PL_expect = XTERM;
a0d0e21e 2802 force_next(WORD);
3280af22 2803 PL_bufptr = s;
5db06880
NC
2804#ifdef PERL_MAD
2805 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2806#endif
a0d0e21e
LW
2807 return *s == '(' ? FUNCMETH : METHOD;
2808 }
2809 }
2810 return 0;
2811}
2812
16d20bd9 2813/* Encoded script support. filter_add() effectively inserts a
4e553d73 2814 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2815 * Note that the filter function only applies to the current source file
2816 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2817 *
2818 * The datasv parameter (which may be NULL) can be used to pass
2819 * private data to this instance of the filter. The filter function
2820 * can recover the SV using the FILTER_DATA macro and use it to
2821 * store private buffers and state information.
2822 *
2823 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2824 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2825 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2826 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2827 * private use must be set using malloc'd pointers.
2828 */
16d20bd9
AD
2829
2830SV *
864dbfa3 2831Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2832{
97aff369 2833 dVAR;
f4c556ac 2834 if (!funcp)
a0714e2c 2835 return NULL;
f4c556ac 2836
5486870f
DM
2837 if (!PL_parser)
2838 return NULL;
2839
3280af22
NIS
2840 if (!PL_rsfp_filters)
2841 PL_rsfp_filters = newAV();
16d20bd9 2842 if (!datasv)
561b68a9 2843 datasv = newSV(0);
862a34c6 2844 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2845 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2846 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2847 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2848 FPTR2DPTR(void *, IoANY(datasv)),
2849 SvPV_nolen(datasv)));
3280af22
NIS
2850 av_unshift(PL_rsfp_filters, 1);
2851 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2852 return(datasv);
2853}
4e553d73 2854
16d20bd9
AD
2855
2856/* Delete most recently added instance of this filter function. */
a0d0e21e 2857void
864dbfa3 2858Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2859{
97aff369 2860 dVAR;
e0c19803 2861 SV *datasv;
24801a4b 2862
7918f24d
NC
2863 PERL_ARGS_ASSERT_FILTER_DEL;
2864
33073adb 2865#ifdef DEBUGGING
55662e27
JH
2866 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2867 FPTR2DPTR(void*, funcp)));
33073adb 2868#endif
5486870f 2869 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2870 return;
2871 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2872 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2873 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2874 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2875 IoANY(datasv) = (void *)NULL;
3280af22 2876 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2877
16d20bd9
AD
2878 return;
2879 }
2880 /* we need to search for the correct entry and clear it */
cea2e8a9 2881 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2882}
2883
2884
1de9afcd
RGS
2885/* Invoke the idxth filter function for the current rsfp. */
2886/* maxlen 0 = read one text line */
16d20bd9 2887I32
864dbfa3 2888Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2889{
97aff369 2890 dVAR;
16d20bd9
AD
2891 filter_t funcp;
2892 SV *datasv = NULL;
f482118e
NC
2893 /* This API is bad. It should have been using unsigned int for maxlen.
2894 Not sure if we want to change the API, but if not we should sanity
2895 check the value here. */
39cd7a59
NC
2896 const unsigned int correct_length
2897 = maxlen < 0 ?
2898#ifdef PERL_MICRO
2899 0x7FFFFFFF
2900#else
2901 INT_MAX
2902#endif
2903 : maxlen;
e50aee73 2904
7918f24d
NC
2905 PERL_ARGS_ASSERT_FILTER_READ;
2906
5486870f 2907 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 2908 return -1;
1de9afcd 2909 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2910 /* Provide a default input filter to make life easy. */
2911 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2912 DEBUG_P(PerlIO_printf(Perl_debug_log,
2913 "filter_read %d: from rsfp\n", idx));
f482118e 2914 if (correct_length) {
16d20bd9
AD
2915 /* Want a block */
2916 int len ;
f54cb97a 2917 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2918
2919 /* ensure buf_sv is large enough */
881d8f0a 2920 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
2921 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2922 correct_length)) <= 0) {
3280af22 2923 if (PerlIO_error(PL_rsfp))
37120919
AD
2924 return -1; /* error */
2925 else
2926 return 0 ; /* end of file */
2927 }
16d20bd9 2928 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 2929 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
2930 } else {
2931 /* Want a line */
3280af22
NIS
2932 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2933 if (PerlIO_error(PL_rsfp))
37120919
AD
2934 return -1; /* error */
2935 else
2936 return 0 ; /* end of file */
2937 }
16d20bd9
AD
2938 }
2939 return SvCUR(buf_sv);
2940 }
2941 /* Skip this filter slot if filter has been deleted */
1de9afcd 2942 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2943 DEBUG_P(PerlIO_printf(Perl_debug_log,
2944 "filter_read %d: skipped (filter deleted)\n",
2945 idx));
f482118e 2946 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2947 }
2948 /* Get function pointer hidden within datasv */
8141890a 2949 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2950 DEBUG_P(PerlIO_printf(Perl_debug_log,
2951 "filter_read %d: via function %p (%s)\n",
ca0270c4 2952 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2953 /* Call function. The function is expected to */
2954 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2955 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2956 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2957}
2958
76e3520e 2959STATIC char *
5cc814fd 2960S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 2961{
97aff369 2962 dVAR;
7918f24d
NC
2963
2964 PERL_ARGS_ASSERT_FILTER_GETS;
2965
c39cd008 2966#ifdef PERL_CR_FILTER
3280af22 2967 if (!PL_rsfp_filters) {
c39cd008 2968 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2969 }
2970#endif
3280af22 2971 if (PL_rsfp_filters) {
55497cff
PP
2972 if (!append)
2973 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2974 if (FILTER_READ(0, sv, 0) > 0)
2975 return ( SvPVX(sv) ) ;
2976 else
bd61b366 2977 return NULL ;
16d20bd9 2978 }
9d116dd7 2979 else
5cc814fd 2980 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
2981}
2982
01ec43d0 2983STATIC HV *
9bde8eb0 2984S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 2985{
97aff369 2986 dVAR;
def3634b
GS
2987 GV *gv;
2988
7918f24d
NC
2989 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
2990
01ec43d0 2991 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2992 return PL_curstash;
2993
2994 if (len > 2 &&
2995 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2996 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2997 {
2998 return GvHV(gv); /* Foo:: */
def3634b
GS
2999 }
3000
3001 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3002 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3003 if (gv && GvCV(gv)) {
3004 SV * const sv = cv_const_sv(GvCV(gv));
3005 if (sv)
9bde8eb0 3006 pkgname = SvPV_const(sv, len);
def3634b
GS
3007 }
3008
9bde8eb0 3009 return gv_stashpvn(pkgname, len, 0);
def3634b 3010}
a0d0e21e 3011
e3f73d4e
RGS
3012/*
3013 * S_readpipe_override
3014 * Check whether readpipe() is overriden, and generates the appropriate
3015 * optree, provided sublex_start() is called afterwards.
3016 */
3017STATIC void
1d51329b 3018S_readpipe_override(pTHX)
e3f73d4e
RGS
3019{
3020 GV **gvp;
3021 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3022 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3023 if ((gv_readpipe
3024 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3025 ||
3026 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3027 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3028 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3029 {
3030 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3031 append_elem(OP_LIST,
3032 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3033 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3034 }
e3f73d4e
RGS
3035}
3036
5db06880
NC
3037#ifdef PERL_MAD
3038 /*
3039 * Perl_madlex
3040 * The intent of this yylex wrapper is to minimize the changes to the
3041 * tokener when we aren't interested in collecting madprops. It remains
3042 * to be seen how successful this strategy will be...
3043 */
3044
3045int
3046Perl_madlex(pTHX)
3047{
3048 int optype;
3049 char *s = PL_bufptr;
3050
cd81e915
NC
3051 /* make sure PL_thiswhite is initialized */
3052 PL_thiswhite = 0;
3053 PL_thismad = 0;
5db06880 3054
cd81e915 3055 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3056 if (PL_pending_ident)
3057 return S_pending_ident(aTHX);
3058
3059 /* previous token ate up our whitespace? */
cd81e915
NC
3060 if (!PL_lasttoke && PL_nextwhite) {
3061 PL_thiswhite = PL_nextwhite;
3062 PL_nextwhite = 0;
5db06880
NC
3063 }
3064
3065 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3066 PL_realtokenstart = -1;
3067 PL_thistoken = 0;
5db06880
NC
3068 optype = yylex();
3069 s = PL_bufptr;
cd81e915 3070 assert(PL_curforce < 0);
5db06880 3071
cd81e915
NC
3072 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3073 if (!PL_thistoken) {
3074 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3075 PL_thistoken = newSVpvs("");
5db06880 3076 else {
c35e046a 3077 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3078 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3079 }
3080 }
cd81e915
NC
3081 if (PL_thismad) /* install head */
3082 CURMAD('X', PL_thistoken);
5db06880
NC
3083 }
3084
3085 /* last whitespace of a sublex? */
cd81e915
NC
3086 if (optype == ')' && PL_endwhite) {
3087 CURMAD('X', PL_endwhite);
5db06880
NC
3088 }
3089
cd81e915 3090 if (!PL_thismad) {
5db06880
NC
3091
3092 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3093 if (!PL_thiswhite && !PL_endwhite && !optype) {
3094 sv_free(PL_thistoken);
3095 PL_thistoken = 0;
5db06880
NC
3096 return 0;
3097 }
3098
3099 /* put off final whitespace till peg */
3100 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3101 PL_nextwhite = PL_thiswhite;
3102 PL_thiswhite = 0;
5db06880 3103 }
cd81e915
NC
3104 else if (PL_thisopen) {
3105 CURMAD('q', PL_thisopen);
3106 if (PL_thistoken)
3107 sv_free(PL_thistoken);
3108 PL_thistoken = 0;
5db06880
NC
3109 }
3110 else {
3111 /* Store actual token text as madprop X */
cd81e915 3112 CURMAD('X', PL_thistoken);
5db06880
NC
3113 }
3114
cd81e915 3115 if (PL_thiswhite) {
5db06880 3116 /* add preceding whitespace as madprop _ */
cd81e915 3117 CURMAD('_', PL_thiswhite);
5db06880
NC
3118 }
3119
cd81e915 3120 if (PL_thisstuff) {
5db06880 3121 /* add quoted material as madprop = */
cd81e915 3122 CURMAD('=', PL_thisstuff);
5db06880
NC
3123 }
3124
cd81e915 3125 if (PL_thisclose) {
5db06880 3126 /* add terminating quote as madprop Q */
cd81e915 3127 CURMAD('Q', PL_thisclose);
5db06880
NC
3128 }
3129 }
3130
3131 /* special processing based on optype */
3132
3133 switch (optype) {
3134
3135 /* opval doesn't need a TOKEN since it can already store mp */
3136 case WORD:
3137 case METHOD:
3138 case FUNCMETH:
3139 case THING:
3140 case PMFUNC:
3141 case PRIVATEREF:
3142 case FUNC0SUB:
3143 case UNIOPSUB:
3144 case LSTOPSUB:
6154021b
RGS
3145 if (pl_yylval.opval)
3146 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 3147 PL_thismad = 0;
5db06880
NC
3148 return optype;
3149
3150 /* fake EOF */
3151 case 0:
3152 optype = PEG;
cd81e915
NC
3153 if (PL_endwhite) {
3154 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3155 PL_endwhite = 0;
5db06880
NC
3156 }
3157 break;
3158
3159 case ']':
3160 case '}':
cd81e915 3161 if (PL_faketokens)
5db06880
NC
3162 break;
3163 /* remember any fake bracket that lexer is about to discard */
3164 if (PL_lex_brackets == 1 &&
3165 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3166 {
3167 s = PL_bufptr;
3168 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3169 s++;
3170 if (*s == '}') {
cd81e915
NC
3171 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3172 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3173 PL_thiswhite = 0;
5db06880
NC
3174 PL_bufptr = s - 1;
3175 break; /* don't bother looking for trailing comment */
3176 }
3177 else
3178 s = PL_bufptr;
3179 }
3180 if (optype == ']')
3181 break;
3182 /* FALLTHROUGH */
3183
3184 /* attach a trailing comment to its statement instead of next token */
3185 case ';':
cd81e915 3186 if (PL_faketokens)
5db06880
NC
3187 break;
3188 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3189 s = PL_bufptr;
3190 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3191 s++;
3192 if (*s == '\n' || *s == '#') {
3193 while (s < PL_bufend && *s != '\n')
3194 s++;
3195 if (s < PL_bufend)
3196 s++;
cd81e915
NC
3197 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3198 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3199 PL_thiswhite = 0;
5db06880
NC
3200 PL_bufptr = s;
3201 }
3202 }
3203 break;
3204
3205 /* pval */
3206 case LABEL:
3207 break;
3208
3209 /* ival */
3210 default:
3211 break;
3212
3213 }
3214
3215 /* Create new token struct. Note: opvals return early above. */
6154021b 3216 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 3217 PL_thismad = 0;
5db06880
NC
3218 return optype;
3219}
3220#endif
3221
468aa647 3222STATIC char *
cc6ed77d 3223S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3224 dVAR;
7918f24d
NC
3225
3226 PERL_ARGS_ASSERT_TOKENIZE_USE;
3227
468aa647
RGS
3228 if (PL_expect != XSTATE)
3229 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3230 is_use ? "use" : "no"));
29595ff2 3231 s = SKIPSPACE1(s);
468aa647
RGS
3232 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3233 s = force_version(s, TRUE);
29595ff2 3234 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3235 start_force(PL_curforce);
9ded7720 3236 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3237 force_next(WORD);
3238 }
3239 else if (*s == 'v') {
3240 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3241 s = force_version(s, FALSE);
3242 }
3243 }
3244 else {
3245 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3246 s = force_version(s, FALSE);
3247 }
6154021b 3248 pl_yylval.ival = is_use;
468aa647
RGS
3249 return s;
3250}
748a9306 3251#ifdef DEBUGGING
27da23d5 3252 static const char* const exp_name[] =
09bef843 3253 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3254 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3255 };
748a9306 3256#endif
463ee0b2 3257
02aa26ce
NT
3258/*
3259 yylex
3260
3261 Works out what to call the token just pulled out of the input
3262 stream. The yacc parser takes care of taking the ops we return and
3263 stitching them into a tree.
3264
3265 Returns:
3266 PRIVATEREF
3267
3268 Structure:
3269 if read an identifier
3270 if we're in a my declaration
3271 croak if they tried to say my($foo::bar)
3272 build the ops for a my() declaration
3273 if it's an access to a my() variable
3274 are we in a sort block?
3275 croak if my($a); $a <=> $b
3276 build ops for access to a my() variable
3277 if in a dq string, and they've said @foo and we can't find @foo
3278 croak
3279 build ops for a bareword
3280 if we already built the token before, use it.
3281*/
3282
20141f0e 3283
dba4d153
JH
3284#ifdef __SC__
3285#pragma segment Perl_yylex
3286#endif
dba4d153 3287int
dba4d153 3288Perl_yylex(pTHX)
20141f0e 3289{
97aff369 3290 dVAR;
3afc138a 3291 register char *s = PL_bufptr;
378cc40b 3292 register char *d;
463ee0b2 3293 STRLEN len;
aa7440fb 3294 bool bof = FALSE;
a687059c 3295
10edeb5d
JH
3296 /* orig_keyword, gvp, and gv are initialized here because
3297 * jump to the label just_a_word_zero can bypass their
3298 * initialization later. */
3299 I32 orig_keyword = 0;
3300 GV *gv = NULL;
3301 GV **gvp = NULL;
3302
bbf60fe6 3303 DEBUG_T( {
396482e1 3304 SV* tmp = newSVpvs("");
b6007c36
DM
3305 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3306 (IV)CopLINE(PL_curcop),
3307 lex_state_names[PL_lex_state],
3308 exp_name[PL_expect],
3309 pv_display(tmp, s, strlen(s), 0, 60));
3310 SvREFCNT_dec(tmp);
bbf60fe6 3311 } );
02aa26ce 3312 /* check if there's an identifier for us to look at */
ba979b31 3313 if (PL_pending_ident)
bbf60fe6 3314 return REPORT(S_pending_ident(aTHX));
bbce6d69 3315
02aa26ce
NT
3316 /* no identifier pending identification */
3317
3280af22 3318 switch (PL_lex_state) {
79072805
LW
3319#ifdef COMMENTARY
3320 case LEX_NORMAL: /* Some compilers will produce faster */
3321 case LEX_INTERPNORMAL: /* code if we comment these out. */
3322 break;
3323#endif
3324
09bef843 3325 /* when we've already built the next token, just pull it out of the queue */
79072805 3326 case LEX_KNOWNEXT:
5db06880
NC
3327#ifdef PERL_MAD
3328 PL_lasttoke--;
6154021b 3329 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 3330 if (PL_madskills) {
cd81e915 3331 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3332 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 3333 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 3334 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
3335 PL_thismad->mad_val = 0;
3336 mad_free(PL_thismad);
3337 PL_thismad = 0;
5db06880
NC
3338 }
3339 }
3340 if (!PL_lasttoke) {
3341 PL_lex_state = PL_lex_defer;
3342 PL_expect = PL_lex_expect;
3343 PL_lex_defer = LEX_NORMAL;
3344 if (!PL_nexttoke[PL_lasttoke].next_type)
3345 return yylex();
3346 }
3347#else
3280af22 3348 PL_nexttoke--;
6154021b 3349 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3350 if (!PL_nexttoke) {
3351 PL_lex_state = PL_lex_defer;
3352 PL_expect = PL_lex_expect;
3353 PL_lex_defer = LEX_NORMAL;
463ee0b2 3354 }
5db06880
NC
3355#endif
3356#ifdef PERL_MAD
3357 /* FIXME - can these be merged? */
3358 return(PL_nexttoke[PL_lasttoke].next_type);
3359#else
bbf60fe6 3360 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3361#endif
79072805 3362
02aa26ce 3363 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3364 when we get here, PL_bufptr is at the \
02aa26ce 3365 */
79072805
LW
3366 case LEX_INTERPCASEMOD:
3367#ifdef DEBUGGING
3280af22 3368 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3369 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3370#endif
02aa26ce 3371 /* handle \E or end of string */
3280af22 3372 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3373 /* if at a \E */
3280af22 3374 if (PL_lex_casemods) {
f54cb97a 3375 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3376 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3377
3792a11b
NC
3378 if (PL_bufptr != PL_bufend
3379 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3380 PL_bufptr += 2;
3381 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3382#ifdef PERL_MAD
3383 if (PL_madskills)
6b29d1f5 3384 PL_thistoken = newSVpvs("\\E");
5db06880 3385#endif
a0d0e21e 3386 }
bbf60fe6 3387 return REPORT(')');
79072805 3388 }
5db06880
NC
3389#ifdef PERL_MAD
3390 while (PL_bufptr != PL_bufend &&
3391 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3392 if (!PL_thiswhite)
6b29d1f5 3393 PL_thiswhite = newSVpvs("");
cd81e915 3394 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3395 PL_bufptr += 2;
3396 }
3397#else
3280af22
NIS
3398 if (PL_bufptr != PL_bufend)
3399 PL_bufptr += 2;
5db06880 3400#endif
3280af22 3401 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3402 return yylex();
79072805
LW
3403 }
3404 else {
607df283 3405 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3406 "### Saw case modifier\n"); });
3280af22 3407 s = PL_bufptr + 1;
6e909404 3408 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3409#ifdef PERL_MAD
cd81e915 3410 if (!PL_thiswhite)
6b29d1f5 3411 PL_thiswhite = newSVpvs("");
cd81e915 3412 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3413#endif
89122651 3414 PL_bufptr = s + 3;
6e909404
JH
3415 PL_lex_state = LEX_INTERPCONCAT;
3416 return yylex();
a0d0e21e 3417 }
6e909404 3418 else {
90771dc0 3419 I32 tmp;
5db06880
NC
3420 if (!PL_madskills) /* when just compiling don't need correct */
3421 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3422 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3423 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3424 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3425 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3426 return REPORT(')');
6e909404
JH
3427 }
3428 if (PL_lex_casemods > 10)
3429 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3430 PL_lex_casestack[PL_lex_casemods++] = *s;
3431 PL_lex_casestack[PL_lex_casemods] = '\0';
3432 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3433 start_force(PL_curforce);
9ded7720 3434 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3435 force_next('(');
cd81e915 3436 start_force(PL_curforce);
6e909404 3437 if (*s == 'l')
9ded7720 3438 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3439 else if (*s == 'u')
9ded7720 3440 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3441 else if (*s == 'L')
9ded7720 3442 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3443 else if (*s == 'U')
9ded7720 3444 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3445 else if (*s == 'Q')
9ded7720 3446 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3447 else
3448 Perl_croak(aTHX_ "panic: yylex");
5db06880 3449 if (PL_madskills) {
a5849ce5
NC
3450 SV* const tmpsv = newSVpvs("\\ ");
3451 /* replace the space with the character we want to escape
3452 */
3453 SvPVX(tmpsv)[1] = *s;
5db06880
NC
3454 curmad('_', tmpsv);
3455 }
6e909404 3456 PL_bufptr = s + 1;
a0d0e21e 3457 }
79072805 3458 force_next(FUNC);
3280af22
NIS
3459 if (PL_lex_starts) {
3460 s = PL_bufptr;
3461 PL_lex_starts = 0;
5db06880
NC
3462#ifdef PERL_MAD
3463 if (PL_madskills) {
cd81e915
NC
3464 if (PL_thistoken)
3465 sv_free(PL_thistoken);
6b29d1f5 3466 PL_thistoken = newSVpvs("");
5db06880
NC
3467 }
3468#endif
131b3ad0
DM
3469 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3470 if (PL_lex_casemods == 1 && PL_lex_inpat)
3471 OPERATOR(',');
3472 else
3473 Aop(OP_CONCAT);
79072805
LW
3474 }
3475 else
cea2e8a9 3476 return yylex();
79072805
LW
3477 }
3478
55497cff 3479 case LEX_INTERPPUSH:
bbf60fe6 3480 return REPORT(sublex_push());
55497cff 3481
79072805 3482 case LEX_INTERPSTART:
3280af22 3483 if (PL_bufptr == PL_bufend)
bbf60fe6 3484 return REPORT(sublex_done());
607df283 3485 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3486 "### Interpolated variable\n"); });
3280af22
NIS
3487 PL_expect = XTERM;
3488 PL_lex_dojoin = (*PL_bufptr == '@');
3489 PL_lex_state = LEX_INTERPNORMAL;
3490 if (PL_lex_dojoin) {
cd81e915 3491 start_force(PL_curforce);
9ded7720 3492 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3493 force_next(',');
cd81e915 3494 start_force(PL_curforce);
a0d0e21e 3495 force_ident("\"", '$');
cd81e915 3496 start_force(PL_curforce);
9ded7720 3497 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3498 force_next('$');
cd81e915 3499 start_force(PL_curforce);
9ded7720 3500 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3501 force_next('(');
cd81e915 3502 start_force(PL_curforce);
9ded7720 3503 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3504 force_next(FUNC);
3505 }
3280af22
NIS
3506 if (PL_lex_starts++) {
3507 s = PL_bufptr;
5db06880
NC
3508#ifdef PERL_MAD
3509 if (PL_madskills) {
cd81e915
NC
3510 if (PL_thistoken)
3511 sv_free(PL_thistoken);
6b29d1f5 3512 PL_thistoken = newSVpvs("");
5db06880
NC
3513 }
3514#endif
131b3ad0
DM
3515 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3516 if (!PL_lex_casemods && PL_lex_inpat)
3517 OPERATOR(',');
3518 else
3519 Aop(OP_CONCAT);
79072805 3520 }
cea2e8a9 3521 return yylex();
79072805
LW
3522
3523 case LEX_INTERPENDMAYBE:
3280af22
NIS
3524 if (intuit_more(PL_bufptr)) {
3525 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3526 break;
3527 }
3528 /* FALL THROUGH */
3529
3530 case LEX_INTERPEND:
3280af22
NIS
3531 if (PL_lex_dojoin) {
3532 PL_lex_dojoin = FALSE;
3533 PL_lex_state = LEX_INTERPCONCAT;