This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Make the style of pad_add_name's flags consistent with pad_new's and pad_tidy's.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
378cc40b 24#include "EXTERN.h"
864dbfa3 25#define PERL_IN_TOKE_C
378cc40b 26#include "perl.h"
378cc40b 27
eb0d8d16
NC
28#define new_constant(a,b,c,d,e,f,g) \
29 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
30
6154021b 31#define pl_yylval (PL_parser->yylval)
d3b6f988 32
acdf0a21
DM
33/* YYINITDEPTH -- initial size of the parser's stacks. */
34#define YYINITDEPTH 200
35
199e78b7
DM
36/* XXX temporary backwards compatibility */
37#define PL_lex_brackets (PL_parser->lex_brackets)
38#define PL_lex_brackstack (PL_parser->lex_brackstack)
39#define PL_lex_casemods (PL_parser->lex_casemods)
40#define PL_lex_casestack (PL_parser->lex_casestack)
41#define PL_lex_defer (PL_parser->lex_defer)
42#define PL_lex_dojoin (PL_parser->lex_dojoin)
43#define PL_lex_expect (PL_parser->lex_expect)
44#define PL_lex_formbrack (PL_parser->lex_formbrack)
45#define PL_lex_inpat (PL_parser->lex_inpat)
46#define PL_lex_inwhat (PL_parser->lex_inwhat)
47#define PL_lex_op (PL_parser->lex_op)
48#define PL_lex_repl (PL_parser->lex_repl)
49#define PL_lex_starts (PL_parser->lex_starts)
50#define PL_lex_stuff (PL_parser->lex_stuff)
51#define PL_multi_start (PL_parser->multi_start)
52#define PL_multi_open (PL_parser->multi_open)
53#define PL_multi_close (PL_parser->multi_close)
54#define PL_pending_ident (PL_parser->pending_ident)
55#define PL_preambled (PL_parser->preambled)
56#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 57#define PL_linestr (PL_parser->linestr)
c2598295
DM
58#define PL_expect (PL_parser->expect)
59#define PL_copline (PL_parser->copline)
f06b5848
DM
60#define PL_bufptr (PL_parser->bufptr)
61#define PL_oldbufptr (PL_parser->oldbufptr)
62#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
63#define PL_linestart (PL_parser->linestart)
64#define PL_bufend (PL_parser->bufend)
65#define PL_last_uni (PL_parser->last_uni)
66#define PL_last_lop (PL_parser->last_lop)
67#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 68#define PL_lex_state (PL_parser->lex_state)
2f9285f8 69#define PL_rsfp (PL_parser->rsfp)
5486870f 70#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
71#define PL_in_my (PL_parser->in_my)
72#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 73#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 74#define PL_multi_end (PL_parser->multi_end)
13765c85 75#define PL_error_count (PL_parser->error_count)
199e78b7
DM
76
77#ifdef PERL_MAD
78# define PL_endwhite (PL_parser->endwhite)
79# define PL_faketokens (PL_parser->faketokens)
80# define PL_lasttoke (PL_parser->lasttoke)
81# define PL_nextwhite (PL_parser->nextwhite)
82# define PL_realtokenstart (PL_parser->realtokenstart)
83# define PL_skipwhite (PL_parser->skipwhite)
84# define PL_thisclose (PL_parser->thisclose)
85# define PL_thismad (PL_parser->thismad)
86# define PL_thisopen (PL_parser->thisopen)
87# define PL_thisstuff (PL_parser->thisstuff)
88# define PL_thistoken (PL_parser->thistoken)
89# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
90# define PL_thiswhite (PL_parser->thiswhite)
91# define PL_nexttoke (PL_parser->nexttoke)
92# define PL_curforce (PL_parser->curforce)
93#else
94# define PL_nexttoke (PL_parser->nexttoke)
95# define PL_nexttype (PL_parser->nexttype)
96# define PL_nextval (PL_parser->nextval)
199e78b7
DM
97#endif
98
16173588
NC
99/* This can't be done with embed.fnc, because struct yy_parser contains a
100 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
101static int
102S_pending_ident(pTHX);
199e78b7 103
0bd48802 104static const char ident_too_long[] = "Identifier too long";
8903cb82 105
29595ff2 106#ifdef PERL_MAD
29595ff2 107# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 108# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 109#else
5db06880 110# define CURMAD(slot,sv)
9ded7720 111# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
112#endif
113
9059aa12
LW
114#define XFAKEBRACK 128
115#define XENUMMASK 127
116
39e02b42
JH
117#ifdef USE_UTF8_SCRIPTS
118# define UTF (!IN_BYTES)
2b9d42f0 119#else
746b446a 120# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 121#endif
a0ed51b3 122
b1fc3636
CJ
123/* The maximum number of characters preceding the unrecognized one to display */
124#define UNRECOGNIZED_PRECEDE_COUNT 10
125
61f0cdd9 126/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
127 * 1999-02-27 mjd-perl-patch@plover.com */
128#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
129
bf4acbe4 130#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 131
ffb4593c
NT
132/* LEX_* are values for PL_lex_state, the state of the lexer.
133 * They are arranged oddly so that the guard on the switch statement
79072805
LW
134 * can get by with a single comparison (if the compiler is smart enough).
135 */
136
fb73857a 137/* #define LEX_NOTPARSING 11 is done in perl.h. */
138
b6007c36
DM
139#define LEX_NORMAL 10 /* normal code (ie not within "...") */
140#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
141#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
142#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
143#define LEX_INTERPSTART 6 /* expecting the start of a $var */
144
145 /* at end of code, eg "$x" followed by: */
146#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
147#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
148
149#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
150 string or after \E, $foo, etc */
151#define LEX_INTERPCONST 2 /* NOT USED */
152#define LEX_FORMLINE 1 /* expecting a format line */
153#define LEX_KNOWNEXT 0 /* next token known; just return it */
154
79072805 155
bbf60fe6 156#ifdef DEBUGGING
27da23d5 157static const char* const lex_state_names[] = {
bbf60fe6
DM
158 "KNOWNEXT",
159 "FORMLINE",
160 "INTERPCONST",
161 "INTERPCONCAT",
162 "INTERPENDMAYBE",
163 "INTERPEND",
164 "INTERPSTART",
165 "INTERPPUSH",
166 "INTERPCASEMOD",
167 "INTERPNORMAL",
168 "NORMAL"
169};
170#endif
171
79072805
LW
172#ifdef ff_next
173#undef ff_next
d48672a2
LW
174#endif
175
79072805 176#include "keywords.h"
fe14fcc3 177
ffb4593c
NT
178/* CLINE is a macro that ensures PL_copline has a sane value */
179
ae986130
LW
180#ifdef CLINE
181#undef CLINE
182#endif
57843af0 183#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 184
5db06880 185#ifdef PERL_MAD
29595ff2
NC
186# define SKIPSPACE0(s) skipspace0(s)
187# define SKIPSPACE1(s) skipspace1(s)
188# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
189# define PEEKSPACE(s) skipspace2(s,0)
190#else
191# define SKIPSPACE0(s) skipspace(s)
192# define SKIPSPACE1(s) skipspace(s)
193# define SKIPSPACE2(s,tsv) skipspace(s)
194# define PEEKSPACE(s) skipspace(s)
195#endif
196
ffb4593c
NT
197/*
198 * Convenience functions to return different tokens and prime the
9cbb5ea2 199 * lexer for the next token. They all take an argument.
ffb4593c
NT
200 *
201 * TOKEN : generic token (used for '(', DOLSHARP, etc)
202 * OPERATOR : generic operator
203 * AOPERATOR : assignment operator
204 * PREBLOCK : beginning the block after an if, while, foreach, ...
205 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
206 * PREREF : *EXPR where EXPR is not a simple identifier
207 * TERM : expression term
208 * LOOPX : loop exiting command (goto, last, dump, etc)
209 * FTST : file test operator
210 * FUN0 : zero-argument function
2d2e263d 211 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
212 * BOop : bitwise or or xor
213 * BAop : bitwise and
214 * SHop : shift operator
215 * PWop : power operator
9cbb5ea2 216 * PMop : pattern-matching operator
ffb4593c
NT
217 * Aop : addition-level operator
218 * Mop : multiplication-level operator
219 * Eop : equality-testing operator
e5edeb50 220 * Rop : relational operator <= != gt
ffb4593c
NT
221 *
222 * Also see LOP and lop() below.
223 */
224
998054bd 225#ifdef DEBUGGING /* Serve -DT. */
704d4215 226# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 227#else
bbf60fe6 228# define REPORT(retval) (retval)
998054bd
SC
229#endif
230
bbf60fe6
DM
231#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
232#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
233#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
234#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
235#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
236#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
237#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
238#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
239#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
240#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
241#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
242#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
243#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
244#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
245#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
246#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
247#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
248#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
249#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
250#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 251
a687059c
LW
252/* This bit of chicanery makes a unary function followed by
253 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
254 * The UNIDOR macro is for unary functions that can be followed by the //
255 * operator (such as C<shift // 0>).
a687059c 256 */
376fcdbf 257#define UNI2(f,x) { \
6154021b 258 pl_yylval.ival = f; \
376fcdbf
AL
259 PL_expect = x; \
260 PL_bufptr = s; \
261 PL_last_uni = PL_oldbufptr; \
262 PL_last_lop_op = f; \
263 if (*s == '(') \
264 return REPORT( (int)FUNC1 ); \
29595ff2 265 s = PEEKSPACE(s); \
376fcdbf
AL
266 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
267 }
6f33ba73
RGS
268#define UNI(f) UNI2(f,XTERM)
269#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 270
376fcdbf 271#define UNIBRACK(f) { \
6154021b 272 pl_yylval.ival = f; \
376fcdbf
AL
273 PL_bufptr = s; \
274 PL_last_uni = PL_oldbufptr; \
275 if (*s == '(') \
276 return REPORT( (int)FUNC1 ); \
29595ff2 277 s = PEEKSPACE(s); \
376fcdbf
AL
278 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
279 }
79072805 280
9f68db38 281/* grandfather return to old style */
6154021b 282#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 283
8fa7f367
JH
284#ifdef DEBUGGING
285
6154021b 286/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
287enum token_type {
288 TOKENTYPE_NONE,
289 TOKENTYPE_IVAL,
6154021b 290 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
291 TOKENTYPE_PVAL,
292 TOKENTYPE_OPVAL,
293 TOKENTYPE_GVVAL
294};
295
6d4a66ac
NC
296static struct debug_tokens {
297 const int token;
298 enum token_type type;
299 const char *name;
300} const debug_tokens[] =
9041c2e3 301{
bbf60fe6
DM
302 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
303 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
304 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
305 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
306 { ARROW, TOKENTYPE_NONE, "ARROW" },
307 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
308 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
309 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
310 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
311 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 312 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
313 { DO, TOKENTYPE_NONE, "DO" },
314 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
315 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
316 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
317 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
318 { ELSE, TOKENTYPE_NONE, "ELSE" },
319 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
320 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
321 { FOR, TOKENTYPE_IVAL, "FOR" },
322 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
323 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
324 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
325 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
326 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
327 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 328 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
329 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
330 { IF, TOKENTYPE_IVAL, "IF" },
331 { LABEL, TOKENTYPE_PVAL, "LABEL" },
332 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
333 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
334 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
335 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
336 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
337 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
338 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
339 { MY, TOKENTYPE_IVAL, "MY" },
340 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
341 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
342 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
343 { OROP, TOKENTYPE_IVAL, "OROP" },
344 { OROR, TOKENTYPE_NONE, "OROR" },
345 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
346 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
347 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
348 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
349 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
350 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
351 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
352 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
353 { PREINC, TOKENTYPE_NONE, "PREINC" },
354 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
355 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
356 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
357 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
358 { SUB, TOKENTYPE_NONE, "SUB" },
359 { THING, TOKENTYPE_OPVAL, "THING" },
360 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
361 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
362 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
363 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
364 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
365 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 366 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
367 { WHILE, TOKENTYPE_IVAL, "WHILE" },
368 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 369 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 370 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
371};
372
6154021b 373/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 374
bbf60fe6 375STATIC int
704d4215 376S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 377{
97aff369 378 dVAR;
7918f24d
NC
379
380 PERL_ARGS_ASSERT_TOKEREPORT;
381
bbf60fe6 382 if (DEBUG_T_TEST) {
bd61b366 383 const char *name = NULL;
bbf60fe6 384 enum token_type type = TOKENTYPE_NONE;
f54cb97a 385 const struct debug_tokens *p;
396482e1 386 SV* const report = newSVpvs("<== ");
bbf60fe6 387
f54cb97a 388 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
389 if (p->token == (int)rv) {
390 name = p->name;
391 type = p->type;
392 break;
393 }
394 }
395 if (name)
54667de8 396 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
397 else if ((char)rv > ' ' && (char)rv < '~')
398 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
399 else if (!rv)
396482e1 400 sv_catpvs(report, "EOF");
bbf60fe6
DM
401 else
402 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
403 switch (type) {
404 case TOKENTYPE_NONE:
405 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
406 break;
407 case TOKENTYPE_IVAL:
704d4215 408 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
409 break;
410 case TOKENTYPE_OPNUM:
411 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 412 PL_op_name[lvalp->ival]);
bbf60fe6
DM
413 break;
414 case TOKENTYPE_PVAL:
704d4215 415 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
416 break;
417 case TOKENTYPE_OPVAL:
704d4215 418 if (lvalp->opval) {
401441c0 419 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
420 PL_op_name[lvalp->opval->op_type]);
421 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 422 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 423 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
424 }
425
426 }
401441c0 427 else
396482e1 428 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
429 break;
430 }
b6007c36 431 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
432 };
433 return (int)rv;
998054bd
SC
434}
435
b6007c36
DM
436
437/* print the buffer with suitable escapes */
438
439STATIC void
15f169a1 440S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 441{
396482e1 442 SV* const tmp = newSVpvs("");
7918f24d
NC
443
444 PERL_ARGS_ASSERT_PRINTBUF;
445
b6007c36
DM
446 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
447 SvREFCNT_dec(tmp);
448}
449
8fa7f367
JH
450#endif
451
8290c323
NC
452static int
453S_deprecate_commaless_var_list(pTHX) {
454 PL_expect = XTERM;
455 deprecate("comma-less variable list");
456 return REPORT(','); /* grandfather non-comma-format format */
457}
458
ffb4593c
NT
459/*
460 * S_ao
461 *
c963b151
BD
462 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
463 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
464 */
465
76e3520e 466STATIC int
cea2e8a9 467S_ao(pTHX_ int toketype)
a0d0e21e 468{
97aff369 469 dVAR;
3280af22
NIS
470 if (*PL_bufptr == '=') {
471 PL_bufptr++;
a0d0e21e 472 if (toketype == ANDAND)
6154021b 473 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 474 else if (toketype == OROR)
6154021b 475 pl_yylval.ival = OP_ORASSIGN;
c963b151 476 else if (toketype == DORDOR)
6154021b 477 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
478 toketype = ASSIGNOP;
479 }
480 return toketype;
481}
482
ffb4593c
NT
483/*
484 * S_no_op
485 * When Perl expects an operator and finds something else, no_op
486 * prints the warning. It always prints "<something> found where
487 * operator expected. It prints "Missing semicolon on previous line?"
488 * if the surprise occurs at the start of the line. "do you need to
489 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
490 * where the compiler doesn't know if foo is a method call or a function.
491 * It prints "Missing operator before end of line" if there's nothing
492 * after the missing operator, or "... before <...>" if there is something
493 * after the missing operator.
494 */
495
76e3520e 496STATIC void
15f169a1 497S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 498{
97aff369 499 dVAR;
9d4ba2ae
AL
500 char * const oldbp = PL_bufptr;
501 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 502
7918f24d
NC
503 PERL_ARGS_ASSERT_NO_OP;
504
1189a94a
GS
505 if (!s)
506 s = oldbp;
07c798fb 507 else
1189a94a 508 PL_bufptr = s;
cea2e8a9 509 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
510 if (ckWARN_d(WARN_SYNTAX)) {
511 if (is_first)
512 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
513 "\t(Missing semicolon on previous line?)\n");
514 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 515 const char *t;
c35e046a
AL
516 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
517 NOOP;
56da5a46
RGS
518 if (t < PL_bufptr && isSPACE(*t))
519 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
520 "\t(Do you need to predeclare %.*s?)\n",
551405c4 521 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
522 }
523 else {
524 assert(s >= oldbp);
525 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 526 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 527 }
07c798fb 528 }
3280af22 529 PL_bufptr = oldbp;
8990e307
LW
530}
531
ffb4593c
NT
532/*
533 * S_missingterm
534 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 535 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
536 * If we're in a delimited string and the delimiter is a control
537 * character, it's reformatted into a two-char sequence like ^C.
538 * This is fatal.
539 */
540
76e3520e 541STATIC void
cea2e8a9 542S_missingterm(pTHX_ char *s)
8990e307 543{
97aff369 544 dVAR;
8990e307
LW
545 char tmpbuf[3];
546 char q;
547 if (s) {
9d4ba2ae 548 char * const nl = strrchr(s,'\n');
d2719217 549 if (nl)
8990e307
LW
550 *nl = '\0';
551 }
463559e7 552 else if (isCNTRL(PL_multi_close)) {
8990e307 553 *tmpbuf = '^';
585ec06d 554 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
555 tmpbuf[2] = '\0';
556 s = tmpbuf;
557 }
558 else {
eb160463 559 *tmpbuf = (char)PL_multi_close;
8990e307
LW
560 tmpbuf[1] = '\0';
561 s = tmpbuf;
562 }
563 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 564 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 565}
79072805 566
ef89dcc3 567#define FEATURE_IS_ENABLED(name) \
0d863452 568 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 569 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b
NC
570/* The longest string we pass in. */
571#define MAX_FEATURE_LEN (sizeof("switch")-1)
572
0d863452
RH
573/*
574 * S_feature_is_enabled
575 * Check whether the named feature is enabled.
576 */
577STATIC bool
15f169a1 578S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 579{
97aff369 580 dVAR;
0d863452 581 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 582 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
583
584 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
585
4a731d7b
NC
586 assert(namelen <= MAX_FEATURE_LEN);
587 memcpy(&he_name[8], name, namelen);
d4c19fe8 588
7b9ef140 589 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
590}
591
ffb4593c 592/*
9cbb5ea2
GS
593 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
594 * utf16-to-utf8-reversed.
ffb4593c
NT
595 */
596
c39cd008
GS
597#ifdef PERL_CR_FILTER
598static void
599strip_return(SV *sv)
600{
95a20fc0 601 register const char *s = SvPVX_const(sv);
9d4ba2ae 602 register const char * const e = s + SvCUR(sv);
7918f24d
NC
603
604 PERL_ARGS_ASSERT_STRIP_RETURN;
605
c39cd008
GS
606 /* outer loop optimized to do nothing if there are no CR-LFs */
607 while (s < e) {
608 if (*s++ == '\r' && *s == '\n') {
609 /* hit a CR-LF, need to copy the rest */
610 register char *d = s - 1;
611 *d++ = *s++;
612 while (s < e) {
613 if (*s == '\r' && s[1] == '\n')
614 s++;
615 *d++ = *s++;
616 }
617 SvCUR(sv) -= s - d;
618 return;
619 }
620 }
621}
a868473f 622
76e3520e 623STATIC I32
c39cd008 624S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 625{
f54cb97a 626 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
627 if (count > 0 && !maxlen)
628 strip_return(sv);
629 return count;
a868473f
NIS
630}
631#endif
632
199e78b7
DM
633
634
ffb4593c
NT
635/*
636 * Perl_lex_start
5486870f 637 *
e3abe207 638 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
639 *
640 * rsfp is the opened file handle to read from (if any),
641 *
642 * line holds any initial content already read from the file (or in
643 * the case of no file, such as an eval, the whole contents);
644 *
645 * new_filter indicates that this is a new file and it shouldn't inherit
646 * the filters from the current parser (ie require).
ffb4593c
NT
647 */
648
a0d0e21e 649void
5486870f 650Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 651{
97aff369 652 dVAR;
6ef55633 653 const char *s = NULL;
8990e307 654 STRLEN len;
5486870f 655 yy_parser *parser, *oparser;
acdf0a21
DM
656
657 /* create and initialise a parser */
658
199e78b7 659 Newxz(parser, 1, yy_parser);
5486870f 660 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
661 PL_parser = parser;
662
663 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
664 parser->ps = parser->stack;
665 parser->stack_size = YYINITDEPTH;
666
667 parser->stack->state = 0;
668 parser->yyerrstatus = 0;
669 parser->yychar = YYEMPTY; /* Cause a token to be read. */
670
e3abe207
DM
671 /* on scope exit, free this parser and restore any outer one */
672 SAVEPARSER(parser);
7c4baf47 673 parser->saved_curcop = PL_curcop;
e3abe207 674
acdf0a21 675 /* initialise lexer state */
8990e307 676
fb205e7a
DM
677#ifdef PERL_MAD
678 parser->curforce = -1;
679#else
680 parser->nexttoke = 0;
681#endif
ca4cfd28 682 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 683 parser->copline = NOLINE;
5afb0a62 684 parser->lex_state = LEX_NORMAL;
c2598295 685 parser->expect = XSTATE;
2f9285f8 686 parser->rsfp = rsfp;
56b27c9a 687 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 688 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 689
199e78b7
DM
690 Newx(parser->lex_brackstack, 120, char);
691 Newx(parser->lex_casestack, 12, char);
692 *parser->lex_casestack = '\0';
02b34bbe 693
10efb74f
NC
694 if (line) {
695 s = SvPV_const(line, len);
696 } else {
697 len = 0;
698 }
bdc0bf6f 699
10efb74f 700 if (!len) {
bdc0bf6f 701 parser->linestr = newSVpvs("\n;");
10efb74f 702 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 703 parser->linestr = newSVsv(line);
10efb74f 704 if (s[len-1] != ';')
bdc0bf6f 705 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
706 } else {
707 SvTEMP_off(line);
708 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 709 parser->linestr = line;
8990e307 710 }
f06b5848
DM
711 parser->oldoldbufptr =
712 parser->oldbufptr =
713 parser->bufptr =
714 parser->linestart = SvPVX(parser->linestr);
715 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
716 parser->last_lop = parser->last_uni = NULL;
79072805 717}
a687059c 718
e3abe207
DM
719
720/* delete a parser object */
721
722void
723Perl_parser_free(pTHX_ const yy_parser *parser)
724{
7918f24d
NC
725 PERL_ARGS_ASSERT_PARSER_FREE;
726
7c4baf47 727 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
728 SvREFCNT_dec(parser->linestr);
729
2f9285f8
DM
730 if (parser->rsfp == PerlIO_stdin())
731 PerlIO_clearerr(parser->rsfp);
799361c3
SH
732 else if (parser->rsfp && (!parser->old_parser ||
733 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 734 PerlIO_close(parser->rsfp);
5486870f 735 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 736
e3abe207
DM
737 Safefree(parser->stack);
738 Safefree(parser->lex_brackstack);
739 Safefree(parser->lex_casestack);
740 PL_parser = parser->old_parser;
741 Safefree(parser);
742}
743
744
ffb4593c
NT
745/*
746 * Perl_lex_end
9cbb5ea2
GS
747 * Finalizer for lexing operations. Must be called when the parser is
748 * done with the lexer.
ffb4593c
NT
749 */
750
463ee0b2 751void
864dbfa3 752Perl_lex_end(pTHX)
463ee0b2 753{
97aff369 754 dVAR;
3280af22 755 PL_doextract = FALSE;
463ee0b2
LW
756}
757
ffb4593c
NT
758/*
759 * S_incline
760 * This subroutine has nothing to do with tilting, whether at windmills
761 * or pinball tables. Its name is short for "increment line". It
57843af0 762 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 763 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
764 * # line 500 "foo.pm"
765 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
766 */
767
76e3520e 768STATIC void
d9095cec 769S_incline(pTHX_ const char *s)
463ee0b2 770{
97aff369 771 dVAR;
d9095cec
NC
772 const char *t;
773 const char *n;
774 const char *e;
463ee0b2 775
7918f24d
NC
776 PERL_ARGS_ASSERT_INCLINE;
777
57843af0 778 CopLINE_inc(PL_curcop);
463ee0b2
LW
779 if (*s++ != '#')
780 return;
d4c19fe8
AL
781 while (SPACE_OR_TAB(*s))
782 s++;
73659bf1
GS
783 if (strnEQ(s, "line", 4))
784 s += 4;
785 else
786 return;
084592ab 787 if (SPACE_OR_TAB(*s))
73659bf1 788 s++;
4e553d73 789 else
73659bf1 790 return;
d4c19fe8
AL
791 while (SPACE_OR_TAB(*s))
792 s++;
463ee0b2
LW
793 if (!isDIGIT(*s))
794 return;
d4c19fe8 795
463ee0b2
LW
796 n = s;
797 while (isDIGIT(*s))
798 s++;
07714eb4 799 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 800 return;
bf4acbe4 801 while (SPACE_OR_TAB(*s))
463ee0b2 802 s++;
73659bf1 803 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 804 s++;
73659bf1
GS
805 e = t + 1;
806 }
463ee0b2 807 else {
c35e046a
AL
808 t = s;
809 while (!isSPACE(*t))
810 t++;
73659bf1 811 e = t;
463ee0b2 812 }
bf4acbe4 813 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
814 e++;
815 if (*e != '\n' && *e != '\0')
816 return; /* false alarm */
817
f4dd75d9 818 if (t - s > 0) {
d9095cec 819 const STRLEN len = t - s;
8a5ee598 820#ifndef USE_ITHREADS
19bad673
NC
821 SV *const temp_sv = CopFILESV(PL_curcop);
822 const char *cf;
823 STRLEN tmplen;
824
825 if (temp_sv) {
826 cf = SvPVX(temp_sv);
827 tmplen = SvCUR(temp_sv);
828 } else {
829 cf = NULL;
830 tmplen = 0;
831 }
832
42d9b98d 833 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
834 /* must copy *{"::_<(eval N)[oldfilename:L]"}
835 * to *{"::_<newfilename"} */
44867030
NC
836 /* However, the long form of evals is only turned on by the
837 debugger - usually they're "(eval %lu)" */
838 char smallbuf[128];
839 char *tmpbuf;
840 GV **gvp;
d9095cec 841 STRLEN tmplen2 = len;
798b63bc 842 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
843 tmpbuf = smallbuf;
844 else
2ae0db35 845 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
846 tmpbuf[0] = '_';
847 tmpbuf[1] = '<';
2ae0db35 848 memcpy(tmpbuf + 2, cf, tmplen);
44867030 849 tmplen += 2;
8a5ee598
RGS
850 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
851 if (gvp) {
44867030
NC
852 char *tmpbuf2;
853 GV *gv2;
854
855 if (tmplen2 + 2 <= sizeof smallbuf)
856 tmpbuf2 = smallbuf;
857 else
858 Newx(tmpbuf2, tmplen2 + 2, char);
859
860 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
861 /* Either they malloc'd it, or we malloc'd it,
862 so no prefix is present in ours. */
863 tmpbuf2[0] = '_';
864 tmpbuf2[1] = '<';
865 }
866
867 memcpy(tmpbuf2 + 2, s, tmplen2);
868 tmplen2 += 2;
869
8a5ee598 870 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 871 if (!isGV(gv2)) {
8a5ee598 872 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
873 /* adjust ${"::_<newfilename"} to store the new file name */
874 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
875 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
876 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 877 }
44867030
NC
878
879 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 880 }
e66cf94c 881 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 882 }
8a5ee598 883#endif
05ec9bb3 884 CopFILE_free(PL_curcop);
d9095cec 885 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 886 }
57843af0 887 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
888}
889
29595ff2 890#ifdef PERL_MAD
cd81e915 891/* skip space before PL_thistoken */
29595ff2
NC
892
893STATIC char *
894S_skipspace0(pTHX_ register char *s)
895{
7918f24d
NC
896 PERL_ARGS_ASSERT_SKIPSPACE0;
897
29595ff2
NC
898 s = skipspace(s);
899 if (!PL_madskills)
900 return s;
cd81e915
NC
901 if (PL_skipwhite) {
902 if (!PL_thiswhite)
6b29d1f5 903 PL_thiswhite = newSVpvs("");
cd81e915
NC
904 sv_catsv(PL_thiswhite, PL_skipwhite);
905 sv_free(PL_skipwhite);
906 PL_skipwhite = 0;
907 }
908 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
909 return s;
910}
911
cd81e915 912/* skip space after PL_thistoken */
29595ff2
NC
913
914STATIC char *
915S_skipspace1(pTHX_ register char *s)
916{
d4c19fe8 917 const char *start = s;
29595ff2
NC
918 I32 startoff = start - SvPVX(PL_linestr);
919
7918f24d
NC
920 PERL_ARGS_ASSERT_SKIPSPACE1;
921
29595ff2
NC
922 s = skipspace(s);
923 if (!PL_madskills)
924 return s;
925 start = SvPVX(PL_linestr) + startoff;
cd81e915 926 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 927 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
928 PL_thistoken = newSVpvn(tstart, start - tstart);
929 }
930 PL_realtokenstart = -1;
931 if (PL_skipwhite) {
932 if (!PL_nextwhite)
6b29d1f5 933 PL_nextwhite = newSVpvs("");
cd81e915
NC
934 sv_catsv(PL_nextwhite, PL_skipwhite);
935 sv_free(PL_skipwhite);
936 PL_skipwhite = 0;
29595ff2
NC
937 }
938 return s;
939}
940
941STATIC char *
942S_skipspace2(pTHX_ register char *s, SV **svp)
943{
c35e046a
AL
944 char *start;
945 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
946 const I32 startoff = s - SvPVX(PL_linestr);
947
7918f24d
NC
948 PERL_ARGS_ASSERT_SKIPSPACE2;
949
29595ff2
NC
950 s = skipspace(s);
951 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
952 if (!PL_madskills || !svp)
953 return s;
954 start = SvPVX(PL_linestr) + startoff;
cd81e915 955 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 956 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
957 PL_thistoken = newSVpvn(tstart, start - tstart);
958 PL_realtokenstart = -1;
29595ff2 959 }
cd81e915 960 if (PL_skipwhite) {
29595ff2 961 if (!*svp)
6b29d1f5 962 *svp = newSVpvs("");
cd81e915
NC
963 sv_setsv(*svp, PL_skipwhite);
964 sv_free(PL_skipwhite);
965 PL_skipwhite = 0;
29595ff2
NC
966 }
967
968 return s;
969}
970#endif
971
80a702cd 972STATIC void
15f169a1 973S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
974{
975 AV *av = CopFILEAVx(PL_curcop);
976 if (av) {
b9f83d2f 977 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
978 if (orig_sv)
979 sv_setsv(sv, orig_sv);
980 else
981 sv_setpvn(sv, buf, len);
80a702cd
RGS
982 (void)SvIOK_on(sv);
983 SvIV_set(sv, 0);
984 av_store(av, (I32)CopLINE(PL_curcop), sv);
985 }
986}
987
ffb4593c
NT
988/*
989 * S_skipspace
990 * Called to gobble the appropriate amount and type of whitespace.
991 * Skips comments as well.
992 */
993
76e3520e 994STATIC char *
cea2e8a9 995S_skipspace(pTHX_ register char *s)
a687059c 996{
97aff369 997 dVAR;
5db06880
NC
998#ifdef PERL_MAD
999 int curoff;
1000 int startoff = s - SvPVX(PL_linestr);
1001
7918f24d
NC
1002 PERL_ARGS_ASSERT_SKIPSPACE;
1003
cd81e915
NC
1004 if (PL_skipwhite) {
1005 sv_free(PL_skipwhite);
1006 PL_skipwhite = 0;
5db06880
NC
1007 }
1008#endif
7918f24d 1009 PERL_ARGS_ASSERT_SKIPSPACE;
5db06880 1010
3280af22 1011 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1012 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1013 s++;
5db06880
NC
1014#ifdef PERL_MAD
1015 goto done;
1016#else
463ee0b2 1017 return s;
5db06880 1018#endif
463ee0b2
LW
1019 }
1020 for (;;) {
fd049845 1021 STRLEN prevlen;
09bef843 1022 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 1023 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
1024 while (s < PL_bufend && isSPACE(*s)) {
1025 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1026 incline(s);
1027 }
ffb4593c
NT
1028
1029 /* comment */
3280af22
NIS
1030 if (s < PL_bufend && *s == '#') {
1031 while (s < PL_bufend && *s != '\n')
463ee0b2 1032 s++;
60e6418e 1033 if (s < PL_bufend) {
463ee0b2 1034 s++;
60e6418e
GS
1035 if (PL_in_eval && !PL_rsfp) {
1036 incline(s);
1037 continue;
1038 }
1039 }
463ee0b2 1040 }
ffb4593c
NT
1041
1042 /* only continue to recharge the buffer if we're at the end
1043 * of the buffer, we're not reading from a source filter, and
1044 * we're in normal lexing mode
1045 */
09bef843
SB
1046 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1047 PL_lex_state == LEX_FORMLINE)
5db06880
NC
1048#ifdef PERL_MAD
1049 goto done;
1050#else
463ee0b2 1051 return s;
5db06880 1052#endif
ffb4593c
NT
1053
1054 /* try to recharge the buffer */
5db06880
NC
1055#ifdef PERL_MAD
1056 curoff = s - SvPVX(PL_linestr);
1057#endif
1058
5cc814fd
NC
1059 if ((s = filter_gets(PL_linestr, (prevlen = SvCUR(PL_linestr))))
1060 == NULL)
9cbb5ea2 1061 {
5db06880
NC
1062#ifdef PERL_MAD
1063 if (PL_madskills && curoff != startoff) {
cd81e915 1064 if (!PL_skipwhite)
6b29d1f5 1065 PL_skipwhite = newSVpvs("");
cd81e915 1066 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1067 curoff - startoff);
1068 }
1069
1070 /* mustn't throw out old stuff yet if madpropping */
1071 SvCUR(PL_linestr) = curoff;
1072 s = SvPVX(PL_linestr) + curoff;
1073 *s = 0;
1074 if (curoff && s[-1] == '\n')
1075 s[-1] = ' ';
1076#endif
1077
9cbb5ea2 1078 /* end of file. Add on the -p or -n magic */
cd81e915 1079 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1080 if (PL_minus_p) {
5db06880 1081#ifdef PERL_MAD
6502358f 1082 sv_catpvs(PL_linestr,
5db06880
NC
1083 ";}continue{print or die qq(-p destination: $!\\n);}");
1084#else
6502358f 1085 sv_setpvs(PL_linestr,
01a19ab0 1086 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1087#endif
3280af22 1088 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1089 }
01a19ab0 1090 else if (PL_minus_n) {
5db06880 1091#ifdef PERL_MAD
76f68e9b 1092 sv_catpvs(PL_linestr, ";}");
5db06880 1093#else
76f68e9b 1094 sv_setpvs(PL_linestr, ";}");
5db06880 1095#endif
01a19ab0
NC
1096 PL_minus_n = 0;
1097 }
a0d0e21e 1098 else
5db06880 1099#ifdef PERL_MAD
76f68e9b 1100 sv_catpvs(PL_linestr,";");
5db06880 1101#else
76f68e9b 1102 sv_setpvs(PL_linestr,";");
5db06880 1103#endif
ffb4593c
NT
1104
1105 /* reset variables for next time we lex */
9cbb5ea2 1106 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1107 = SvPVX(PL_linestr)
1108#ifdef PERL_MAD
1109 + curoff
1110#endif
1111 ;
3280af22 1112 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1113 PL_last_lop = PL_last_uni = NULL;
ffb4593c 1114
4c84d7f2 1115 /* Close the filehandle. Could be from
ffb4593c
NT
1116 * STDIN, or a regular file. If we were reading code from
1117 * STDIN (because the commandline held no -e or filename)
1118 * then we don't close it, we reset it so the code can
1119 * read from STDIN too.
1120 */
1121
4c84d7f2 1122 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
3280af22 1123 PerlIO_clearerr(PL_rsfp);
8990e307 1124 else
3280af22 1125 (void)PerlIO_close(PL_rsfp);
4608196e 1126 PL_rsfp = NULL;
463ee0b2
LW
1127 return s;
1128 }
ffb4593c
NT
1129
1130 /* not at end of file, so we only read another line */
09bef843
SB
1131 /* make corresponding updates to old pointers, for yyerror() */
1132 oldprevlen = PL_oldbufptr - PL_bufend;
1133 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1134 if (PL_last_uni)
1135 oldunilen = PL_last_uni - PL_bufend;
1136 if (PL_last_lop)
1137 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1138 PL_linestart = PL_bufptr = s + prevlen;
1139 PL_bufend = s + SvCUR(PL_linestr);
1140 s = PL_bufptr;
09bef843
SB
1141 PL_oldbufptr = s + oldprevlen;
1142 PL_oldoldbufptr = s + oldoldprevlen;
1143 if (PL_last_uni)
1144 PL_last_uni = s + oldunilen;
1145 if (PL_last_lop)
1146 PL_last_lop = s + oldloplen;
a0d0e21e 1147 incline(s);
ffb4593c
NT
1148
1149 /* debugger active and we're not compiling the debugger code,
1150 * so store the line into the debugger's array of lines
1151 */
65269a95 1152 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 1153 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1154 }
5db06880
NC
1155
1156#ifdef PERL_MAD
1157 done:
1158 if (PL_madskills) {
cd81e915 1159 if (!PL_skipwhite)
6b29d1f5 1160 PL_skipwhite = newSVpvs("");
5db06880
NC
1161 curoff = s - SvPVX(PL_linestr);
1162 if (curoff - startoff)
cd81e915 1163 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1164 curoff - startoff);
1165 }
1166 return s;
1167#endif
a687059c 1168}
378cc40b 1169
ffb4593c
NT
1170/*
1171 * S_check_uni
1172 * Check the unary operators to ensure there's no ambiguity in how they're
1173 * used. An ambiguous piece of code would be:
1174 * rand + 5
1175 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1176 * the +5 is its argument.
1177 */
1178
76e3520e 1179STATIC void
cea2e8a9 1180S_check_uni(pTHX)
ba106d47 1181{
97aff369 1182 dVAR;
d4c19fe8
AL
1183 const char *s;
1184 const char *t;
2f3197b3 1185
3280af22 1186 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1187 return;
3280af22
NIS
1188 while (isSPACE(*PL_last_uni))
1189 PL_last_uni++;
c35e046a
AL
1190 s = PL_last_uni;
1191 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1192 s++;
3280af22 1193 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1194 return;
6136c704 1195
9b387841
NC
1196 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1197 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1198 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1199}
1200
ffb4593c
NT
1201/*
1202 * LOP : macro to build a list operator. Its behaviour has been replaced
1203 * with a subroutine, S_lop() for which LOP is just another name.
1204 */
1205
a0d0e21e
LW
1206#define LOP(f,x) return lop(f,x,s)
1207
ffb4593c
NT
1208/*
1209 * S_lop
1210 * Build a list operator (or something that might be one). The rules:
1211 * - if we have a next token, then it's a list operator [why?]
1212 * - if the next thing is an opening paren, then it's a function
1213 * - else it's a list operator
1214 */
1215
76e3520e 1216STATIC I32
a0be28da 1217S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1218{
97aff369 1219 dVAR;
7918f24d
NC
1220
1221 PERL_ARGS_ASSERT_LOP;
1222
6154021b 1223 pl_yylval.ival = f;
35c8bce7 1224 CLINE;
3280af22
NIS
1225 PL_expect = x;
1226 PL_bufptr = s;
1227 PL_last_lop = PL_oldbufptr;
eb160463 1228 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1229#ifdef PERL_MAD
1230 if (PL_lasttoke)
1231 return REPORT(LSTOP);
1232#else
3280af22 1233 if (PL_nexttoke)
bbf60fe6 1234 return REPORT(LSTOP);
5db06880 1235#endif
79072805 1236 if (*s == '(')
bbf60fe6 1237 return REPORT(FUNC);
29595ff2 1238 s = PEEKSPACE(s);
79072805 1239 if (*s == '(')
bbf60fe6 1240 return REPORT(FUNC);
79072805 1241 else
bbf60fe6 1242 return REPORT(LSTOP);
79072805
LW
1243}
1244
5db06880
NC
1245#ifdef PERL_MAD
1246 /*
1247 * S_start_force
1248 * Sets up for an eventual force_next(). start_force(0) basically does
1249 * an unshift, while start_force(-1) does a push. yylex removes items
1250 * on the "pop" end.
1251 */
1252
1253STATIC void
1254S_start_force(pTHX_ int where)
1255{
1256 int i;
1257
cd81e915 1258 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1259 where = PL_lasttoke;
cd81e915
NC
1260 assert(PL_curforce < 0 || PL_curforce == where);
1261 if (PL_curforce != where) {
5db06880
NC
1262 for (i = PL_lasttoke; i > where; --i) {
1263 PL_nexttoke[i] = PL_nexttoke[i-1];
1264 }
1265 PL_lasttoke++;
1266 }
cd81e915 1267 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1268 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1269 PL_curforce = where;
1270 if (PL_nextwhite) {
5db06880 1271 if (PL_madskills)
6b29d1f5 1272 curmad('^', newSVpvs(""));
cd81e915 1273 CURMAD('_', PL_nextwhite);
5db06880
NC
1274 }
1275}
1276
1277STATIC void
1278S_curmad(pTHX_ char slot, SV *sv)
1279{
1280 MADPROP **where;
1281
1282 if (!sv)
1283 return;
cd81e915
NC
1284 if (PL_curforce < 0)
1285 where = &PL_thismad;
5db06880 1286 else
cd81e915 1287 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1288
cd81e915 1289 if (PL_faketokens)
76f68e9b 1290 sv_setpvs(sv, "");
5db06880
NC
1291 else {
1292 if (!IN_BYTES) {
1293 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1294 SvUTF8_on(sv);
1295 else if (PL_encoding) {
1296 sv_recode_to_utf8(sv, PL_encoding);
1297 }
1298 }
1299 }
1300
1301 /* keep a slot open for the head of the list? */
1302 if (slot != '_' && *where && (*where)->mad_key == '^') {
1303 (*where)->mad_key = slot;
daba3364 1304 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1305 (*where)->mad_val = (void*)sv;
1306 }
1307 else
1308 addmad(newMADsv(slot, sv), where, 0);
1309}
1310#else
b3f24c00
MHM
1311# define start_force(where) NOOP
1312# define curmad(slot, sv) NOOP
5db06880
NC
1313#endif
1314
ffb4593c
NT
1315/*
1316 * S_force_next
9cbb5ea2 1317 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1318 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1319 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1320 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1321 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1322 */
1323
4e553d73 1324STATIC void
cea2e8a9 1325S_force_next(pTHX_ I32 type)
79072805 1326{
97aff369 1327 dVAR;
704d4215
GG
1328#ifdef DEBUGGING
1329 if (DEBUG_T_TEST) {
1330 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1331 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1332 }
1333#endif
5db06880 1334#ifdef PERL_MAD
cd81e915 1335 if (PL_curforce < 0)
5db06880 1336 start_force(PL_lasttoke);
cd81e915 1337 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1338 if (PL_lex_state != LEX_KNOWNEXT)
1339 PL_lex_defer = PL_lex_state;
1340 PL_lex_state = LEX_KNOWNEXT;
1341 PL_lex_expect = PL_expect;
cd81e915 1342 PL_curforce = -1;
5db06880 1343#else
3280af22
NIS
1344 PL_nexttype[PL_nexttoke] = type;
1345 PL_nexttoke++;
1346 if (PL_lex_state != LEX_KNOWNEXT) {
1347 PL_lex_defer = PL_lex_state;
1348 PL_lex_expect = PL_expect;
1349 PL_lex_state = LEX_KNOWNEXT;
79072805 1350 }
5db06880 1351#endif
79072805
LW
1352}
1353
d0a148a6 1354STATIC SV *
15f169a1 1355S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1356{
97aff369 1357 dVAR;
740cce10 1358 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1359 !IN_BYTES
1360 && UTF
1361 && !is_ascii_string((const U8*)start, len)
740cce10 1362 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1363 return sv;
1364}
1365
ffb4593c
NT
1366/*
1367 * S_force_word
1368 * When the lexer knows the next thing is a word (for instance, it has
1369 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1370 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1371 * lookahead.
ffb4593c
NT
1372 *
1373 * Arguments:
b1b65b59 1374 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1375 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1376 * int check_keyword : if true, Perl checks to make sure the word isn't
1377 * a keyword (do this if the word is a label, e.g. goto FOO)
1378 * int allow_pack : if true, : characters will also be allowed (require,
1379 * use, etc. do this)
9cbb5ea2 1380 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1381 */
1382
76e3520e 1383STATIC char *
cea2e8a9 1384S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1385{
97aff369 1386 dVAR;
463ee0b2
LW
1387 register char *s;
1388 STRLEN len;
4e553d73 1389
7918f24d
NC
1390 PERL_ARGS_ASSERT_FORCE_WORD;
1391
29595ff2 1392 start = SKIPSPACE1(start);
463ee0b2 1393 s = start;
7e2040f0 1394 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1395 (allow_pack && *s == ':') ||
15f0808c 1396 (allow_initial_tick && *s == '\'') )
a0d0e21e 1397 {
3280af22 1398 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1399 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1400 return start;
cd81e915 1401 start_force(PL_curforce);
5db06880
NC
1402 if (PL_madskills)
1403 curmad('X', newSVpvn(start,s-start));
463ee0b2 1404 if (token == METHOD) {
29595ff2 1405 s = SKIPSPACE1(s);
463ee0b2 1406 if (*s == '(')
3280af22 1407 PL_expect = XTERM;
463ee0b2 1408 else {
3280af22 1409 PL_expect = XOPERATOR;
463ee0b2 1410 }
79072805 1411 }
e74e6b3d 1412 if (PL_madskills)
63575281 1413 curmad('g', newSVpvs( "forced" ));
9ded7720 1414 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1415 = (OP*)newSVOP(OP_CONST,0,
1416 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1417 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1418 force_next(token);
1419 }
1420 return s;
1421}
1422
ffb4593c
NT
1423/*
1424 * S_force_ident
9cbb5ea2 1425 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1426 * text only contains the "foo" portion. The first argument is a pointer
1427 * to the "foo", and the second argument is the type symbol to prefix.
1428 * Forces the next token to be a "WORD".
9cbb5ea2 1429 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1430 */
1431
76e3520e 1432STATIC void
bfed75c6 1433S_force_ident(pTHX_ register const char *s, int kind)
79072805 1434{
97aff369 1435 dVAR;
7918f24d
NC
1436
1437 PERL_ARGS_ASSERT_FORCE_IDENT;
1438
c35e046a 1439 if (*s) {
90e5519e
NC
1440 const STRLEN len = strlen(s);
1441 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1442 start_force(PL_curforce);
9ded7720 1443 NEXTVAL_NEXTTOKE.opval = o;
79072805 1444 force_next(WORD);
748a9306 1445 if (kind) {
11343788 1446 o->op_private = OPpCONST_ENTERED;
55497cff 1447 /* XXX see note in pp_entereval() for why we forgo typo
1448 warnings if the symbol must be introduced in an eval.
1449 GSAR 96-10-12 */
90e5519e
NC
1450 gv_fetchpvn_flags(s, len,
1451 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1452 : GV_ADD,
1453 kind == '$' ? SVt_PV :
1454 kind == '@' ? SVt_PVAV :
1455 kind == '%' ? SVt_PVHV :
a0d0e21e 1456 SVt_PVGV
90e5519e 1457 );
748a9306 1458 }
79072805
LW
1459 }
1460}
1461
1571675a
GS
1462NV
1463Perl_str_to_version(pTHX_ SV *sv)
1464{
1465 NV retval = 0.0;
1466 NV nshift = 1.0;
1467 STRLEN len;
cfd0369c 1468 const char *start = SvPV_const(sv,len);
9d4ba2ae 1469 const char * const end = start + len;
504618e9 1470 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
1471
1472 PERL_ARGS_ASSERT_STR_TO_VERSION;
1473
1571675a 1474 while (start < end) {
ba210ebe 1475 STRLEN skip;
1571675a
GS
1476 UV n;
1477 if (utf)
9041c2e3 1478 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1479 else {
1480 n = *(U8*)start;
1481 skip = 1;
1482 }
1483 retval += ((NV)n)/nshift;
1484 start += skip;
1485 nshift *= 1000;
1486 }
1487 return retval;
1488}
1489
4e553d73 1490/*
ffb4593c
NT
1491 * S_force_version
1492 * Forces the next token to be a version number.
e759cc13
RGS
1493 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1494 * and if "guessing" is TRUE, then no new token is created (and the caller
1495 * must use an alternative parsing method).
ffb4593c
NT
1496 */
1497
76e3520e 1498STATIC char *
e759cc13 1499S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1500{
97aff369 1501 dVAR;
5f66b61c 1502 OP *version = NULL;
44dcb63b 1503 char *d;
5db06880
NC
1504#ifdef PERL_MAD
1505 I32 startoff = s - SvPVX(PL_linestr);
1506#endif
89bfa8cd 1507
7918f24d
NC
1508 PERL_ARGS_ASSERT_FORCE_VERSION;
1509
29595ff2 1510 s = SKIPSPACE1(s);
89bfa8cd 1511
44dcb63b 1512 d = s;
dd629d5b 1513 if (*d == 'v')
44dcb63b 1514 d++;
44dcb63b 1515 if (isDIGIT(*d)) {
e759cc13
RGS
1516 while (isDIGIT(*d) || *d == '_' || *d == '.')
1517 d++;
5db06880
NC
1518#ifdef PERL_MAD
1519 if (PL_madskills) {
cd81e915 1520 start_force(PL_curforce);
5db06880
NC
1521 curmad('X', newSVpvn(s,d-s));
1522 }
1523#endif
9f3d182e 1524 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1525 SV *ver;
6154021b
RGS
1526 s = scan_num(s, &pl_yylval);
1527 version = pl_yylval.opval;
dd629d5b
GS
1528 ver = cSVOPx(version)->op_sv;
1529 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1530 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1531 SvNV_set(ver, str_to_version(ver));
1571675a 1532 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1533 }
89bfa8cd 1534 }
5db06880
NC
1535 else if (guessing) {
1536#ifdef PERL_MAD
1537 if (PL_madskills) {
cd81e915
NC
1538 sv_free(PL_nextwhite); /* let next token collect whitespace */
1539 PL_nextwhite = 0;
5db06880
NC
1540 s = SvPVX(PL_linestr) + startoff;
1541 }
1542#endif
e759cc13 1543 return s;
5db06880 1544 }
89bfa8cd 1545 }
1546
5db06880
NC
1547#ifdef PERL_MAD
1548 if (PL_madskills && !version) {
cd81e915
NC
1549 sv_free(PL_nextwhite); /* let next token collect whitespace */
1550 PL_nextwhite = 0;
5db06880
NC
1551 s = SvPVX(PL_linestr) + startoff;
1552 }
1553#endif
89bfa8cd 1554 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1555 start_force(PL_curforce);
9ded7720 1556 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1557 force_next(WORD);
89bfa8cd 1558
e759cc13 1559 return s;
89bfa8cd 1560}
1561
ffb4593c
NT
1562/*
1563 * S_tokeq
1564 * Tokenize a quoted string passed in as an SV. It finds the next
1565 * chunk, up to end of string or a backslash. It may make a new
1566 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1567 * turns \\ into \.
1568 */
1569
76e3520e 1570STATIC SV *
cea2e8a9 1571S_tokeq(pTHX_ SV *sv)
79072805 1572{
97aff369 1573 dVAR;
79072805
LW
1574 register char *s;
1575 register char *send;
1576 register char *d;
b3ac6de7
IZ
1577 STRLEN len = 0;
1578 SV *pv = sv;
79072805 1579
7918f24d
NC
1580 PERL_ARGS_ASSERT_TOKEQ;
1581
79072805 1582 if (!SvLEN(sv))
b3ac6de7 1583 goto finish;
79072805 1584
a0d0e21e 1585 s = SvPV_force(sv, len);
21a311ee 1586 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1587 goto finish;
463ee0b2 1588 send = s + len;
79072805
LW
1589 while (s < send && *s != '\\')
1590 s++;
1591 if (s == send)
b3ac6de7 1592 goto finish;
79072805 1593 d = s;
be4731d2 1594 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 1595 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 1596 }
79072805
LW
1597 while (s < send) {
1598 if (*s == '\\') {
a0d0e21e 1599 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1600 s++; /* all that, just for this */
1601 }
1602 *d++ = *s++;
1603 }
1604 *d = '\0';
95a20fc0 1605 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1606 finish:
3280af22 1607 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 1608 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
1609 return sv;
1610}
1611
ffb4593c
NT
1612/*
1613 * Now come three functions related to double-quote context,
1614 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1615 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1616 * interact with PL_lex_state, and create fake ( ... ) argument lists
1617 * to handle functions and concatenation.
1618 * They assume that whoever calls them will be setting up a fake
1619 * join call, because each subthing puts a ',' after it. This lets
1620 * "lower \luPpEr"
1621 * become
1622 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1623 *
1624 * (I'm not sure whether the spurious commas at the end of lcfirst's
1625 * arguments and join's arguments are created or not).
1626 */
1627
1628/*
1629 * S_sublex_start
6154021b 1630 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
1631 *
1632 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 1633 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
1634 *
1635 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1636 *
1637 * Everything else becomes a FUNC.
1638 *
1639 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1640 * had an OP_CONST or OP_READLINE). This just sets us up for a
1641 * call to S_sublex_push().
1642 */
1643
76e3520e 1644STATIC I32
cea2e8a9 1645S_sublex_start(pTHX)
79072805 1646{
97aff369 1647 dVAR;
6154021b 1648 register const I32 op_type = pl_yylval.ival;
79072805
LW
1649
1650 if (op_type == OP_NULL) {
6154021b 1651 pl_yylval.opval = PL_lex_op;
5f66b61c 1652 PL_lex_op = NULL;
79072805
LW
1653 return THING;
1654 }
1655 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1656 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1657
1658 if (SvTYPE(sv) == SVt_PVIV) {
1659 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1660 STRLEN len;
96a5add6 1661 const char * const p = SvPV_const(sv, len);
740cce10 1662 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
1663 SvREFCNT_dec(sv);
1664 sv = nsv;
4e553d73 1665 }
6154021b 1666 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1667 PL_lex_stuff = NULL;
6f33ba73
RGS
1668 /* Allow <FH> // "foo" */
1669 if (op_type == OP_READLINE)
1670 PL_expect = XTERMORDORDOR;
79072805
LW
1671 return THING;
1672 }
e3f73d4e
RGS
1673 else if (op_type == OP_BACKTICK && PL_lex_op) {
1674 /* readpipe() vas overriden */
1675 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 1676 pl_yylval.opval = PL_lex_op;
9b201d7d 1677 PL_lex_op = NULL;
e3f73d4e
RGS
1678 PL_lex_stuff = NULL;
1679 return THING;
1680 }
79072805 1681
3280af22 1682 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 1683 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
1684 PL_sublex_info.sub_op = PL_lex_op;
1685 PL_lex_state = LEX_INTERPPUSH;
55497cff 1686
3280af22
NIS
1687 PL_expect = XTERM;
1688 if (PL_lex_op) {
6154021b 1689 pl_yylval.opval = PL_lex_op;
5f66b61c 1690 PL_lex_op = NULL;
55497cff 1691 return PMFUNC;
1692 }
1693 else
1694 return FUNC;
1695}
1696
ffb4593c
NT
1697/*
1698 * S_sublex_push
1699 * Create a new scope to save the lexing state. The scope will be
1700 * ended in S_sublex_done. Returns a '(', starting the function arguments
1701 * to the uc, lc, etc. found before.
1702 * Sets PL_lex_state to LEX_INTERPCONCAT.
1703 */
1704
76e3520e 1705STATIC I32
cea2e8a9 1706S_sublex_push(pTHX)
55497cff 1707{
27da23d5 1708 dVAR;
f46d017c 1709 ENTER;
55497cff 1710
3280af22 1711 PL_lex_state = PL_sublex_info.super_state;
651b5b28 1712 SAVEBOOL(PL_lex_dojoin);
3280af22 1713 SAVEI32(PL_lex_brackets);
3280af22
NIS
1714 SAVEI32(PL_lex_casemods);
1715 SAVEI32(PL_lex_starts);
651b5b28 1716 SAVEI8(PL_lex_state);
7766f137 1717 SAVEVPTR(PL_lex_inpat);
98246f1e 1718 SAVEI16(PL_lex_inwhat);
57843af0 1719 SAVECOPLINE(PL_curcop);
3280af22 1720 SAVEPPTR(PL_bufptr);
8452ff4b 1721 SAVEPPTR(PL_bufend);
3280af22
NIS
1722 SAVEPPTR(PL_oldbufptr);
1723 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1724 SAVEPPTR(PL_last_lop);
1725 SAVEPPTR(PL_last_uni);
3280af22
NIS
1726 SAVEPPTR(PL_linestart);
1727 SAVESPTR(PL_linestr);
8edd5f42
RGS
1728 SAVEGENERICPV(PL_lex_brackstack);
1729 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1730
1731 PL_linestr = PL_lex_stuff;
a0714e2c 1732 PL_lex_stuff = NULL;
3280af22 1733
9cbb5ea2
GS
1734 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1735 = SvPVX(PL_linestr);
3280af22 1736 PL_bufend += SvCUR(PL_linestr);
bd61b366 1737 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1738 SAVEFREESV(PL_linestr);
1739
1740 PL_lex_dojoin = FALSE;
1741 PL_lex_brackets = 0;
a02a5408
JC
1742 Newx(PL_lex_brackstack, 120, char);
1743 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1744 PL_lex_casemods = 0;
1745 *PL_lex_casestack = '\0';
1746 PL_lex_starts = 0;
1747 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1748 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1749
1750 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1751 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1752 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1753 else
5f66b61c 1754 PL_lex_inpat = NULL;
79072805 1755
55497cff 1756 return '(';
79072805
LW
1757}
1758
ffb4593c
NT
1759/*
1760 * S_sublex_done
1761 * Restores lexer state after a S_sublex_push.
1762 */
1763
76e3520e 1764STATIC I32
cea2e8a9 1765S_sublex_done(pTHX)
79072805 1766{
27da23d5 1767 dVAR;
3280af22 1768 if (!PL_lex_starts++) {
396482e1 1769 SV * const sv = newSVpvs("");
9aa983d2
JH
1770 if (SvUTF8(PL_linestr))
1771 SvUTF8_on(sv);
3280af22 1772 PL_expect = XOPERATOR;
6154021b 1773 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1774 return THING;
1775 }
1776
3280af22
NIS
1777 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1778 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1779 return yylex();
79072805
LW
1780 }
1781
ffb4593c 1782 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1783 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1784 PL_linestr = PL_lex_repl;
1785 PL_lex_inpat = 0;
1786 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1787 PL_bufend += SvCUR(PL_linestr);
bd61b366 1788 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1789 SAVEFREESV(PL_linestr);
1790 PL_lex_dojoin = FALSE;
1791 PL_lex_brackets = 0;
3280af22
NIS
1792 PL_lex_casemods = 0;
1793 *PL_lex_casestack = '\0';
1794 PL_lex_starts = 0;
25da4f38 1795 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1796 PL_lex_state = LEX_INTERPNORMAL;
1797 PL_lex_starts++;
e9fa98b2
HS
1798 /* we don't clear PL_lex_repl here, so that we can check later
1799 whether this is an evalled subst; that means we rely on the
1800 logic to ensure sublex_done() is called again only via the
1801 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1802 }
e9fa98b2 1803 else {
3280af22 1804 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1805 PL_lex_repl = NULL;
e9fa98b2 1806 }
79072805 1807 return ',';
ffed7fef
LW
1808 }
1809 else {
5db06880
NC
1810#ifdef PERL_MAD
1811 if (PL_madskills) {
cd81e915
NC
1812 if (PL_thiswhite) {
1813 if (!PL_endwhite)
6b29d1f5 1814 PL_endwhite = newSVpvs("");
cd81e915
NC
1815 sv_catsv(PL_endwhite, PL_thiswhite);
1816 PL_thiswhite = 0;
1817 }
1818 if (PL_thistoken)
76f68e9b 1819 sv_setpvs(PL_thistoken,"");
5db06880 1820 else
cd81e915 1821 PL_realtokenstart = -1;
5db06880
NC
1822 }
1823#endif
f46d017c 1824 LEAVE;
3280af22
NIS
1825 PL_bufend = SvPVX(PL_linestr);
1826 PL_bufend += SvCUR(PL_linestr);
1827 PL_expect = XOPERATOR;
09bef843 1828 PL_sublex_info.sub_inwhat = 0;
79072805 1829 return ')';
ffed7fef
LW
1830 }
1831}
1832
02aa26ce
NT
1833/*
1834 scan_const
1835
1836 Extracts a pattern, double-quoted string, or transliteration. This
1837 is terrifying code.
1838
94def140 1839 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1840 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1841 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1842
94def140
TS
1843 Returns a pointer to the character scanned up to. If this is
1844 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 1845 successfully parsed), will leave an OP for the substring scanned
6154021b 1846 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
1847 by looking at the next characters herself.
1848
02aa26ce
NT
1849 In patterns:
1850 backslashes:
1851 double-quoted style: \r and \n
1852 regexp special ones: \D \s
94def140
TS
1853 constants: \x31
1854 backrefs: \1
02aa26ce
NT
1855 case and quoting: \U \Q \E
1856 stops on @ and $, but not for $ as tail anchor
1857
1858 In transliterations:
1859 characters are VERY literal, except for - not at the start or end
94def140
TS
1860 of the string, which indicates a range. If the range is in bytes,
1861 scan_const expands the range to the full set of intermediate
1862 characters. If the range is in utf8, the hyphen is replaced with
1863 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1864
1865 In double-quoted strings:
1866 backslashes:
1867 double-quoted style: \r and \n
94def140
TS
1868 constants: \x31
1869 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1870 case and quoting: \U \Q \E
1871 stops on @ and $
1872
1873 scan_const does *not* construct ops to handle interpolated strings.
1874 It stops processing as soon as it finds an embedded $ or @ variable
1875 and leaves it to the caller to work out what's going on.
1876
94def140
TS
1877 embedded arrays (whether in pattern or not) could be:
1878 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1879
1880 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1881
1882 $ in pattern could be $foo or could be tail anchor. Assumption:
1883 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1884 followed by one of "()| \r\n\t"
02aa26ce
NT
1885
1886 \1 (backreferences) are turned into $1
1887
1888 The structure of the code is
1889 while (there's a character to process) {
94def140
TS
1890 handle transliteration ranges
1891 skip regexp comments /(?#comment)/ and codes /(?{code})/
1892 skip #-initiated comments in //x patterns
1893 check for embedded arrays
02aa26ce
NT
1894 check for embedded scalars
1895 if (backslash) {
94def140
TS
1896 leave intact backslashes from leaveit (below)
1897 deprecate \1 in substitution replacements
02aa26ce
NT
1898 handle string-changing backslashes \l \U \Q \E, etc.
1899 switch (what was escaped) {
94def140
TS
1900 handle \- in a transliteration (becomes a literal -)
1901 handle \132 (octal characters)
1902 handle \x15 and \x{1234} (hex characters)
1903 handle \N{name} (named characters)
1904 handle \cV (control characters)
1905 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 1906 } (end switch)
77a135fe 1907 continue
02aa26ce 1908 } (end if backslash)
77a135fe 1909 handle regular character
02aa26ce 1910 } (end while character to read)
4e553d73 1911
02aa26ce
NT
1912*/
1913
76e3520e 1914STATIC char *
cea2e8a9 1915S_scan_const(pTHX_ char *start)
79072805 1916{
97aff369 1917 dVAR;
3280af22 1918 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
1919 SV *sv = newSV(send - start); /* sv for the constant. See
1920 note below on sizing. */
02aa26ce
NT
1921 register char *s = start; /* start of the constant */
1922 register char *d = SvPVX(sv); /* destination for copies */
1923 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1924 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 1925 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
1926 I32 this_utf8 = UTF; /* Is the source string assumed
1927 to be UTF8? But, this can
1928 show as true when the source
1929 isn't utf8, as for example
1930 when it is entirely composed
1931 of hex constants */
1932
1933 /* Note on sizing: The scanned constant is placed into sv, which is
1934 * initialized by newSV() assuming one byte of output for every byte of
1935 * input. This routine expects newSV() to allocate an extra byte for a
1936 * trailing NUL, which this routine will append if it gets to the end of
1937 * the input. There may be more bytes of input than output (eg., \N{LATIN
1938 * CAPITAL LETTER A}), or more output than input if the constant ends up
1939 * recoded to utf8, but each time a construct is found that might increase
1940 * the needed size, SvGROW() is called. Its size parameter each time is
1941 * based on the best guess estimate at the time, namely the length used so
1942 * far, plus the length the current construct will occupy, plus room for
1943 * the trailing NUL, plus one byte for every input byte still unscanned */
1944
012bcf8d 1945 UV uv;
4c3a8340
TS
1946#ifdef EBCDIC
1947 UV literal_endpoint = 0;
e294cc5d 1948 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1949#endif
012bcf8d 1950
7918f24d
NC
1951 PERL_ARGS_ASSERT_SCAN_CONST;
1952
2b9d42f0
NIS
1953 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1954 /* If we are doing a trans and we know we want UTF8 set expectation */
1955 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1956 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1957 }
1958
1959
79072805 1960 while (s < send || dorange) {
02aa26ce 1961 /* get transliterations out of the way (they're most literal) */
3280af22 1962 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1963 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1964 if (dorange) {
1ba5c669
JH
1965 I32 i; /* current expanded character */
1966 I32 min; /* first character in range */
1967 I32 max; /* last character in range */
02aa26ce 1968
e294cc5d
JH
1969#ifdef EBCDIC
1970 UV uvmax = 0;
1971#endif
1972
1973 if (has_utf8
1974#ifdef EBCDIC
1975 && !native_range
1976#endif
1977 ) {
9d4ba2ae 1978 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1979 char *e = d++;
1980 while (e-- > c)
1981 *(e + 1) = *e;
25716404 1982 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1983 /* mark the range as done, and continue */
1984 dorange = FALSE;
1985 didrange = TRUE;
1986 continue;
1987 }
2b9d42f0 1988
95a20fc0 1989 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1990#ifdef EBCDIC
1991 SvGROW(sv,
1992 SvLEN(sv) + (has_utf8 ?
1993 (512 - UTF_CONTINUATION_MARK +
1994 UNISKIP(0x100))
1995 : 256));
1996 /* How many two-byte within 0..255: 128 in UTF-8,
1997 * 96 in UTF-8-mod. */
1998#else
9cbb5ea2 1999 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2000#endif
9cbb5ea2 2001 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2002#ifdef EBCDIC
2003 if (has_utf8) {
2004 int j;
2005 for (j = 0; j <= 1; j++) {
2006 char * const c = (char*)utf8_hop((U8*)d, -1);
2007 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2008 if (j)
2009 min = (U8)uv;
2010 else if (uv < 256)
2011 max = (U8)uv;
2012 else {
2013 max = (U8)0xff; /* only to \xff */
2014 uvmax = uv; /* \x{100} to uvmax */
2015 }
2016 d = c; /* eat endpoint chars */
2017 }
2018 }
2019 else {
2020#endif
2021 d -= 2; /* eat the first char and the - */
2022 min = (U8)*d; /* first char in range */
2023 max = (U8)d[1]; /* last char in range */
2024#ifdef EBCDIC
2025 }
2026#endif
8ada0baa 2027
c2e66d9e 2028 if (min > max) {
01ec43d0 2029 Perl_croak(aTHX_
d1573ac7 2030 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2031 (char)min, (char)max);
c2e66d9e
GS
2032 }
2033
c7f1f016 2034#ifdef EBCDIC
4c3a8340
TS
2035 if (literal_endpoint == 2 &&
2036 ((isLOWER(min) && isLOWER(max)) ||
2037 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2038 if (isLOWER(min)) {
2039 for (i = min; i <= max; i++)
2040 if (isLOWER(i))
db42d148 2041 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2042 } else {
2043 for (i = min; i <= max; i++)
2044 if (isUPPER(i))
db42d148 2045 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2046 }
2047 }
2048 else
2049#endif
2050 for (i = min; i <= max; i++)
e294cc5d
JH
2051#ifdef EBCDIC
2052 if (has_utf8) {
2053 const U8 ch = (U8)NATIVE_TO_UTF(i);
2054 if (UNI_IS_INVARIANT(ch))
2055 *d++ = (U8)i;
2056 else {
2057 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2058 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2059 }
2060 }
2061 else
2062#endif
2063 *d++ = (char)i;
2064
2065#ifdef EBCDIC
2066 if (uvmax) {
2067 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2068 if (uvmax > 0x101)
2069 *d++ = (char)UTF_TO_NATIVE(0xff);
2070 if (uvmax > 0x100)
2071 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2072 }
2073#endif
02aa26ce
NT
2074
2075 /* mark the range as done, and continue */
79072805 2076 dorange = FALSE;
01ec43d0 2077 didrange = TRUE;
4c3a8340
TS
2078#ifdef EBCDIC
2079 literal_endpoint = 0;
2080#endif
79072805 2081 continue;
4e553d73 2082 }
02aa26ce
NT
2083
2084 /* range begins (ignore - as first or last char) */
79072805 2085 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2086 if (didrange) {
1fafa243 2087 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2088 }
e294cc5d
JH
2089 if (has_utf8
2090#ifdef EBCDIC
2091 && !native_range
2092#endif
2093 ) {
25716404 2094 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2095 s++;
2096 continue;
2097 }
79072805
LW
2098 dorange = TRUE;
2099 s++;
01ec43d0
GS
2100 }
2101 else {
2102 didrange = FALSE;
4c3a8340
TS
2103#ifdef EBCDIC
2104 literal_endpoint = 0;
e294cc5d 2105 native_range = TRUE;
4c3a8340 2106#endif
01ec43d0 2107 }
79072805 2108 }
02aa26ce
NT
2109
2110 /* if we get here, we're not doing a transliteration */
2111
0f5d15d6
IZ
2112 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2113 except for the last char, which will be done separately. */
3280af22 2114 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2115 if (s[2] == '#') {
e994fd66 2116 while (s+1 < send && *s != ')')
db42d148 2117 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2118 }
2119 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2120 || (s[2] == '?' && s[3] == '{'))
155aba94 2121 {
cc6b7395 2122 I32 count = 1;
0f5d15d6 2123 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2124 char c;
2125
d9f97599
GS
2126 while (count && (c = *regparse)) {
2127 if (c == '\\' && regparse[1])
2128 regparse++;
4e553d73 2129 else if (c == '{')
cc6b7395 2130 count++;
4e553d73 2131 else if (c == '}')
cc6b7395 2132 count--;
d9f97599 2133 regparse++;
cc6b7395 2134 }
e994fd66 2135 if (*regparse != ')')
5bdf89e7 2136 regparse--; /* Leave one char for continuation. */
0f5d15d6 2137 while (s < regparse)
db42d148 2138 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2139 }
748a9306 2140 }
02aa26ce
NT
2141
2142 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2143 else if (*s == '#' && PL_lex_inpat &&
2144 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2145 while (s+1 < send && *s != '\n')
db42d148 2146 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2147 }
02aa26ce 2148
5d1d4326 2149 /* check for embedded arrays
da6eedaa 2150 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2151 */
1749ea0d
TS
2152 else if (*s == '@' && s[1]) {
2153 if (isALNUM_lazy_if(s+1,UTF))
2154 break;
2155 if (strchr(":'{$", s[1]))
2156 break;
2157 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2158 break; /* in regexp, neither @+ nor @- are interpolated */
2159 }
02aa26ce
NT
2160
2161 /* check for embedded scalars. only stop if we're sure it's a
2162 variable.
2163 */
79072805 2164 else if (*s == '$') {
3280af22 2165 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2166 break;
77772344 2167 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2168 if (s[1] == '\\') {
2169 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2170 "Possible unintended interpolation of $\\ in regex");
77772344 2171 }
79072805 2172 break; /* in regexp, $ might be tail anchor */
77772344 2173 }
79072805 2174 }
02aa26ce 2175
2b9d42f0
NIS
2176 /* End of else if chain - OP_TRANS rejoin rest */
2177
02aa26ce 2178 /* backslashes */
79072805
LW
2179 if (*s == '\\' && s+1 < send) {
2180 s++;
02aa26ce 2181
02aa26ce 2182 /* deprecate \1 in strings and substitution replacements */
3280af22 2183 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2184 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2185 {
a2a5de95 2186 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2187 *--s = '$';
2188 break;
2189 }
02aa26ce
NT
2190
2191 /* string-change backslash escapes */
3280af22 2192 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2193 --s;
2194 break;
2195 }
cc74c5bd
TS
2196 /* skip any other backslash escapes in a pattern */
2197 else if (PL_lex_inpat) {
2198 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2199 goto default_action;
2200 }
02aa26ce
NT
2201
2202 /* if we get here, it's either a quoted -, or a digit */
79072805 2203 switch (*s) {
02aa26ce
NT
2204
2205 /* quoted - in transliterations */
79072805 2206 case '-':
3280af22 2207 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2208 *d++ = *s++;
2209 continue;
2210 }
2211 /* FALL THROUGH */
2212 default:
11b8faa4 2213 {
a2a5de95
NC
2214 if ((isALPHA(*s) || isDIGIT(*s)))
2215 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2216 "Unrecognized escape \\%c passed through",
2217 *s);
11b8faa4 2218 /* default action is to copy the quoted character */
f9a63242 2219 goto default_action;
11b8faa4 2220 }
02aa26ce 2221
77a135fe 2222 /* eg. \132 indicates the octal constant 0x132 */
79072805
LW
2223 case '0': case '1': case '2': case '3':
2224 case '4': case '5': case '6': case '7':
ba210ebe 2225 {
53305cf1
NC
2226 I32 flags = 0;
2227 STRLEN len = 3;
77a135fe 2228 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2229 s += len;
2230 }
012bcf8d 2231 goto NUM_ESCAPE_INSERT;
02aa26ce 2232
77a135fe 2233 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2234 case 'x':
a0ed51b3
LW
2235 ++s;
2236 if (*s == '{') {
9d4ba2ae 2237 char* const e = strchr(s, '}');
a4c04bdc
NC
2238 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2239 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2240 STRLEN len;
355860ce 2241
53305cf1 2242 ++s;
adaeee49 2243 if (!e) {
a0ed51b3 2244 yyerror("Missing right brace on \\x{}");
355860ce 2245 continue;
ba210ebe 2246 }
53305cf1 2247 len = e - s;
77a135fe 2248 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2249 s = e + 1;
a0ed51b3
LW
2250 }
2251 else {
ba210ebe 2252 {
53305cf1 2253 STRLEN len = 2;
a4c04bdc 2254 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2255 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2256 s += len;
2257 }
012bcf8d
GS
2258 }
2259
2260 NUM_ESCAPE_INSERT:
77a135fe
KW
2261 /* Insert oct, hex, or \N{U+...} escaped character. There will
2262 * always be enough room in sv since such escapes will be
2263 * longer than any UTF-8 sequence they can end up as, except if
2264 * they force us to recode the rest of the string into utf8 */
ba7cea30 2265
77a135fe
KW
2266 /* Here uv is the ordinal of the next character being added in
2267 * unicode (converted from native). (It has to be done before
2268 * here because \N is interpreted as unicode, and oct and hex
2269 * as native.) */
2270 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2271 if (!has_utf8 && uv > 255) {
77a135fe
KW
2272 /* Might need to recode whatever we have accumulated so
2273 * far if it contains any chars variant in utf8 or
2274 * utf-ebcdic. */
2275
2276 SvCUR_set(sv, d - SvPVX_const(sv));
2277 SvPOK_on(sv);
2278 *d = '\0';
77a135fe 2279 /* See Note on sizing above. */
7bf79863
KW
2280 sv_utf8_upgrade_flags_grow(sv,
2281 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2282 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2283 d = SvPVX(sv) + SvCUR(sv);
2284 has_utf8 = TRUE;
012bcf8d
GS
2285 }
2286
77a135fe
KW
2287 if (has_utf8) {
2288 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2289 if (PL_lex_inwhat == OP_TRANS &&
2290 PL_sublex_info.sub_op) {
2291 PL_sublex_info.sub_op->op_private |=
2292 (PL_lex_repl ? OPpTRANS_FROM_UTF
2293 : OPpTRANS_TO_UTF);
f9a63242 2294 }
e294cc5d
JH
2295#ifdef EBCDIC
2296 if (uv > 255 && !dorange)
2297 native_range = FALSE;
2298#endif
012bcf8d 2299 }
a0ed51b3 2300 else {
012bcf8d 2301 *d++ = (char)uv;
a0ed51b3 2302 }
012bcf8d
GS
2303 }
2304 else {
c4d5f83a 2305 *d++ = (char) uv;
a0ed51b3 2306 }
79072805 2307 continue;
02aa26ce 2308
77a135fe
KW
2309 /* \N{LATIN SMALL LETTER A} is a named character, and so is
2310 * \N{U+0041} */
4a2d328f 2311 case 'N':
55eda711 2312 ++s;
423cee85
JH
2313 if (*s == '{') {
2314 char* e = strchr(s, '}');
155aba94 2315 SV *res;
423cee85 2316 STRLEN len;
cfd0369c 2317 const char *str;
4e553d73 2318
423cee85 2319 if (!e) {
5777a3f7 2320 yyerror("Missing right brace on \\N{}");
423cee85
JH
2321 e = s - 1;
2322 goto cont_scan;
2323 }
dbc0d4f2 2324 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
77a135fe
KW
2325 /* \N{U+...} The ... is a unicode value even on EBCDIC
2326 * machines */
dbc0d4f2
JH
2327 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2328 PERL_SCAN_DISALLOW_PREFIX;
2329 s += 3;
2330 len = e - s;
2331 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2332 if ( e > s && len != (STRLEN)(e - s) ) {
2333 uv = 0xFFFD;
fc8cd66c 2334 }
dbc0d4f2
JH
2335 s = e + 1;
2336 goto NUM_ESCAPE_INSERT;
2337 }
55eda711 2338 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2339 res = new_constant( NULL, 0, "charnames",
eb0d8d16 2340 res, NULL, s - 2, e - s + 3 );
f9a63242
JH
2341 if (has_utf8)
2342 sv_utf8_upgrade(res);
cfd0369c 2343 str = SvPV_const(res,len);
1c47067b
JH
2344#ifdef EBCDIC_NEVER_MIND
2345 /* charnames uses pack U and that has been
2346 * recently changed to do the below uni->native
2347 * mapping, so this would be redundant (and wrong,
2348 * the code point would be doubly converted).
2349 * But leave this in just in case the pack U change
2350 * gets revoked, but the semantics is still
2351 * desireable for charnames. --jhi */
cddc7ef4 2352 {
cfd0369c 2353 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2354
2355 if (uv < 0x100) {
89ebb4a3 2356 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2357
2358 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2359 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2360 str = SvPV_const(res, len);
cddc7ef4
JH
2361 }
2362 }
2363#endif
77a135fe
KW
2364 /* If destination is not in utf8 but this new character is,
2365 * recode the dest to utf8 */
89491803 2366 if (!has_utf8 && SvUTF8(res)) {
77a135fe 2367 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 2368 SvPOK_on(sv);
e4f3eed8 2369 *d = '\0';
77a135fe 2370 /* See Note on sizing above. */
7bf79863
KW
2371 sv_utf8_upgrade_flags_grow(sv,
2372 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2373 len + (STRLEN)(send - s) + 1);
f08d6ad9 2374 d = SvPVX(sv) + SvCUR(sv);
89491803 2375 has_utf8 = TRUE;
77a135fe 2376 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85 2377
77a135fe
KW
2378 /* See Note on sizing above. (NOTE: SvCUR() is not set
2379 * correctly here). */
2380 const STRLEN off = d - SvPVX_const(sv);
2381 d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
423cee85 2382 }
e294cc5d
JH
2383#ifdef EBCDIC
2384 if (!dorange)
2385 native_range = FALSE; /* \N{} is guessed to be Unicode */
2386#endif
423cee85
JH
2387 Copy(str, d, len, char);
2388 d += len;
2389 SvREFCNT_dec(res);
2390 cont_scan:
2391 s = e + 1;
2392 }
2393 else
5777a3f7 2394 yyerror("Missing braces on \\N{}");
423cee85
JH
2395 continue;
2396
02aa26ce 2397 /* \c is a control character */
79072805
LW
2398 case 'c':
2399 s++;
961ce445 2400 if (s < send) {
ba210ebe 2401 U8 c = *s++;
c7f1f016
NIS
2402#ifdef EBCDIC
2403 if (isLOWER(c))
2404 c = toUPPER(c);
2405#endif
db42d148 2406 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2407 }
961ce445
RGS
2408 else {
2409 yyerror("Missing control char name in \\c");
2410 }
79072805 2411 continue;
02aa26ce
NT
2412
2413 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2414 case 'b':
db42d148 2415 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2416 break;
2417 case 'n':
db42d148 2418 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2419 break;
2420 case 'r':
db42d148 2421 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2422 break;
2423 case 'f':
db42d148 2424 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2425 break;
2426 case 't':
db42d148 2427 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2428 break;
34a3fe2a 2429 case 'e':
db42d148 2430 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2431 break;
2432 case 'a':
db42d148 2433 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2434 break;
02aa26ce
NT
2435 } /* end switch */
2436
79072805
LW
2437 s++;
2438 continue;
02aa26ce 2439 } /* end if (backslash) */
4c3a8340
TS
2440#ifdef EBCDIC
2441 else
2442 literal_endpoint++;
2443#endif
02aa26ce 2444
f9a63242 2445 default_action:
77a135fe
KW
2446 /* If we started with encoded form, or already know we want it,
2447 then encode the next character */
2448 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 2449 STRLEN len = 1;
77a135fe
KW
2450
2451
2452 /* One might think that it is wasted effort in the case of the
2453 * source being utf8 (this_utf8 == TRUE) to take the next character
2454 * in the source, convert it to an unsigned value, and then convert
2455 * it back again. But the source has not been validated here. The
2456 * routine that does the conversion checks for errors like
2457 * malformed utf8 */
2458
5f66b61c
AL
2459 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2460 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
2461 if (!has_utf8) {
2462 SvCUR_set(sv, d - SvPVX_const(sv));
2463 SvPOK_on(sv);
2464 *d = '\0';
77a135fe 2465 /* See Note on sizing above. */
7bf79863
KW
2466 sv_utf8_upgrade_flags_grow(sv,
2467 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2468 need + (STRLEN)(send - s) + 1);
77a135fe
KW
2469 d = SvPVX(sv) + SvCUR(sv);
2470 has_utf8 = TRUE;
2471 } else if (need > len) {
2472 /* encoded value larger than old, may need extra space (NOTE:
2473 * SvCUR() is not set correctly here). See Note on sizing
2474 * above. */
9d4ba2ae 2475 const STRLEN off = d - SvPVX_const(sv);
77a135fe 2476 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 2477 }
77a135fe
KW
2478 s += len;
2479
5f66b61c 2480 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
2481#ifdef EBCDIC
2482 if (uv > 255 && !dorange)
2483 native_range = FALSE;
2484#endif
2b9d42f0
NIS
2485 }
2486 else {
2487 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2488 }
02aa26ce
NT
2489 } /* while loop to process each character */
2490
2491 /* terminate the string and set up the sv */
79072805 2492 *d = '\0';
95a20fc0 2493 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2494 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2495 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2496
79072805 2497 SvPOK_on(sv);
9f4817db 2498 if (PL_encoding && !has_utf8) {
d0063567
DK
2499 sv_recode_to_utf8(sv, PL_encoding);
2500 if (SvUTF8(sv))
2501 has_utf8 = TRUE;
9f4817db 2502 }
2b9d42f0 2503 if (has_utf8) {
7e2040f0 2504 SvUTF8_on(sv);
2b9d42f0 2505 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2506 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2507 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2508 }
2509 }
79072805 2510
02aa26ce 2511 /* shrink the sv if we allocated more than we used */
79072805 2512 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2513 SvPV_shrink_to_cur(sv);
79072805 2514 }
02aa26ce 2515
6154021b 2516 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 2517 if (s > PL_bufptr) {
eb0d8d16
NC
2518 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2519 const char *const key = PL_lex_inpat ? "qr" : "q";
2520 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2521 const char *type;
2522 STRLEN typelen;
2523
2524 if (PL_lex_inwhat == OP_TRANS) {
2525 type = "tr";
2526 typelen = 2;
2527 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2528 type = "s";
2529 typelen = 1;
2530 } else {
2531 type = "qq";
2532 typelen = 2;
2533 }
2534
2535 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2536 type, typelen);
2537 }
6154021b 2538 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2539 } else
8990e307 2540 SvREFCNT_dec(sv);
79072805
LW
2541 return s;
2542}
2543
ffb4593c
NT
2544/* S_intuit_more
2545 * Returns TRUE if there's more to the expression (e.g., a subscript),
2546 * FALSE otherwise.
ffb4593c
NT
2547 *
2548 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2549 *
2550 * ->[ and ->{ return TRUE
2551 * { and [ outside a pattern are always subscripts, so return TRUE
2552 * if we're outside a pattern and it's not { or [, then return FALSE
2553 * if we're in a pattern and the first char is a {
2554 * {4,5} (any digits around the comma) returns FALSE
2555 * if we're in a pattern and the first char is a [
2556 * [] returns FALSE
2557 * [SOMETHING] has a funky algorithm to decide whether it's a
2558 * character class or not. It has to deal with things like
2559 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2560 * anything else returns TRUE
2561 */
2562
9cbb5ea2
GS
2563/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2564
76e3520e 2565STATIC int
cea2e8a9 2566S_intuit_more(pTHX_ register char *s)
79072805 2567{
97aff369 2568 dVAR;
7918f24d
NC
2569
2570 PERL_ARGS_ASSERT_INTUIT_MORE;
2571
3280af22 2572 if (PL_lex_brackets)
79072805
LW
2573 return TRUE;
2574 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2575 return TRUE;
2576 if (*s != '{' && *s != '[')
2577 return FALSE;
3280af22 2578 if (!PL_lex_inpat)
79072805
LW
2579 return TRUE;
2580
2581 /* In a pattern, so maybe we have {n,m}. */
2582 if (*s == '{') {
2583 s++;
2584 if (!isDIGIT(*s))
2585 return TRUE;
2586 while (isDIGIT(*s))
2587 s++;
2588 if (*s == ',')
2589 s++;
2590 while (isDIGIT(*s))
2591 s++;
2592 if (*s == '}')
2593 return FALSE;
2594 return TRUE;
2595
2596 }
2597
2598 /* On the other hand, maybe we have a character class */
2599
2600 s++;
2601 if (*s == ']' || *s == '^')
2602 return FALSE;
2603 else {
ffb4593c 2604 /* this is terrifying, and it works */
79072805
LW
2605 int weight = 2; /* let's weigh the evidence */
2606 char seen[256];
f27ffc4a 2607 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2608 const char * const send = strchr(s,']');
3280af22 2609 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2610
2611 if (!send) /* has to be an expression */
2612 return TRUE;
2613
2614 Zero(seen,256,char);
2615 if (*s == '$')
2616 weight -= 3;
2617 else if (isDIGIT(*s)) {
2618 if (s[1] != ']') {
2619 if (isDIGIT(s[1]) && s[2] == ']')
2620 weight -= 10;
2621 }
2622 else
2623 weight -= 100;
2624 }
2625 for (; s < send; s++) {
2626 last_un_char = un_char;
2627 un_char = (unsigned char)*s;
2628 switch (*s) {
2629 case '@':
2630 case '&':
2631 case '$':
2632 weight -= seen[un_char] * 10;
7e2040f0 2633 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2634 int len;
8903cb82 2635 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2636 len = (int)strlen(tmpbuf);
2637 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2638 weight -= 100;
2639 else
2640 weight -= 10;
2641 }
2642 else if (*s == '$' && s[1] &&
93a17b20
LW
2643 strchr("[#!%*<>()-=",s[1])) {
2644 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2645 weight -= 10;
2646 else
2647 weight -= 1;
2648 }
2649 break;
2650 case '\\':
2651 un_char = 254;
2652 if (s[1]) {
93a17b20 2653 if (strchr("wds]",s[1]))
79072805 2654 weight += 100;
10edeb5d 2655 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2656 weight += 1;
93a17b20 2657 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2658 weight += 40;
2659 else if (isDIGIT(s[1])) {
2660 weight += 40;
2661 while (s[1] && isDIGIT(s[1]))
2662 s++;
2663 }
2664 }
2665 else
2666 weight += 100;
2667 break;
2668 case '-':
2669 if (s[1] == '\\')
2670 weight += 50;
93a17b20 2671 if (strchr("aA01! ",last_un_char))
79072805 2672 weight += 30;
93a17b20 2673 if (strchr("zZ79~",s[1]))
79072805 2674 weight += 30;
f27ffc4a
GS
2675 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2676 weight -= 5; /* cope with negative subscript */
79072805
LW
2677 break;
2678 default:
3792a11b
NC
2679 if (!isALNUM(last_un_char)
2680 && !(last_un_char == '$' || last_un_char == '@'
2681 || last_un_char == '&')
2682 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2683 char *d = tmpbuf;
2684 while (isALPHA(*s))
2685 *d++ = *s++;
2686 *d = '\0';
5458a98a 2687 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2688 weight -= 150;
2689 }
2690 if (un_char == last_un_char + 1)
2691 weight += 5;
2692 weight -= seen[un_char];
2693 break;
2694 }
2695 seen[un_char]++;
2696 }
2697 if (weight >= 0) /* probably a character class */
2698 return FALSE;
2699 }
2700
2701 return TRUE;
2702}
ffed7fef 2703
ffb4593c
NT
2704/*
2705 * S_intuit_method
2706 *
2707 * Does all the checking to disambiguate
2708 * foo bar
2709 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2710 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2711 *
2712 * First argument is the stuff after the first token, e.g. "bar".
2713 *
2714 * Not a method if bar is a filehandle.
2715 * Not a method if foo is a subroutine prototyped to take a filehandle.
2716 * Not a method if it's really "Foo $bar"
2717 * Method if it's "foo $bar"
2718 * Not a method if it's really "print foo $bar"
2719 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2720 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2721 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2722 * =>
2723 */
2724
76e3520e 2725STATIC int
62d55b22 2726S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2727{
97aff369 2728 dVAR;
a0d0e21e 2729 char *s = start + (*start == '$');
3280af22 2730 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2731 STRLEN len;
2732 GV* indirgv;
5db06880
NC
2733#ifdef PERL_MAD
2734 int soff;
2735#endif
a0d0e21e 2736
7918f24d
NC
2737 PERL_ARGS_ASSERT_INTUIT_METHOD;
2738
a0d0e21e 2739 if (gv) {
62d55b22 2740 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2741 return 0;
62d55b22
NC
2742 if (cv) {
2743 if (SvPOK(cv)) {
2744 const char *proto = SvPVX_const(cv);
2745 if (proto) {
2746 if (*proto == ';')
2747 proto++;
2748 if (*proto == '*')
2749 return 0;
2750 }
b6c543e3
IZ
2751 }
2752 } else
c35e046a 2753 gv = NULL;
a0d0e21e 2754 }
8903cb82 2755 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2756 /* start is the beginning of the possible filehandle/object,
2757 * and s is the end of it
2758 * tmpbuf is a copy of it
2759 */
2760
a0d0e21e 2761 if (*start == '$') {
3ef1310e
RGS
2762 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2763 isUPPER(*PL_tokenbuf))
a0d0e21e 2764 return 0;
5db06880
NC
2765#ifdef PERL_MAD
2766 len = start - SvPVX(PL_linestr);
2767#endif
29595ff2 2768 s = PEEKSPACE(s);
f0092767 2769#ifdef PERL_MAD
5db06880
NC
2770 start = SvPVX(PL_linestr) + len;
2771#endif
3280af22
NIS
2772 PL_bufptr = start;
2773 PL_expect = XREF;
a0d0e21e
LW
2774 return *s == '(' ? FUNCMETH : METHOD;
2775 }
5458a98a 2776 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2777 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2778 len -= 2;
2779 tmpbuf[len] = '\0';
5db06880
NC
2780#ifdef PERL_MAD
2781 soff = s - SvPVX(PL_linestr);
2782#endif
c3e0f903
GS
2783 goto bare_package;
2784 }
90e5519e 2785 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2786 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2787 return 0;
2788 /* filehandle or package name makes it a method */
da51bb9b 2789 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2790#ifdef PERL_MAD
2791 soff = s - SvPVX(PL_linestr);
2792#endif
29595ff2 2793 s = PEEKSPACE(s);
3280af22 2794 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2795 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2796 bare_package:
cd81e915 2797 start_force(PL_curforce);
9ded7720 2798 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 2799 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 2800 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2801 if (PL_madskills)
2802 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2803 PL_expect = XTERM;
a0d0e21e 2804 force_next(WORD);
3280af22 2805 PL_bufptr = s;
5db06880
NC
2806#ifdef PERL_MAD
2807 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2808#endif
a0d0e21e
LW
2809 return *s == '(' ? FUNCMETH : METHOD;
2810 }
2811 }
2812 return 0;
2813}
2814
16d20bd9 2815/* Encoded script support. filter_add() effectively inserts a
4e553d73 2816 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2817 * Note that the filter function only applies to the current source file
2818 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2819 *
2820 * The datasv parameter (which may be NULL) can be used to pass
2821 * private data to this instance of the filter. The filter function
2822 * can recover the SV using the FILTER_DATA macro and use it to
2823 * store private buffers and state information.
2824 *
2825 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2826 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2827 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2828 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2829 * private use must be set using malloc'd pointers.
2830 */
16d20bd9
AD
2831
2832SV *
864dbfa3 2833Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2834{
97aff369 2835 dVAR;
f4c556ac 2836 if (!funcp)
a0714e2c 2837 return NULL;
f4c556ac 2838
5486870f
DM
2839 if (!PL_parser)
2840 return NULL;
2841
3280af22
NIS
2842 if (!PL_rsfp_filters)
2843 PL_rsfp_filters = newAV();
16d20bd9 2844 if (!datasv)
561b68a9 2845 datasv = newSV(0);
862a34c6 2846 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2847 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2848 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2849 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2850 FPTR2DPTR(void *, IoANY(datasv)),
2851 SvPV_nolen(datasv)));
3280af22
NIS
2852 av_unshift(PL_rsfp_filters, 1);
2853 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2854 return(datasv);
2855}
4e553d73 2856
16d20bd9
AD
2857
2858/* Delete most recently added instance of this filter function. */
a0d0e21e 2859void
864dbfa3 2860Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2861{
97aff369 2862 dVAR;
e0c19803 2863 SV *datasv;
24801a4b 2864
7918f24d
NC
2865 PERL_ARGS_ASSERT_FILTER_DEL;
2866
33073adb 2867#ifdef DEBUGGING
55662e27
JH
2868 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2869 FPTR2DPTR(void*, funcp)));
33073adb 2870#endif
5486870f 2871 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2872 return;
2873 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2874 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2875 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2876 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2877 IoANY(datasv) = (void *)NULL;
3280af22 2878 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2879
16d20bd9
AD
2880 return;
2881 }
2882 /* we need to search for the correct entry and clear it */
cea2e8a9 2883 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2884}
2885
2886
1de9afcd
RGS
2887/* Invoke the idxth filter function for the current rsfp. */
2888/* maxlen 0 = read one text line */
16d20bd9 2889I32
864dbfa3 2890Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2891{
97aff369 2892 dVAR;
16d20bd9
AD
2893 filter_t funcp;
2894 SV *datasv = NULL;
f482118e
NC
2895 /* This API is bad. It should have been using unsigned int for maxlen.
2896 Not sure if we want to change the API, but if not we should sanity
2897 check the value here. */
39cd7a59
NC
2898 const unsigned int correct_length
2899 = maxlen < 0 ?
2900#ifdef PERL_MICRO
2901 0x7FFFFFFF
2902#else
2903 INT_MAX
2904#endif
2905 : maxlen;
e50aee73 2906
7918f24d
NC
2907 PERL_ARGS_ASSERT_FILTER_READ;
2908
5486870f 2909 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 2910 return -1;
1de9afcd 2911 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2912 /* Provide a default input filter to make life easy. */
2913 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2914 DEBUG_P(PerlIO_printf(Perl_debug_log,
2915 "filter_read %d: from rsfp\n", idx));
f482118e 2916 if (correct_length) {
16d20bd9
AD
2917 /* Want a block */
2918 int len ;
f54cb97a 2919 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2920
2921 /* ensure buf_sv is large enough */
881d8f0a 2922 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
2923 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2924 correct_length)) <= 0) {
3280af22 2925 if (PerlIO_error(PL_rsfp))
37120919
AD
2926 return -1; /* error */
2927 else
2928 return 0 ; /* end of file */
2929 }
16d20bd9 2930 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 2931 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
2932 } else {
2933 /* Want a line */
3280af22
NIS
2934 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2935 if (PerlIO_error(PL_rsfp))
37120919
AD
2936 return -1; /* error */
2937 else
2938 return 0 ; /* end of file */
2939 }
16d20bd9
AD
2940 }
2941 return SvCUR(buf_sv);
2942 }
2943 /* Skip this filter slot if filter has been deleted */
1de9afcd 2944 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2945 DEBUG_P(PerlIO_printf(Perl_debug_log,
2946 "filter_read %d: skipped (filter deleted)\n",
2947 idx));
f482118e 2948 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2949 }
2950 /* Get function pointer hidden within datasv */
8141890a 2951 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2952 DEBUG_P(PerlIO_printf(Perl_debug_log,
2953 "filter_read %d: via function %p (%s)\n",
ca0270c4 2954 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2955 /* Call function. The function is expected to */
2956 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2957 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2958 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2959}
2960
76e3520e 2961STATIC char *
5cc814fd 2962S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 2963{
97aff369 2964 dVAR;
7918f24d
NC
2965
2966 PERL_ARGS_ASSERT_FILTER_GETS;
2967
c39cd008 2968#ifdef PERL_CR_FILTER
3280af22 2969 if (!PL_rsfp_filters) {
c39cd008 2970 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2971 }
2972#endif
3280af22 2973 if (PL_rsfp_filters) {
55497cff 2974 if (!append)
2975 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2976 if (FILTER_READ(0, sv, 0) > 0)
2977 return ( SvPVX(sv) ) ;
2978 else
bd61b366 2979 return NULL ;
16d20bd9 2980 }
9d116dd7 2981 else
5cc814fd 2982 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
2983}
2984
01ec43d0 2985STATIC HV *
9bde8eb0 2986S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 2987{
97aff369 2988 dVAR;
def3634b
GS
2989 GV *gv;
2990
7918f24d
NC
2991 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
2992
01ec43d0 2993 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2994 return PL_curstash;
2995
2996 if (len > 2 &&
2997 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2998 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2999 {
3000 return GvHV(gv); /* Foo:: */
def3634b
GS
3001 }
3002
3003 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3004 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3005 if (gv && GvCV(gv)) {
3006 SV * const sv = cv_const_sv(GvCV(gv));
3007 if (sv)
9bde8eb0 3008 pkgname = SvPV_const(sv, len);
def3634b
GS
3009 }
3010
9bde8eb0 3011 return gv_stashpvn(pkgname, len, 0);
def3634b 3012}
a0d0e21e 3013
e3f73d4e
RGS
3014/*
3015 * S_readpipe_override
3016 * Check whether readpipe() is overriden, and generates the appropriate
3017 * optree, provided sublex_start() is called afterwards.
3018 */
3019STATIC void
1d51329b 3020S_readpipe_override(pTHX)
e3f73d4e
RGS
3021{
3022 GV **gvp;
3023 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3024 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3025 if ((gv_readpipe
3026 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3027 ||
3028 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3029 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3030 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3031 {
3032 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3033 append_elem(OP_LIST,
3034 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3035 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3036 }
e3f73d4e
RGS
3037}
3038
5db06880
NC
3039#ifdef PERL_MAD
3040 /*
3041 * Perl_madlex
3042 * The intent of this yylex wrapper is to minimize the changes to the
3043 * tokener when we aren't interested in collecting madprops. It remains
3044 * to be seen how successful this strategy will be...
3045 */
3046
3047int
3048Perl_madlex(pTHX)
3049{
3050 int optype;
3051 char *s = PL_bufptr;
3052
cd81e915
NC
3053 /* make sure PL_thiswhite is initialized */
3054 PL_thiswhite = 0;
3055 PL_thismad = 0;
5db06880 3056
cd81e915 3057 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3058 if (PL_pending_ident)
3059 return S_pending_ident(aTHX);
3060
3061 /* previous token ate up our whitespace? */
cd81e915
NC
3062 if (!PL_lasttoke && PL_nextwhite) {
3063 PL_thiswhite = PL_nextwhite;
3064 PL_nextwhite = 0;
5db06880
NC
3065 }
3066
3067 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3068 PL_realtokenstart = -1;
3069 PL_thistoken = 0;
5db06880
NC
3070 optype = yylex();
3071 s = PL_bufptr;
cd81e915 3072 assert(PL_curforce < 0);
5db06880 3073
cd81e915
NC
3074 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3075 if (!PL_thistoken) {
3076 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3077 PL_thistoken = newSVpvs("");
5db06880 3078 else {
c35e046a 3079 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3080 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3081 }
3082 }
cd81e915
NC
3083 if (PL_thismad) /* install head */
3084 CURMAD('X', PL_thistoken);
5db06880
NC
3085 }
3086
3087 /* last whitespace of a sublex? */
cd81e915
NC
3088 if (optype == ')' && PL_endwhite) {
3089 CURMAD('X', PL_endwhite);
5db06880
NC
3090 }
3091
cd81e915 3092 if (!PL_thismad) {
5db06880
NC
3093
3094 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3095 if (!PL_thiswhite && !PL_endwhite && !optype) {
3096 sv_free(PL_thistoken);
3097 PL_thistoken = 0;
5db06880
NC
3098 return 0;
3099 }
3100
3101 /* put off final whitespace till peg */
3102 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3103 PL_nextwhite = PL_thiswhite;
3104 PL_thiswhite = 0;
5db06880 3105 }
cd81e915
NC
3106 else if (PL_thisopen) {
3107 CURMAD('q', PL_thisopen);
3108 if (PL_thistoken)
3109 sv_free(PL_thistoken);
3110 PL_thistoken = 0;
5db06880
NC
3111 }
3112 else {
3113 /* Store actual token text as madprop X */
cd81e915 3114 CURMAD('X', PL_thistoken);
5db06880
NC
3115 }
3116
cd81e915 3117 if (PL_thiswhite) {
5db06880 3118 /* add preceding whitespace as madprop _ */
cd81e915 3119 CURMAD('_', PL_thiswhite);
5db06880
NC
3120 }
3121
cd81e915 3122 if (PL_thisstuff) {
5db06880 3123 /* add quoted material as madprop = */
cd81e915 3124 CURMAD('=', PL_thisstuff);
5db06880
NC
3125 }
3126
cd81e915 3127 if (PL_thisclose) {
5db06880 3128 /* add terminating quote as madprop Q */
cd81e915 3129 CURMAD('Q', PL_thisclose);
5db06880
NC
3130 }
3131 }
3132
3133 /* special processing based on optype */
3134
3135 switch (optype) {
3136
3137 /* opval doesn't need a TOKEN since it can already store mp */
3138 case WORD:
3139 case METHOD:
3140 case FUNCMETH:
3141 case THING:
3142 case PMFUNC:
3143 case PRIVATEREF:
3144 case FUNC0SUB:
3145 case UNIOPSUB:
3146 case LSTOPSUB:
6154021b
RGS
3147 if (pl_yylval.opval)
3148 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 3149 PL_thismad = 0;
5db06880
NC
3150 return optype;
3151
3152 /* fake EOF */
3153 case 0:
3154 optype = PEG;
cd81e915
NC
3155 if (PL_endwhite) {
3156 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3157 PL_endwhite = 0;
5db06880
NC
3158 }
3159 break;
3160
3161 case ']':
3162 case '}':
cd81e915 3163 if (PL_faketokens)
5db06880
NC
3164 break;
3165 /* remember any fake bracket that lexer is about to discard */
3166 if (PL_lex_brackets == 1 &&
3167 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3168 {
3169 s = PL_bufptr;
3170 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3171 s++;
3172 if (*s == '}') {
cd81e915
NC
3173 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3174 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3175 PL_thiswhite = 0;
5db06880
NC
3176 PL_bufptr = s - 1;
3177 break; /* don't bother looking for trailing comment */
3178 }
3179 else
3180 s = PL_bufptr;
3181 }
3182 if (optype == ']')
3183 break;
3184 /* FALLTHROUGH */
3185
3186 /* attach a trailing comment to its statement instead of next token */
3187 case ';':
cd81e915 3188 if (PL_faketokens)
5db06880
NC
3189 break;
3190 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3191 s = PL_bufptr;
3192 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3193 s++;
3194 if (*s == '\n' || *s == '#') {
3195 while (s < PL_bufend && *s != '\n')
3196 s++;
3197 if (s < PL_bufend)
3198 s++;
cd81e915
NC
3199 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3200 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3201 PL_thiswhite = 0;
5db06880
NC
3202 PL_bufptr = s;
3203 }
3204 }
3205 break;
3206
3207 /* pval */
3208 case LABEL:
3209 break;
3210
3211 /* ival */
3212 default:
3213 break;
3214
3215 }
3216
3217 /* Create new token struct. Note: opvals return early above. */
6154021b 3218 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 3219 PL_thismad = 0;
5db06880
NC
3220 return optype;
3221}
3222#endif
3223
468aa647 3224STATIC char *
cc6ed77d 3225S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3226 dVAR;
7918f24d
NC
3227
3228 PERL_ARGS_ASSERT_TOKENIZE_USE;
3229
468aa647
RGS
3230 if (PL_expect != XSTATE)
3231 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3232 is_use ? "use" : "no"));
29595ff2 3233 s = SKIPSPACE1(s);
468aa647
RGS
3234 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3235 s = force_version(s, TRUE);
29595ff2 3236 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3237 start_force(PL_curforce);
9ded7720 3238 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3239 force_next(WORD);
3240 }
3241 else if (*s == 'v') {
3242 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3243 s = force_version(s, FALSE);
3244 }
3245 }
3246 else {
3247 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3248 s = force_version(s, FALSE);
3249 }
6154021b 3250 pl_yylval.ival = is_use;
468aa647
RGS
3251 return s;
3252}
748a9306 3253#ifdef DEBUGGING
27da23d5 3254 static const char* const exp_name[] =
09bef843 3255 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3256 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3257 };
748a9306 3258#endif
463ee0b2 3259
02aa26ce
NT
3260/*
3261 yylex
3262
3263 Works out what to call the token just pulled out of the input
3264 stream. The yacc parser takes care of taking the ops we return and
3265 stitching them into a tree.
3266
3267 Returns:
3268 PRIVATEREF
3269
3270 Structure:
3271 if read an identifier
3272 if we're in a my declaration
3273 croak if they tried to say my($foo::bar)
3274 build the ops for a my() declaration
3275 if it's an access to a my() variable
3276 are we in a sort block?
3277 croak if my($a); $a <=> $b
3278 build ops for access to a my() variable
3279 if in a dq string, and they've said @foo and we can't find @foo
3280 croak
3281 build ops for a bareword
3282 if we already built the token before, use it.
3283*/
3284
20141f0e 3285
dba4d153
JH
3286#ifdef __SC__
3287#pragma segment Perl_yylex
3288#endif
dba4d153 3289int
dba4d153 3290Perl_yylex(pTHX)
20141f0e 3291{
97aff369 3292 dVAR;
3afc138a 3293 register char *s = PL_bufptr;
378cc40b 3294 register char *d;
463ee0b2 3295 STRLEN len;
aa7440fb 3296 bool bof = FALSE;
a687059c 3297
10edeb5d
JH
3298 /* orig_keyword, gvp, and gv are initialized here because
3299 * jump to the label just_a_word_zero can bypass their
3300 * initialization later. */
3301 I32 orig_keyword = 0;
3302 GV *gv = NULL;
3303 GV **gvp = NULL;
3304
bbf60fe6 3305 DEBUG_T( {
396482e1 3306 SV* tmp = newSVpvs("");
b6007c36
DM
3307 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3308 (IV)CopLINE(PL_curcop),
3309 lex_state_names[PL_lex_state],
3310 exp_name[PL_expect],
3311 pv_display(tmp, s, strlen(s), 0, 60));
3312 SvREFCNT_dec(tmp);
bbf60fe6 3313 } );
02aa26ce 3314 /* check if there's an identifier for us to look at */
ba979b31 3315 if (PL_pending_ident)
bbf60fe6 3316 return REPORT(S_pending_ident(aTHX));
bbce6d69 3317
02aa26ce
NT
3318 /* no identifier pending identification */
3319
3280af22 3320 switch (PL_lex_state) {
79072805
LW
3321#ifdef COMMENTARY
3322 case LEX_NORMAL: /* Some compilers will produce faster */
3323 case LEX_INTERPNORMAL: /* code if we comment these out. */
3324 break;
3325#endif
3326
09bef843 3327 /* when we've already built the next token, just pull it out of the queue */
79072805 3328 case LEX_KNOWNEXT:
5db06880
NC
3329#ifdef PERL_MAD
3330 PL_lasttoke--;
6154021b 3331 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 3332 if (PL_madskills) {
cd81e915 3333 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3334 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 3335 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 3336 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
3337 PL_thismad->mad_val = 0;
3338 mad_free(PL_thismad);
3339 PL_thismad = 0;
5db06880
NC
3340 }
3341 }
3342 if (!PL_lasttoke) {
3343 PL_lex_state = PL_lex_defer;
3344 PL_expect = PL_lex_expect;
3345 PL_lex_defer = LEX_NORMAL;
3346 if (!PL_nexttoke[PL_lasttoke].next_type)
3347 return yylex();
3348 }
3349#else
3280af22 3350 PL_nexttoke--;
6154021b 3351 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3352 if (!PL_nexttoke) {
3353 PL_lex_state = PL_lex_defer;
3354 PL_expect = PL_lex_expect;
3355 PL_lex_defer = LEX_NORMAL;
463ee0b2 3356 }
5db06880
NC
3357#endif
3358#ifdef PERL_MAD
3359 /* FIXME - can these be merged? */
3360 return(PL_nexttoke[PL_lasttoke].next_type);
3361#else
bbf60fe6 3362 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3363#endif
79072805 3364
02aa26ce 3365 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3366 when we get here, PL_bufptr is at the \
02aa26ce 3367 */
79072805
LW
3368 case LEX_INTERPCASEMOD:
3369#ifdef DEBUGGING
3280af22 3370 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3371 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3372#endif
02aa26ce 3373 /* handle \E or end of string */
3280af22 3374 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3375 /* if at a \E */
3280af22 3376 if (PL_lex_casemods) {
f54cb97a 3377 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3378 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3379
3792a11b
NC
3380 if (PL_bufptr != PL_bufend
3381 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3382 PL_bufptr += 2;
3383 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3384#ifdef PERL_MAD
3385 if (PL_madskills)
6b29d1f5 3386 PL_thistoken = newSVpvs("\\E");
5db06880 3387#endif
a0d0e21e 3388 }
bbf60fe6 3389 return REPORT(')');
79072805 3390 }
5db06880
NC
3391#ifdef PERL_MAD
3392 while (PL_bufptr != PL_bufend &&
3393 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3394 if (!PL_thiswhite)
6b29d1f5 3395 PL_thiswhite = newSVpvs("");
cd81e915 3396 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3397 PL_bufptr += 2;
3398 }
3399#else
3280af22
NIS
3400 if (PL_bufptr != PL_bufend)
3401 PL_bufptr += 2;
5db06880 3402#endif
3280af22 3403 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3404 return yylex();
79072805
LW
3405 }
3406 else {
607df283 3407 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3408 "### Saw case modifier\n"); });
3280af22 3409 s = PL_bufptr + 1;
6e909404 3410 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3411#ifdef PERL_MAD
cd81e915 3412 if (!PL_thiswhite)
6b29d1f5 3413 PL_thiswhite = newSVpvs("");
cd81e915 3414 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3415#endif
89122651 3416 PL_bufptr = s + 3;
6e909404
JH
3417 PL_lex_state = LEX_INTERPCONCAT;
3418 return yylex();
a0d0e21e 3419 }
6e909404 3420 else {
90771dc0 3421 I32 tmp;
5db06880
NC
3422 if (!PL_madskills) /* when just compiling don't need correct */
3423 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3424 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3425 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3426 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3427 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3428 return REPORT(')');
6e909404
JH
3429 }
3430 if (PL_lex_casemods > 10)
3431 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3432 PL_lex_casestack[PL_lex_casemods++] = *s;
3433 PL_lex_casestack[PL_lex_casemods] = '\0';
3434 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3435 start_force(PL_curforce);
9ded7720 3436 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3437 force_next('(');
cd81e915 3438 start_force(PL_curforce);
6e909404 3439 if (*s == 'l')
9ded7720 3440 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3441 else if (*s == 'u')
9ded7720 3442 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3443 else if (*s == 'L')
9ded7720 3444 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3445 else if (*s == 'U')
9ded7720 3446 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3447 else if (*s == 'Q')
9ded7720 3448 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3449 else
3450 Perl_croak(aTHX_ "panic: yylex");
5db06880 3451 if (PL_madskills) {
a5849ce5
NC
3452 SV* const tmpsv = newSVpvs("\\ ");
3453 /* replace the space with the character we want to escape
3454 */
3455 SvPVX(tmpsv)[1] = *s;
5db06880
NC
3456 curmad('_', tmpsv);
3457 }
6e909404 3458 PL_bufptr = s + 1;
a0d0e21e 3459 }
79072805 3460 force_next(FUNC);
3280af22
NIS
3461 if (PL_lex_starts) {
3462 s = PL_bufptr;
3463 PL_lex_starts = 0;
5db06880
NC
3464#ifdef PERL_MAD
3465 if (PL_madskills) {
cd81e915
NC
3466 if (PL_thistoken)
3467 sv_free(PL_thistoken);
6b29d1f5 3468 PL_thistoken = newSVpvs("");
5db06880
NC
3469 }
3470#endif
131b3ad0
DM
3471 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3472 if (PL_lex_casemods == 1 && PL_lex_inpat)
3473 OPERATOR(',');
3474 else
3475 Aop(OP_CONCAT);
79072805
LW
3476 }
3477 else
cea2e8a9 3478 return yylex();
79072805
LW
3479 }
3480
55497cff 3481 case LEX_INTERPPUSH:
bbf60fe6 3482 return REPORT(sublex_push());
55497cff 3483
79072805 3484 case LEX_INTERPSTART:
3280af22 3485 if (PL_bufptr == PL_bufend)
bbf60fe6 3486 return REPORT(sublex_done());
607df283 3487 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3488 "### Interpolated variable\n"); });
3280af22
NIS
3489 PL_expect = XTERM;
3490 PL_lex_dojoin = (*PL_bufptr == '@');
3491 PL_lex_state = LEX_INTERPNORMAL;
3492 if (PL_lex_dojoin) {
cd81e915 3493 start_force(PL_curforce);
9ded7720 3494 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3495 force_next(',');
cd81e915 3496 start_force(PL_curforce);
a0d0e21e 3497 force_ident("\"", '$');
cd81e915 3498 start_force(PL_curforce);
9ded7720 3499 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3500 force_next('$');
cd81e915 3501 start_force(PL_curforce);
9ded7720 3502 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3503 force_next('(');
cd81e915 3504 start_force(PL_curforce);
9ded7720 3505 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3506 force_next(FUNC);
3507 }
3280af22
NIS
3508 if (PL_lex_starts++) {
3509 s = PL_bufptr;
5db06880
NC
3510#ifdef PERL_MAD
3511 if (PL_madskills) {
cd81e915
NC
3512 if (PL_thistoken)
3513 sv_free(PL_thistoken);
6b29d1f5 3514 PL_thistoken = newSVpvs("");
5db06880
NC
3515 }
3516#endif
131b3ad0
DM
3517 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3518 if (!PL_lex_casemods && PL_lex_inpat)
3519 OPERATOR(',');
3520 else
3521 Aop(OP_CONCAT);
79072805 3522 }
cea2e8a9 3523 return yylex();
79072805
LW
3524
3525 case LEX_INTERPENDMAYBE:
3280af22
NIS
3526 if (intuit_more(PL_bufptr)) {
3527 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3528 break;
3529 }
3530 /* FALL THROUGH */
3531
3532 case LEX_INTERPEND:
3280af22
NIS