This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Correct the paramter to Perl_op_xmldump(). The one that got away from
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
6ef55633 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
eb0d8d16
NC
26#define new_constant(a,b,c,d,e,f,g) \
27 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
28
6154021b 29#define pl_yylval (PL_parser->yylval)
d3b6f988 30
acdf0a21
DM
31/* YYINITDEPTH -- initial size of the parser's stacks. */
32#define YYINITDEPTH 200
33
199e78b7
DM
34/* XXX temporary backwards compatibility */
35#define PL_lex_brackets (PL_parser->lex_brackets)
36#define PL_lex_brackstack (PL_parser->lex_brackstack)
37#define PL_lex_casemods (PL_parser->lex_casemods)
38#define PL_lex_casestack (PL_parser->lex_casestack)
39#define PL_lex_defer (PL_parser->lex_defer)
40#define PL_lex_dojoin (PL_parser->lex_dojoin)
41#define PL_lex_expect (PL_parser->lex_expect)
42#define PL_lex_formbrack (PL_parser->lex_formbrack)
43#define PL_lex_inpat (PL_parser->lex_inpat)
44#define PL_lex_inwhat (PL_parser->lex_inwhat)
45#define PL_lex_op (PL_parser->lex_op)
46#define PL_lex_repl (PL_parser->lex_repl)
47#define PL_lex_starts (PL_parser->lex_starts)
48#define PL_lex_stuff (PL_parser->lex_stuff)
49#define PL_multi_start (PL_parser->multi_start)
50#define PL_multi_open (PL_parser->multi_open)
51#define PL_multi_close (PL_parser->multi_close)
52#define PL_pending_ident (PL_parser->pending_ident)
53#define PL_preambled (PL_parser->preambled)
54#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 55#define PL_linestr (PL_parser->linestr)
c2598295
DM
56#define PL_expect (PL_parser->expect)
57#define PL_copline (PL_parser->copline)
f06b5848
DM
58#define PL_bufptr (PL_parser->bufptr)
59#define PL_oldbufptr (PL_parser->oldbufptr)
60#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
61#define PL_linestart (PL_parser->linestart)
62#define PL_bufend (PL_parser->bufend)
63#define PL_last_uni (PL_parser->last_uni)
64#define PL_last_lop (PL_parser->last_lop)
65#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 66#define PL_lex_state (PL_parser->lex_state)
2f9285f8 67#define PL_rsfp (PL_parser->rsfp)
5486870f 68#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
69#define PL_in_my (PL_parser->in_my)
70#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 71#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 72#define PL_multi_end (PL_parser->multi_end)
13765c85 73#define PL_error_count (PL_parser->error_count)
199e78b7
DM
74
75#ifdef PERL_MAD
76# define PL_endwhite (PL_parser->endwhite)
77# define PL_faketokens (PL_parser->faketokens)
78# define PL_lasttoke (PL_parser->lasttoke)
79# define PL_nextwhite (PL_parser->nextwhite)
80# define PL_realtokenstart (PL_parser->realtokenstart)
81# define PL_skipwhite (PL_parser->skipwhite)
82# define PL_thisclose (PL_parser->thisclose)
83# define PL_thismad (PL_parser->thismad)
84# define PL_thisopen (PL_parser->thisopen)
85# define PL_thisstuff (PL_parser->thisstuff)
86# define PL_thistoken (PL_parser->thistoken)
87# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
88# define PL_thiswhite (PL_parser->thiswhite)
89# define PL_nexttoke (PL_parser->nexttoke)
90# define PL_curforce (PL_parser->curforce)
91#else
92# define PL_nexttoke (PL_parser->nexttoke)
93# define PL_nexttype (PL_parser->nexttype)
94# define PL_nextval (PL_parser->nextval)
199e78b7
DM
95#endif
96
3cbf51f5
DM
97static int
98S_pending_ident(pTHX);
199e78b7 99
0bd48802 100static const char ident_too_long[] = "Identifier too long";
c445ea15 101static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 102
6e3aabd6 103#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
104static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
105static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 106#endif
51371543 107
29595ff2 108#ifdef PERL_MAD
29595ff2 109# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 110# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 111#else
5db06880 112# define CURMAD(slot,sv)
9ded7720 113# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
114#endif
115
9059aa12
LW
116#define XFAKEBRACK 128
117#define XENUMMASK 127
118
39e02b42
JH
119#ifdef USE_UTF8_SCRIPTS
120# define UTF (!IN_BYTES)
2b9d42f0 121#else
746b446a 122# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 123#endif
a0ed51b3 124
61f0cdd9 125/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
126 * 1999-02-27 mjd-perl-patch@plover.com */
127#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
128
bf4acbe4
GS
129/* On MacOS, respect nonbreaking spaces */
130#ifdef MACOS_TRADITIONAL
131#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
132#else
133#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
134#endif
135
ffb4593c
NT
136/* LEX_* are values for PL_lex_state, the state of the lexer.
137 * They are arranged oddly so that the guard on the switch statement
79072805
LW
138 * can get by with a single comparison (if the compiler is smart enough).
139 */
140
fb73857a
PP
141/* #define LEX_NOTPARSING 11 is done in perl.h. */
142
b6007c36
DM
143#define LEX_NORMAL 10 /* normal code (ie not within "...") */
144#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
145#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
146#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
147#define LEX_INTERPSTART 6 /* expecting the start of a $var */
148
149 /* at end of code, eg "$x" followed by: */
150#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
151#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
152
153#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
154 string or after \E, $foo, etc */
155#define LEX_INTERPCONST 2 /* NOT USED */
156#define LEX_FORMLINE 1 /* expecting a format line */
157#define LEX_KNOWNEXT 0 /* next token known; just return it */
158
79072805 159
bbf60fe6 160#ifdef DEBUGGING
27da23d5 161static const char* const lex_state_names[] = {
bbf60fe6
DM
162 "KNOWNEXT",
163 "FORMLINE",
164 "INTERPCONST",
165 "INTERPCONCAT",
166 "INTERPENDMAYBE",
167 "INTERPEND",
168 "INTERPSTART",
169 "INTERPPUSH",
170 "INTERPCASEMOD",
171 "INTERPNORMAL",
172 "NORMAL"
173};
174#endif
175
79072805
LW
176#ifdef ff_next
177#undef ff_next
d48672a2
LW
178#endif
179
79072805 180#include "keywords.h"
fe14fcc3 181
ffb4593c
NT
182/* CLINE is a macro that ensures PL_copline has a sane value */
183
ae986130
LW
184#ifdef CLINE
185#undef CLINE
186#endif
57843af0 187#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 188
5db06880 189#ifdef PERL_MAD
29595ff2
NC
190# define SKIPSPACE0(s) skipspace0(s)
191# define SKIPSPACE1(s) skipspace1(s)
192# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
193# define PEEKSPACE(s) skipspace2(s,0)
194#else
195# define SKIPSPACE0(s) skipspace(s)
196# define SKIPSPACE1(s) skipspace(s)
197# define SKIPSPACE2(s,tsv) skipspace(s)
198# define PEEKSPACE(s) skipspace(s)
199#endif
200
ffb4593c
NT
201/*
202 * Convenience functions to return different tokens and prime the
9cbb5ea2 203 * lexer for the next token. They all take an argument.
ffb4593c
NT
204 *
205 * TOKEN : generic token (used for '(', DOLSHARP, etc)
206 * OPERATOR : generic operator
207 * AOPERATOR : assignment operator
208 * PREBLOCK : beginning the block after an if, while, foreach, ...
209 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
210 * PREREF : *EXPR where EXPR is not a simple identifier
211 * TERM : expression term
212 * LOOPX : loop exiting command (goto, last, dump, etc)
213 * FTST : file test operator
214 * FUN0 : zero-argument function
2d2e263d 215 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
216 * BOop : bitwise or or xor
217 * BAop : bitwise and
218 * SHop : shift operator
219 * PWop : power operator
9cbb5ea2 220 * PMop : pattern-matching operator
ffb4593c
NT
221 * Aop : addition-level operator
222 * Mop : multiplication-level operator
223 * Eop : equality-testing operator
e5edeb50 224 * Rop : relational operator <= != gt
ffb4593c
NT
225 *
226 * Also see LOP and lop() below.
227 */
228
998054bd 229#ifdef DEBUGGING /* Serve -DT. */
704d4215 230# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 231#else
bbf60fe6 232# define REPORT(retval) (retval)
998054bd
SC
233#endif
234
bbf60fe6
DM
235#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
236#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
237#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
238#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
239#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
240#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
241#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
242#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
243#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
244#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
245#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
246#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
247#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
248#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
249#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
250#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
251#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
252#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
253#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
254#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 255
a687059c
LW
256/* This bit of chicanery makes a unary function followed by
257 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
258 * The UNIDOR macro is for unary functions that can be followed by the //
259 * operator (such as C<shift // 0>).
a687059c 260 */
376fcdbf 261#define UNI2(f,x) { \
6154021b 262 pl_yylval.ival = f; \
376fcdbf
AL
263 PL_expect = x; \
264 PL_bufptr = s; \
265 PL_last_uni = PL_oldbufptr; \
266 PL_last_lop_op = f; \
267 if (*s == '(') \
268 return REPORT( (int)FUNC1 ); \
29595ff2 269 s = PEEKSPACE(s); \
376fcdbf
AL
270 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
271 }
6f33ba73
RGS
272#define UNI(f) UNI2(f,XTERM)
273#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 274
376fcdbf 275#define UNIBRACK(f) { \
6154021b 276 pl_yylval.ival = f; \
376fcdbf
AL
277 PL_bufptr = s; \
278 PL_last_uni = PL_oldbufptr; \
279 if (*s == '(') \
280 return REPORT( (int)FUNC1 ); \
29595ff2 281 s = PEEKSPACE(s); \
376fcdbf
AL
282 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
283 }
79072805 284
9f68db38 285/* grandfather return to old style */
6154021b 286#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 287
8fa7f367
JH
288#ifdef DEBUGGING
289
6154021b 290/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
291enum token_type {
292 TOKENTYPE_NONE,
293 TOKENTYPE_IVAL,
6154021b 294 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
295 TOKENTYPE_PVAL,
296 TOKENTYPE_OPVAL,
297 TOKENTYPE_GVVAL
298};
299
6d4a66ac
NC
300static struct debug_tokens {
301 const int token;
302 enum token_type type;
303 const char *name;
304} const debug_tokens[] =
9041c2e3 305{
bbf60fe6
DM
306 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
307 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
308 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
309 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
310 { ARROW, TOKENTYPE_NONE, "ARROW" },
311 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
312 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
313 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
314 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
315 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 316 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
317 { DO, TOKENTYPE_NONE, "DO" },
318 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
319 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
320 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
321 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
322 { ELSE, TOKENTYPE_NONE, "ELSE" },
323 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
324 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
325 { FOR, TOKENTYPE_IVAL, "FOR" },
326 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
330 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
331 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 332 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
333 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
334 { IF, TOKENTYPE_IVAL, "IF" },
335 { LABEL, TOKENTYPE_PVAL, "LABEL" },
336 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
337 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
338 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
339 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
340 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
341 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
342 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
343 { MY, TOKENTYPE_IVAL, "MY" },
344 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
351 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
352 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
353 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
354 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
355 { PREINC, TOKENTYPE_NONE, "PREINC" },
356 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
357 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
358 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
359 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
360 { SUB, TOKENTYPE_NONE, "SUB" },
361 { THING, TOKENTYPE_OPVAL, "THING" },
362 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
363 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
364 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
365 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
366 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
367 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 368 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
369 { WHILE, TOKENTYPE_IVAL, "WHILE" },
370 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 371 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
372};
373
6154021b 374/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 375
bbf60fe6 376STATIC int
704d4215 377S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 378{
97aff369 379 dVAR;
bbf60fe6 380 if (DEBUG_T_TEST) {
bd61b366 381 const char *name = NULL;
bbf60fe6 382 enum token_type type = TOKENTYPE_NONE;
f54cb97a 383 const struct debug_tokens *p;
396482e1 384 SV* const report = newSVpvs("<== ");
bbf60fe6 385
f54cb97a 386 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
387 if (p->token == (int)rv) {
388 name = p->name;
389 type = p->type;
390 break;
391 }
392 }
393 if (name)
54667de8 394 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
395 else if ((char)rv > ' ' && (char)rv < '~')
396 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
397 else if (!rv)
396482e1 398 sv_catpvs(report, "EOF");
bbf60fe6
DM
399 else
400 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
401 switch (type) {
402 case TOKENTYPE_NONE:
403 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
404 break;
405 case TOKENTYPE_IVAL:
704d4215 406 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
407 break;
408 case TOKENTYPE_OPNUM:
409 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 410 PL_op_name[lvalp->ival]);
bbf60fe6
DM
411 break;
412 case TOKENTYPE_PVAL:
704d4215 413 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
414 break;
415 case TOKENTYPE_OPVAL:
704d4215 416 if (lvalp->opval) {
401441c0 417 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
418 PL_op_name[lvalp->opval->op_type]);
419 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 420 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 421 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
422 }
423
424 }
401441c0 425 else
396482e1 426 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
427 break;
428 }
b6007c36 429 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
430 };
431 return (int)rv;
998054bd
SC
432}
433
b6007c36
DM
434
435/* print the buffer with suitable escapes */
436
437STATIC void
15f169a1 438S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 439{
396482e1 440 SV* const tmp = newSVpvs("");
b6007c36
DM
441 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
442 SvREFCNT_dec(tmp);
443}
444
8fa7f367
JH
445#endif
446
ffb4593c
NT
447/*
448 * S_ao
449 *
c963b151
BD
450 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
451 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
452 */
453
76e3520e 454STATIC int
cea2e8a9 455S_ao(pTHX_ int toketype)
a0d0e21e 456{
97aff369 457 dVAR;
3280af22
NIS
458 if (*PL_bufptr == '=') {
459 PL_bufptr++;
a0d0e21e 460 if (toketype == ANDAND)
6154021b 461 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 462 else if (toketype == OROR)
6154021b 463 pl_yylval.ival = OP_ORASSIGN;
c963b151 464 else if (toketype == DORDOR)
6154021b 465 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
466 toketype = ASSIGNOP;
467 }
468 return toketype;
469}
470
ffb4593c
NT
471/*
472 * S_no_op
473 * When Perl expects an operator and finds something else, no_op
474 * prints the warning. It always prints "<something> found where
475 * operator expected. It prints "Missing semicolon on previous line?"
476 * if the surprise occurs at the start of the line. "do you need to
477 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
478 * where the compiler doesn't know if foo is a method call or a function.
479 * It prints "Missing operator before end of line" if there's nothing
480 * after the missing operator, or "... before <...>" if there is something
481 * after the missing operator.
482 */
483
76e3520e 484STATIC void
15f169a1 485S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 486{
97aff369 487 dVAR;
9d4ba2ae
AL
488 char * const oldbp = PL_bufptr;
489 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 490
1189a94a
GS
491 if (!s)
492 s = oldbp;
07c798fb 493 else
1189a94a 494 PL_bufptr = s;
cea2e8a9 495 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
496 if (ckWARN_d(WARN_SYNTAX)) {
497 if (is_first)
498 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
499 "\t(Missing semicolon on previous line?)\n");
500 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 501 const char *t;
c35e046a
AL
502 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
503 NOOP;
56da5a46
RGS
504 if (t < PL_bufptr && isSPACE(*t))
505 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
506 "\t(Do you need to predeclare %.*s?)\n",
551405c4 507 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
508 }
509 else {
510 assert(s >= oldbp);
511 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 512 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 513 }
07c798fb 514 }
3280af22 515 PL_bufptr = oldbp;
8990e307
LW
516}
517
ffb4593c
NT
518/*
519 * S_missingterm
520 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 521 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
522 * If we're in a delimited string and the delimiter is a control
523 * character, it's reformatted into a two-char sequence like ^C.
524 * This is fatal.
525 */
526
76e3520e 527STATIC void
cea2e8a9 528S_missingterm(pTHX_ char *s)
8990e307 529{
97aff369 530 dVAR;
8990e307
LW
531 char tmpbuf[3];
532 char q;
533 if (s) {
9d4ba2ae 534 char * const nl = strrchr(s,'\n');
d2719217 535 if (nl)
8990e307
LW
536 *nl = '\0';
537 }
9d116dd7
JH
538 else if (
539#ifdef EBCDIC
540 iscntrl(PL_multi_close)
541#else
542 PL_multi_close < 32 || PL_multi_close == 127
543#endif
544 ) {
8990e307 545 *tmpbuf = '^';
585ec06d 546 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
547 tmpbuf[2] = '\0';
548 s = tmpbuf;
549 }
550 else {
eb160463 551 *tmpbuf = (char)PL_multi_close;
8990e307
LW
552 tmpbuf[1] = '\0';
553 s = tmpbuf;
554 }
555 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 556 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 557}
79072805 558
ef89dcc3 559#define FEATURE_IS_ENABLED(name) \
0d863452 560 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 561 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b
NC
562/* The longest string we pass in. */
563#define MAX_FEATURE_LEN (sizeof("switch")-1)
564
0d863452
RH
565/*
566 * S_feature_is_enabled
567 * Check whether the named feature is enabled.
568 */
569STATIC bool
15f169a1 570S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 571{
97aff369 572 dVAR;
0d863452 573 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b
NC
574 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
575 assert(namelen <= MAX_FEATURE_LEN);
576 memcpy(&he_name[8], name, namelen);
d4c19fe8 577
7b9ef140 578 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
579}
580
ffb4593c
NT
581/*
582 * Perl_deprecate
ffb4593c
NT
583 */
584
79072805 585void
15f169a1 586Perl_deprecate(pTHX_ const char *const s)
a0d0e21e 587{
599cee73 588 if (ckWARN(WARN_DEPRECATED))
9014280d 589 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
590}
591
12bcd1a6 592void
15f169a1 593Perl_deprecate_old(pTHX_ const char *const s)
12bcd1a6
PM
594{
595 /* This function should NOT be called for any new deprecated warnings */
596 /* Use Perl_deprecate instead */
597 /* */
598 /* It is here to maintain backward compatibility with the pre-5.8 */
599 /* warnings category hierarchy. The "deprecated" category used to */
600 /* live under the "syntax" category. It is now a top-level category */
601 /* in its own right. */
602
603 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 604 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
605 "Use of %s is deprecated", s);
606}
607
ffb4593c 608/*
9cbb5ea2
GS
609 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
610 * utf16-to-utf8-reversed.
ffb4593c
NT
611 */
612
c39cd008
GS
613#ifdef PERL_CR_FILTER
614static void
615strip_return(SV *sv)
616{
95a20fc0 617 register const char *s = SvPVX_const(sv);
9d4ba2ae 618 register const char * const e = s + SvCUR(sv);
c39cd008
GS
619 /* outer loop optimized to do nothing if there are no CR-LFs */
620 while (s < e) {
621 if (*s++ == '\r' && *s == '\n') {
622 /* hit a CR-LF, need to copy the rest */
623 register char *d = s - 1;
624 *d++ = *s++;
625 while (s < e) {
626 if (*s == '\r' && s[1] == '\n')
627 s++;
628 *d++ = *s++;
629 }
630 SvCUR(sv) -= s - d;
631 return;
632 }
633 }
634}
a868473f 635
76e3520e 636STATIC I32
c39cd008 637S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 638{
f54cb97a 639 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
640 if (count > 0 && !maxlen)
641 strip_return(sv);
642 return count;
a868473f
NIS
643}
644#endif
645
199e78b7
DM
646
647
ffb4593c
NT
648/*
649 * Perl_lex_start
5486870f 650 *
e3abe207 651 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
652 *
653 * rsfp is the opened file handle to read from (if any),
654 *
655 * line holds any initial content already read from the file (or in
656 * the case of no file, such as an eval, the whole contents);
657 *
658 * new_filter indicates that this is a new file and it shouldn't inherit
659 * the filters from the current parser (ie require).
ffb4593c
NT
660 */
661
a0d0e21e 662void
5486870f 663Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 664{
97aff369 665 dVAR;
6ef55633 666 const char *s = NULL;
8990e307 667 STRLEN len;
5486870f 668 yy_parser *parser, *oparser;
acdf0a21
DM
669
670 /* create and initialise a parser */
671
199e78b7 672 Newxz(parser, 1, yy_parser);
5486870f 673 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
674 PL_parser = parser;
675
676 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
677 parser->ps = parser->stack;
678 parser->stack_size = YYINITDEPTH;
679
680 parser->stack->state = 0;
681 parser->yyerrstatus = 0;
682 parser->yychar = YYEMPTY; /* Cause a token to be read. */
683
e3abe207
DM
684 /* on scope exit, free this parser and restore any outer one */
685 SAVEPARSER(parser);
7c4baf47 686 parser->saved_curcop = PL_curcop;
e3abe207 687
acdf0a21 688 /* initialise lexer state */
8990e307 689
fb205e7a
DM
690#ifdef PERL_MAD
691 parser->curforce = -1;
692#else
693 parser->nexttoke = 0;
694#endif
ca4cfd28 695 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 696 parser->copline = NOLINE;
5afb0a62 697 parser->lex_state = LEX_NORMAL;
c2598295 698 parser->expect = XSTATE;
2f9285f8 699 parser->rsfp = rsfp;
56b27c9a 700 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
5486870f 701 : (AV*)SvREFCNT_inc(oparser->rsfp_filters);
2f9285f8 702
199e78b7
DM
703 Newx(parser->lex_brackstack, 120, char);
704 Newx(parser->lex_casestack, 12, char);
705 *parser->lex_casestack = '\0';
02b34bbe 706
10efb74f
NC
707 if (line) {
708 s = SvPV_const(line, len);
709 } else {
710 len = 0;
711 }
bdc0bf6f 712
10efb74f 713 if (!len) {
bdc0bf6f 714 parser->linestr = newSVpvs("\n;");
10efb74f 715 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 716 parser->linestr = newSVsv(line);
10efb74f 717 if (s[len-1] != ';')
bdc0bf6f 718 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
719 } else {
720 SvTEMP_off(line);
721 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 722 parser->linestr = line;
8990e307 723 }
f06b5848
DM
724 parser->oldoldbufptr =
725 parser->oldbufptr =
726 parser->bufptr =
727 parser->linestart = SvPVX(parser->linestr);
728 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
729 parser->last_lop = parser->last_uni = NULL;
79072805 730}
a687059c 731
e3abe207
DM
732
733/* delete a parser object */
734
735void
736Perl_parser_free(pTHX_ const yy_parser *parser)
737{
7c4baf47 738 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
739 SvREFCNT_dec(parser->linestr);
740
2f9285f8
DM
741 if (parser->rsfp == PerlIO_stdin())
742 PerlIO_clearerr(parser->rsfp);
743 else if (parser->rsfp && parser->old_parser
744 && parser->rsfp != parser->old_parser->rsfp)
745 PerlIO_close(parser->rsfp);
5486870f 746 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 747
e3abe207
DM
748 Safefree(parser->stack);
749 Safefree(parser->lex_brackstack);
750 Safefree(parser->lex_casestack);
751 PL_parser = parser->old_parser;
752 Safefree(parser);
753}
754
755
ffb4593c
NT
756/*
757 * Perl_lex_end
9cbb5ea2
GS
758 * Finalizer for lexing operations. Must be called when the parser is
759 * done with the lexer.
ffb4593c
NT
760 */
761
463ee0b2 762void
864dbfa3 763Perl_lex_end(pTHX)
463ee0b2 764{
97aff369 765 dVAR;
3280af22 766 PL_doextract = FALSE;
463ee0b2
LW
767}
768
ffb4593c
NT
769/*
770 * S_incline
771 * This subroutine has nothing to do with tilting, whether at windmills
772 * or pinball tables. Its name is short for "increment line". It
57843af0 773 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 774 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
775 * # line 500 "foo.pm"
776 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
777 */
778
76e3520e 779STATIC void
d9095cec 780S_incline(pTHX_ const char *s)
463ee0b2 781{
97aff369 782 dVAR;
d9095cec
NC
783 const char *t;
784 const char *n;
785 const char *e;
463ee0b2 786
57843af0 787 CopLINE_inc(PL_curcop);
463ee0b2
LW
788 if (*s++ != '#')
789 return;
d4c19fe8
AL
790 while (SPACE_OR_TAB(*s))
791 s++;
73659bf1
GS
792 if (strnEQ(s, "line", 4))
793 s += 4;
794 else
795 return;
084592ab 796 if (SPACE_OR_TAB(*s))
73659bf1 797 s++;
4e553d73 798 else
73659bf1 799 return;
d4c19fe8
AL
800 while (SPACE_OR_TAB(*s))
801 s++;
463ee0b2
LW
802 if (!isDIGIT(*s))
803 return;
d4c19fe8 804
463ee0b2
LW
805 n = s;
806 while (isDIGIT(*s))
807 s++;
bf4acbe4 808 while (SPACE_OR_TAB(*s))
463ee0b2 809 s++;
73659bf1 810 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 811 s++;
73659bf1
GS
812 e = t + 1;
813 }
463ee0b2 814 else {
c35e046a
AL
815 t = s;
816 while (!isSPACE(*t))
817 t++;
73659bf1 818 e = t;
463ee0b2 819 }
bf4acbe4 820 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
821 e++;
822 if (*e != '\n' && *e != '\0')
823 return; /* false alarm */
824
f4dd75d9 825 if (t - s > 0) {
d9095cec 826 const STRLEN len = t - s;
8a5ee598 827#ifndef USE_ITHREADS
19bad673
NC
828 SV *const temp_sv = CopFILESV(PL_curcop);
829 const char *cf;
830 STRLEN tmplen;
831
832 if (temp_sv) {
833 cf = SvPVX(temp_sv);
834 tmplen = SvCUR(temp_sv);
835 } else {
836 cf = NULL;
837 tmplen = 0;
838 }
839
42d9b98d 840 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
841 /* must copy *{"::_<(eval N)[oldfilename:L]"}
842 * to *{"::_<newfilename"} */
44867030
NC
843 /* However, the long form of evals is only turned on by the
844 debugger - usually they're "(eval %lu)" */
845 char smallbuf[128];
846 char *tmpbuf;
847 GV **gvp;
d9095cec 848 STRLEN tmplen2 = len;
798b63bc 849 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
850 tmpbuf = smallbuf;
851 else
2ae0db35 852 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
853 tmpbuf[0] = '_';
854 tmpbuf[1] = '<';
2ae0db35 855 memcpy(tmpbuf + 2, cf, tmplen);
44867030 856 tmplen += 2;
8a5ee598
RGS
857 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
858 if (gvp) {
44867030
NC
859 char *tmpbuf2;
860 GV *gv2;
861
862 if (tmplen2 + 2 <= sizeof smallbuf)
863 tmpbuf2 = smallbuf;
864 else
865 Newx(tmpbuf2, tmplen2 + 2, char);
866
867 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
868 /* Either they malloc'd it, or we malloc'd it,
869 so no prefix is present in ours. */
870 tmpbuf2[0] = '_';
871 tmpbuf2[1] = '<';
872 }
873
874 memcpy(tmpbuf2 + 2, s, tmplen2);
875 tmplen2 += 2;
876
8a5ee598 877 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 878 if (!isGV(gv2)) {
8a5ee598 879 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
880 /* adjust ${"::_<newfilename"} to store the new file name */
881 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
882 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
883 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
884 }
44867030
NC
885
886 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 887 }
e66cf94c 888 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 889 }
8a5ee598 890#endif
05ec9bb3 891 CopFILE_free(PL_curcop);
d9095cec 892 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 893 }
57843af0 894 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
895}
896
29595ff2 897#ifdef PERL_MAD
cd81e915 898/* skip space before PL_thistoken */
29595ff2
NC
899
900STATIC char *
901S_skipspace0(pTHX_ register char *s)
902{
903 s = skipspace(s);
904 if (!PL_madskills)
905 return s;
cd81e915
NC
906 if (PL_skipwhite) {
907 if (!PL_thiswhite)
6b29d1f5 908 PL_thiswhite = newSVpvs("");
cd81e915
NC
909 sv_catsv(PL_thiswhite, PL_skipwhite);
910 sv_free(PL_skipwhite);
911 PL_skipwhite = 0;
912 }
913 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
914 return s;
915}
916
cd81e915 917/* skip space after PL_thistoken */
29595ff2
NC
918
919STATIC char *
920S_skipspace1(pTHX_ register char *s)
921{
d4c19fe8 922 const char *start = s;
29595ff2
NC
923 I32 startoff = start - SvPVX(PL_linestr);
924
925 s = skipspace(s);
926 if (!PL_madskills)
927 return s;
928 start = SvPVX(PL_linestr) + startoff;
cd81e915 929 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 930 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
931 PL_thistoken = newSVpvn(tstart, start - tstart);
932 }
933 PL_realtokenstart = -1;
934 if (PL_skipwhite) {
935 if (!PL_nextwhite)
6b29d1f5 936 PL_nextwhite = newSVpvs("");
cd81e915
NC
937 sv_catsv(PL_nextwhite, PL_skipwhite);
938 sv_free(PL_skipwhite);
939 PL_skipwhite = 0;
29595ff2
NC
940 }
941 return s;
942}
943
944STATIC char *
945S_skipspace2(pTHX_ register char *s, SV **svp)
946{
c35e046a
AL
947 char *start;
948 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
949 const I32 startoff = s - SvPVX(PL_linestr);
950
29595ff2
NC
951 s = skipspace(s);
952 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
953 if (!PL_madskills || !svp)
954 return s;
955 start = SvPVX(PL_linestr) + startoff;
cd81e915 956 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 957 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
958 PL_thistoken = newSVpvn(tstart, start - tstart);
959 PL_realtokenstart = -1;
29595ff2 960 }
cd81e915 961 if (PL_skipwhite) {
29595ff2 962 if (!*svp)
6b29d1f5 963 *svp = newSVpvs("");
cd81e915
NC
964 sv_setsv(*svp, PL_skipwhite);
965 sv_free(PL_skipwhite);
966 PL_skipwhite = 0;
29595ff2
NC
967 }
968
969 return s;
970}
971#endif
972
80a702cd 973STATIC void
15f169a1 974S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
975{
976 AV *av = CopFILEAVx(PL_curcop);
977 if (av) {
b9f83d2f 978 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
979 if (orig_sv)
980 sv_setsv(sv, orig_sv);
981 else
982 sv_setpvn(sv, buf, len);
80a702cd
RGS
983 (void)SvIOK_on(sv);
984 SvIV_set(sv, 0);
985 av_store(av, (I32)CopLINE(PL_curcop), sv);
986 }
987}
988
ffb4593c
NT
989/*
990 * S_skipspace
991 * Called to gobble the appropriate amount and type of whitespace.
992 * Skips comments as well.
993 */
994
76e3520e 995STATIC char *
cea2e8a9 996S_skipspace(pTHX_ register char *s)
a687059c 997{
97aff369 998 dVAR;
5db06880
NC
999#ifdef PERL_MAD
1000 int curoff;
1001 int startoff = s - SvPVX(PL_linestr);
1002
cd81e915
NC
1003 if (PL_skipwhite) {
1004 sv_free(PL_skipwhite);
1005 PL_skipwhite = 0;
5db06880
NC
1006 }
1007#endif
1008
3280af22 1009 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1010 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1011 s++;
5db06880
NC
1012#ifdef PERL_MAD
1013 goto done;
1014#else
463ee0b2 1015 return s;
5db06880 1016#endif
463ee0b2
LW
1017 }
1018 for (;;) {
fd049845 1019 STRLEN prevlen;
09bef843 1020 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 1021 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
1022 while (s < PL_bufend && isSPACE(*s)) {
1023 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1024 incline(s);
1025 }
ffb4593c
NT
1026
1027 /* comment */
3280af22
NIS
1028 if (s < PL_bufend && *s == '#') {
1029 while (s < PL_bufend && *s != '\n')
463ee0b2 1030 s++;
60e6418e 1031 if (s < PL_bufend) {
463ee0b2 1032 s++;
60e6418e
GS
1033 if (PL_in_eval && !PL_rsfp) {
1034 incline(s);
1035 continue;
1036 }
1037 }
463ee0b2 1038 }
ffb4593c
NT
1039
1040 /* only continue to recharge the buffer if we're at the end
1041 * of the buffer, we're not reading from a source filter, and
1042 * we're in normal lexing mode
1043 */
09bef843
SB
1044 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1045 PL_lex_state == LEX_FORMLINE)
5db06880
NC
1046#ifdef PERL_MAD
1047 goto done;
1048#else
463ee0b2 1049 return s;
5db06880 1050#endif
ffb4593c
NT
1051
1052 /* try to recharge the buffer */
5db06880
NC
1053#ifdef PERL_MAD
1054 curoff = s - SvPVX(PL_linestr);
1055#endif
1056
9cbb5ea2 1057 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1058 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1059 {
5db06880
NC
1060#ifdef PERL_MAD
1061 if (PL_madskills && curoff != startoff) {
cd81e915 1062 if (!PL_skipwhite)
6b29d1f5 1063 PL_skipwhite = newSVpvs("");
cd81e915 1064 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1065 curoff - startoff);
1066 }
1067
1068 /* mustn't throw out old stuff yet if madpropping */
1069 SvCUR(PL_linestr) = curoff;
1070 s = SvPVX(PL_linestr) + curoff;
1071 *s = 0;
1072 if (curoff && s[-1] == '\n')
1073 s[-1] = ' ';
1074#endif
1075
9cbb5ea2 1076 /* end of file. Add on the -p or -n magic */
cd81e915 1077 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1078 if (PL_minus_p) {
5db06880 1079#ifdef PERL_MAD
6502358f 1080 sv_catpvs(PL_linestr,
5db06880
NC
1081 ";}continue{print or die qq(-p destination: $!\\n);}");
1082#else
6502358f 1083 sv_setpvs(PL_linestr,
01a19ab0 1084 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1085#endif
3280af22 1086 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1087 }
01a19ab0 1088 else if (PL_minus_n) {
5db06880
NC
1089#ifdef PERL_MAD
1090 sv_catpvn(PL_linestr, ";}", 2);
1091#else
01a19ab0 1092 sv_setpvn(PL_linestr, ";}", 2);
5db06880 1093#endif
01a19ab0
NC
1094 PL_minus_n = 0;
1095 }
a0d0e21e 1096 else
5db06880
NC
1097#ifdef PERL_MAD
1098 sv_catpvn(PL_linestr,";", 1);
1099#else
4147a61b 1100 sv_setpvn(PL_linestr,";", 1);
5db06880 1101#endif
ffb4593c
NT
1102
1103 /* reset variables for next time we lex */
9cbb5ea2 1104 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1105 = SvPVX(PL_linestr)
1106#ifdef PERL_MAD
1107 + curoff
1108#endif
1109 ;
3280af22 1110 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1111 PL_last_lop = PL_last_uni = NULL;
ffb4593c 1112
4c84d7f2 1113 /* Close the filehandle. Could be from
ffb4593c
NT
1114 * STDIN, or a regular file. If we were reading code from
1115 * STDIN (because the commandline held no -e or filename)
1116 * then we don't close it, we reset it so the code can
1117 * read from STDIN too.
1118 */
1119
4c84d7f2 1120 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
3280af22 1121 PerlIO_clearerr(PL_rsfp);
8990e307 1122 else
3280af22 1123 (void)PerlIO_close(PL_rsfp);
4608196e 1124 PL_rsfp = NULL;
463ee0b2
LW
1125 return s;
1126 }
ffb4593c
NT
1127
1128 /* not at end of file, so we only read another line */
09bef843
SB
1129 /* make corresponding updates to old pointers, for yyerror() */
1130 oldprevlen = PL_oldbufptr - PL_bufend;
1131 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1132 if (PL_last_uni)
1133 oldunilen = PL_last_uni - PL_bufend;
1134 if (PL_last_lop)
1135 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1136 PL_linestart = PL_bufptr = s + prevlen;
1137 PL_bufend = s + SvCUR(PL_linestr);
1138 s = PL_bufptr;
09bef843
SB
1139 PL_oldbufptr = s + oldprevlen;
1140 PL_oldoldbufptr = s + oldoldprevlen;
1141 if (PL_last_uni)
1142 PL_last_uni = s + oldunilen;
1143 if (PL_last_lop)
1144 PL_last_lop = s + oldloplen;
a0d0e21e 1145 incline(s);
ffb4593c
NT
1146
1147 /* debugger active and we're not compiling the debugger code,
1148 * so store the line into the debugger's array of lines
1149 */
80a702cd 1150 if (PERLDB_LINE && PL_curstash != PL_debstash)
5fa550fb 1151 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1152 }
5db06880
NC
1153
1154#ifdef PERL_MAD
1155 done:
1156 if (PL_madskills) {
cd81e915 1157 if (!PL_skipwhite)
6b29d1f5 1158 PL_skipwhite = newSVpvs("");
5db06880
NC
1159 curoff = s - SvPVX(PL_linestr);
1160 if (curoff - startoff)
cd81e915 1161 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1162 curoff - startoff);
1163 }
1164 return s;
1165#endif
a687059c 1166}
378cc40b 1167
ffb4593c
NT
1168/*
1169 * S_check_uni
1170 * Check the unary operators to ensure there's no ambiguity in how they're
1171 * used. An ambiguous piece of code would be:
1172 * rand + 5
1173 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1174 * the +5 is its argument.
1175 */
1176
76e3520e 1177STATIC void
cea2e8a9 1178S_check_uni(pTHX)
ba106d47 1179{
97aff369 1180 dVAR;
d4c19fe8
AL
1181 const char *s;
1182 const char *t;
2f3197b3 1183
3280af22 1184 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1185 return;
3280af22
NIS
1186 while (isSPACE(*PL_last_uni))
1187 PL_last_uni++;
c35e046a
AL
1188 s = PL_last_uni;
1189 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1190 s++;
3280af22 1191 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1192 return;
6136c704 1193
0453d815 1194 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1195 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1196 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1197 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1198 }
2f3197b3
LW
1199}
1200
ffb4593c
NT
1201/*
1202 * LOP : macro to build a list operator. Its behaviour has been replaced
1203 * with a subroutine, S_lop() for which LOP is just another name.
1204 */
1205
a0d0e21e
LW
1206#define LOP(f,x) return lop(f,x,s)
1207
ffb4593c
NT
1208/*
1209 * S_lop
1210 * Build a list operator (or something that might be one). The rules:
1211 * - if we have a next token, then it's a list operator [why?]
1212 * - if the next thing is an opening paren, then it's a function
1213 * - else it's a list operator
1214 */
1215
76e3520e 1216STATIC I32
a0be28da 1217S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1218{
97aff369 1219 dVAR;
6154021b 1220 pl_yylval.ival = f;
35c8bce7 1221 CLINE;
3280af22
NIS
1222 PL_expect = x;
1223 PL_bufptr = s;
1224 PL_last_lop = PL_oldbufptr;
eb160463 1225 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1226#ifdef PERL_MAD
1227 if (PL_lasttoke)
1228 return REPORT(LSTOP);
1229#else
3280af22 1230 if (PL_nexttoke)
bbf60fe6 1231 return REPORT(LSTOP);
5db06880 1232#endif
79072805 1233 if (*s == '(')
bbf60fe6 1234 return REPORT(FUNC);
29595ff2 1235 s = PEEKSPACE(s);
79072805 1236 if (*s == '(')
bbf60fe6 1237 return REPORT(FUNC);
79072805 1238 else
bbf60fe6 1239 return REPORT(LSTOP);
79072805
LW
1240}
1241
5db06880
NC
1242#ifdef PERL_MAD
1243 /*
1244 * S_start_force
1245 * Sets up for an eventual force_next(). start_force(0) basically does
1246 * an unshift, while start_force(-1) does a push. yylex removes items
1247 * on the "pop" end.
1248 */
1249
1250STATIC void
1251S_start_force(pTHX_ int where)
1252{
1253 int i;
1254
cd81e915 1255 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1256 where = PL_lasttoke;
cd81e915
NC
1257 assert(PL_curforce < 0 || PL_curforce == where);
1258 if (PL_curforce != where) {
5db06880
NC
1259 for (i = PL_lasttoke; i > where; --i) {
1260 PL_nexttoke[i] = PL_nexttoke[i-1];
1261 }
1262 PL_lasttoke++;
1263 }
cd81e915 1264 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1265 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1266 PL_curforce = where;
1267 if (PL_nextwhite) {
5db06880 1268 if (PL_madskills)
6b29d1f5 1269 curmad('^', newSVpvs(""));
cd81e915 1270 CURMAD('_', PL_nextwhite);
5db06880
NC
1271 }
1272}
1273
1274STATIC void
1275S_curmad(pTHX_ char slot, SV *sv)
1276{
1277 MADPROP **where;
1278
1279 if (!sv)
1280 return;
cd81e915
NC
1281 if (PL_curforce < 0)
1282 where = &PL_thismad;
5db06880 1283 else
cd81e915 1284 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1285
cd81e915 1286 if (PL_faketokens)
5db06880
NC
1287 sv_setpvn(sv, "", 0);
1288 else {
1289 if (!IN_BYTES) {
1290 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1291 SvUTF8_on(sv);
1292 else if (PL_encoding) {
1293 sv_recode_to_utf8(sv, PL_encoding);
1294 }
1295 }
1296 }
1297
1298 /* keep a slot open for the head of the list? */
1299 if (slot != '_' && *where && (*where)->mad_key == '^') {
1300 (*where)->mad_key = slot;
035e2bcc 1301 sv_free((SV*)((*where)->mad_val));
5db06880
NC
1302 (*where)->mad_val = (void*)sv;
1303 }
1304 else
1305 addmad(newMADsv(slot, sv), where, 0);
1306}
1307#else
b3f24c00
MHM
1308# define start_force(where) NOOP
1309# define curmad(slot, sv) NOOP
5db06880
NC
1310#endif
1311
ffb4593c
NT
1312/*
1313 * S_force_next
9cbb5ea2 1314 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1315 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1316 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1317 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1318 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1319 */
1320
4e553d73 1321STATIC void
cea2e8a9 1322S_force_next(pTHX_ I32 type)
79072805 1323{
97aff369 1324 dVAR;
704d4215
GG
1325#ifdef DEBUGGING
1326 if (DEBUG_T_TEST) {
1327 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1328 tokereport(THING, &NEXTVAL_NEXTTOKE);
1329 }
1330#endif
5db06880 1331#ifdef PERL_MAD
cd81e915 1332 if (PL_curforce < 0)
5db06880 1333 start_force(PL_lasttoke);
cd81e915 1334 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1335 if (PL_lex_state != LEX_KNOWNEXT)
1336 PL_lex_defer = PL_lex_state;
1337 PL_lex_state = LEX_KNOWNEXT;
1338 PL_lex_expect = PL_expect;
cd81e915 1339 PL_curforce = -1;
5db06880 1340#else
3280af22
NIS
1341 PL_nexttype[PL_nexttoke] = type;
1342 PL_nexttoke++;
1343 if (PL_lex_state != LEX_KNOWNEXT) {
1344 PL_lex_defer = PL_lex_state;
1345 PL_lex_expect = PL_expect;
1346 PL_lex_state = LEX_KNOWNEXT;
79072805 1347 }
5db06880 1348#endif
79072805
LW
1349}
1350
d0a148a6 1351STATIC SV *
15f169a1 1352S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1353{
97aff369 1354 dVAR;
740cce10
NC
1355 SV * const sv = newSVpvn_utf8(start, len,
1356 UTF && !IN_BYTES
1357 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1358 return sv;
1359}
1360
ffb4593c
NT
1361/*
1362 * S_force_word
1363 * When the lexer knows the next thing is a word (for instance, it has
1364 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1365 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1366 * lookahead.
ffb4593c
NT
1367 *
1368 * Arguments:
b1b65b59 1369 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1370 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1371 * int check_keyword : if true, Perl checks to make sure the word isn't
1372 * a keyword (do this if the word is a label, e.g. goto FOO)
1373 * int allow_pack : if true, : characters will also be allowed (require,
1374 * use, etc. do this)
9cbb5ea2 1375 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1376 */
1377
76e3520e 1378STATIC char *
cea2e8a9 1379S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1380{
97aff369 1381 dVAR;
463ee0b2
LW
1382 register char *s;
1383 STRLEN len;
4e553d73 1384
29595ff2 1385 start = SKIPSPACE1(start);
463ee0b2 1386 s = start;
7e2040f0 1387 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1388 (allow_pack && *s == ':') ||
15f0808c 1389 (allow_initial_tick && *s == '\'') )
a0d0e21e 1390 {
3280af22 1391 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1392 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1393 return start;
cd81e915 1394 start_force(PL_curforce);
5db06880
NC
1395 if (PL_madskills)
1396 curmad('X', newSVpvn(start,s-start));
463ee0b2 1397 if (token == METHOD) {
29595ff2 1398 s = SKIPSPACE1(s);
463ee0b2 1399 if (*s == '(')
3280af22 1400 PL_expect = XTERM;
463ee0b2 1401 else {
3280af22 1402 PL_expect = XOPERATOR;
463ee0b2 1403 }
79072805 1404 }
e74e6b3d 1405 if (PL_madskills)
63575281 1406 curmad('g', newSVpvs( "forced" ));
9ded7720 1407 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1408 = (OP*)newSVOP(OP_CONST,0,
1409 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1410 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1411 force_next(token);
1412 }
1413 return s;
1414}
1415
ffb4593c
NT
1416/*
1417 * S_force_ident
9cbb5ea2 1418 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1419 * text only contains the "foo" portion. The first argument is a pointer
1420 * to the "foo", and the second argument is the type symbol to prefix.
1421 * Forces the next token to be a "WORD".
9cbb5ea2 1422 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1423 */
1424
76e3520e 1425STATIC void
bfed75c6 1426S_force_ident(pTHX_ register const char *s, int kind)
79072805 1427{
97aff369 1428 dVAR;
c35e046a 1429 if (*s) {
90e5519e
NC
1430 const STRLEN len = strlen(s);
1431 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1432 start_force(PL_curforce);
9ded7720 1433 NEXTVAL_NEXTTOKE.opval = o;
79072805 1434 force_next(WORD);
748a9306 1435 if (kind) {
11343788 1436 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1437 /* XXX see note in pp_entereval() for why we forgo typo
1438 warnings if the symbol must be introduced in an eval.
1439 GSAR 96-10-12 */
90e5519e
NC
1440 gv_fetchpvn_flags(s, len,
1441 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1442 : GV_ADD,
1443 kind == '$' ? SVt_PV :
1444 kind == '@' ? SVt_PVAV :
1445 kind == '%' ? SVt_PVHV :
a0d0e21e 1446 SVt_PVGV
90e5519e 1447 );
748a9306 1448 }
79072805
LW
1449 }
1450}
1451
1571675a
GS
1452NV
1453Perl_str_to_version(pTHX_ SV *sv)
1454{
1455 NV retval = 0.0;
1456 NV nshift = 1.0;
1457 STRLEN len;
cfd0369c 1458 const char *start = SvPV_const(sv,len);
9d4ba2ae 1459 const char * const end = start + len;
504618e9 1460 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1461 while (start < end) {
ba210ebe 1462 STRLEN skip;
1571675a
GS
1463 UV n;
1464 if (utf)
9041c2e3 1465 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1466 else {
1467 n = *(U8*)start;
1468 skip = 1;
1469 }
1470 retval += ((NV)n)/nshift;
1471 start += skip;
1472 nshift *= 1000;
1473 }
1474 return retval;
1475}
1476
4e553d73 1477/*
ffb4593c
NT
1478 * S_force_version
1479 * Forces the next token to be a version number.
e759cc13
RGS
1480 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1481 * and if "guessing" is TRUE, then no new token is created (and the caller
1482 * must use an alternative parsing method).
ffb4593c
NT
1483 */
1484
76e3520e 1485STATIC char *
e759cc13 1486S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1487{
97aff369 1488 dVAR;
5f66b61c 1489 OP *version = NULL;
44dcb63b 1490 char *d;
5db06880
NC
1491#ifdef PERL_MAD
1492 I32 startoff = s - SvPVX(PL_linestr);
1493#endif
89bfa8cd 1494
29595ff2 1495 s = SKIPSPACE1(s);
89bfa8cd 1496
44dcb63b 1497 d = s;
dd629d5b 1498 if (*d == 'v')
44dcb63b 1499 d++;
44dcb63b 1500 if (isDIGIT(*d)) {
e759cc13
RGS
1501 while (isDIGIT(*d) || *d == '_' || *d == '.')
1502 d++;
5db06880
NC
1503#ifdef PERL_MAD
1504 if (PL_madskills) {
cd81e915 1505 start_force(PL_curforce);
5db06880
NC
1506 curmad('X', newSVpvn(s,d-s));
1507 }
1508#endif
9f3d182e 1509 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1510 SV *ver;
6154021b
RGS
1511 s = scan_num(s, &pl_yylval);
1512 version = pl_yylval.opval;
dd629d5b
GS
1513 ver = cSVOPx(version)->op_sv;
1514 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1515 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1516 SvNV_set(ver, str_to_version(ver));
1571675a 1517 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1518 }
89bfa8cd 1519 }
5db06880
NC
1520 else if (guessing) {
1521#ifdef PERL_MAD
1522 if (PL_madskills) {
cd81e915
NC
1523 sv_free(PL_nextwhite); /* let next token collect whitespace */
1524 PL_nextwhite = 0;
5db06880
NC
1525 s = SvPVX(PL_linestr) + startoff;
1526 }
1527#endif
e759cc13 1528 return s;
5db06880 1529 }
89bfa8cd
PP
1530 }
1531
5db06880
NC
1532#ifdef PERL_MAD
1533 if (PL_madskills && !version) {
cd81e915
NC
1534 sv_free(PL_nextwhite); /* let next token collect whitespace */
1535 PL_nextwhite = 0;
5db06880
NC
1536 s = SvPVX(PL_linestr) + startoff;
1537 }
1538#endif
89bfa8cd 1539 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1540 start_force(PL_curforce);
9ded7720 1541 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1542 force_next(WORD);
89bfa8cd 1543
e759cc13 1544 return s;
89bfa8cd
PP
1545}
1546
ffb4593c
NT
1547/*
1548 * S_tokeq
1549 * Tokenize a quoted string passed in as an SV. It finds the next
1550 * chunk, up to end of string or a backslash. It may make a new
1551 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1552 * turns \\ into \.
1553 */
1554
76e3520e 1555STATIC SV *
cea2e8a9 1556S_tokeq(pTHX_ SV *sv)
79072805 1557{
97aff369 1558 dVAR;
79072805
LW
1559 register char *s;
1560 register char *send;
1561 register char *d;
b3ac6de7
IZ
1562 STRLEN len = 0;
1563 SV *pv = sv;
79072805
LW
1564
1565 if (!SvLEN(sv))
b3ac6de7 1566 goto finish;
79072805 1567
a0d0e21e 1568 s = SvPV_force(sv, len);
21a311ee 1569 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1570 goto finish;
463ee0b2 1571 send = s + len;
79072805
LW
1572 while (s < send && *s != '\\')
1573 s++;
1574 if (s == send)
b3ac6de7 1575 goto finish;
79072805 1576 d = s;
be4731d2 1577 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 1578 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 1579 }
79072805
LW
1580 while (s < send) {
1581 if (*s == '\\') {
a0d0e21e 1582 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1583 s++; /* all that, just for this */
1584 }
1585 *d++ = *s++;
1586 }
1587 *d = '\0';
95a20fc0 1588 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1589 finish:
3280af22 1590 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 1591 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
1592 return sv;
1593}
1594
ffb4593c
NT
1595/*
1596 * Now come three functions related to double-quote context,
1597 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1598 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1599 * interact with PL_lex_state, and create fake ( ... ) argument lists
1600 * to handle functions and concatenation.
1601 * They assume that whoever calls them will be setting up a fake
1602 * join call, because each subthing puts a ',' after it. This lets
1603 * "lower \luPpEr"
1604 * become
1605 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1606 *
1607 * (I'm not sure whether the spurious commas at the end of lcfirst's
1608 * arguments and join's arguments are created or not).
1609 */
1610
1611/*
1612 * S_sublex_start
6154021b 1613 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
1614 *
1615 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 1616 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
1617 *
1618 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1619 *
1620 * Everything else becomes a FUNC.
1621 *
1622 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1623 * had an OP_CONST or OP_READLINE). This just sets us up for a
1624 * call to S_sublex_push().
1625 */
1626
76e3520e 1627STATIC I32
cea2e8a9 1628S_sublex_start(pTHX)
79072805 1629{
97aff369 1630 dVAR;
6154021b 1631 register const I32 op_type = pl_yylval.ival;
79072805
LW
1632
1633 if (op_type == OP_NULL) {
6154021b 1634 pl_yylval.opval = PL_lex_op;
5f66b61c 1635 PL_lex_op = NULL;
79072805
LW
1636 return THING;
1637 }
1638 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1639 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1640
1641 if (SvTYPE(sv) == SVt_PVIV) {
1642 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1643 STRLEN len;
96a5add6 1644 const char * const p = SvPV_const(sv, len);
740cce10 1645 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
1646 SvREFCNT_dec(sv);
1647 sv = nsv;
4e553d73 1648 }
6154021b 1649 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1650 PL_lex_stuff = NULL;
6f33ba73
RGS
1651 /* Allow <FH> // "foo" */
1652 if (op_type == OP_READLINE)
1653 PL_expect = XTERMORDORDOR;
79072805
LW
1654 return THING;
1655 }
e3f73d4e
RGS
1656 else if (op_type == OP_BACKTICK && PL_lex_op) {
1657 /* readpipe() vas overriden */
1658 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 1659 pl_yylval.opval = PL_lex_op;
9b201d7d 1660 PL_lex_op = NULL;
e3f73d4e
RGS
1661 PL_lex_stuff = NULL;
1662 return THING;
1663 }
79072805 1664
3280af22 1665 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 1666 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
1667 PL_sublex_info.sub_op = PL_lex_op;
1668 PL_lex_state = LEX_INTERPPUSH;
55497cff 1669
3280af22
NIS
1670 PL_expect = XTERM;
1671 if (PL_lex_op) {
6154021b 1672 pl_yylval.opval = PL_lex_op;
5f66b61c 1673 PL_lex_op = NULL;
55497cff
PP
1674 return PMFUNC;
1675 }
1676 else
1677 return FUNC;
1678}
1679
ffb4593c
NT
1680/*
1681 * S_sublex_push
1682 * Create a new scope to save the lexing state. The scope will be
1683 * ended in S_sublex_done. Returns a '(', starting the function arguments
1684 * to the uc, lc, etc. found before.
1685 * Sets PL_lex_state to LEX_INTERPCONCAT.
1686 */
1687
76e3520e 1688STATIC I32
cea2e8a9 1689S_sublex_push(pTHX)
55497cff 1690{
27da23d5 1691 dVAR;
f46d017c 1692 ENTER;
55497cff 1693
3280af22 1694 PL_lex_state = PL_sublex_info.super_state;
651b5b28 1695 SAVEBOOL(PL_lex_dojoin);
3280af22 1696 SAVEI32(PL_lex_brackets);
3280af22
NIS
1697 SAVEI32(PL_lex_casemods);
1698 SAVEI32(PL_lex_starts);
651b5b28 1699 SAVEI8(PL_lex_state);
7766f137 1700 SAVEVPTR(PL_lex_inpat);
98246f1e 1701 SAVEI16(PL_lex_inwhat);
57843af0 1702 SAVECOPLINE(PL_curcop);
3280af22 1703 SAVEPPTR(PL_bufptr);
8452ff4b 1704 SAVEPPTR(PL_bufend);
3280af22
NIS
1705 SAVEPPTR(PL_oldbufptr);
1706 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1707 SAVEPPTR(PL_last_lop);
1708 SAVEPPTR(PL_last_uni);
3280af22
NIS
1709 SAVEPPTR(PL_linestart);
1710 SAVESPTR(PL_linestr);
8edd5f42
RGS
1711 SAVEGENERICPV(PL_lex_brackstack);
1712 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1713
1714 PL_linestr = PL_lex_stuff;
a0714e2c 1715 PL_lex_stuff = NULL;
3280af22 1716
9cbb5ea2
GS
1717 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1718 = SvPVX(PL_linestr);
3280af22 1719 PL_bufend += SvCUR(PL_linestr);
bd61b366 1720 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1721 SAVEFREESV(PL_linestr);
1722
1723 PL_lex_dojoin = FALSE;
1724 PL_lex_brackets = 0;
a02a5408
JC
1725 Newx(PL_lex_brackstack, 120, char);
1726 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1727 PL_lex_casemods = 0;
1728 *PL_lex_casestack = '\0';
1729 PL_lex_starts = 0;
1730 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1731 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1732
1733 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1734 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1735 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1736 else
5f66b61c 1737 PL_lex_inpat = NULL;
79072805 1738
55497cff 1739 return '(';
79072805
LW
1740}
1741
ffb4593c
NT
1742/*
1743 * S_sublex_done
1744 * Restores lexer state after a S_sublex_push.
1745 */
1746
76e3520e 1747STATIC I32
cea2e8a9 1748S_sublex_done(pTHX)
79072805 1749{
27da23d5 1750 dVAR;
3280af22 1751 if (!PL_lex_starts++) {
396482e1 1752 SV * const sv = newSVpvs("");
9aa983d2
JH
1753 if (SvUTF8(PL_linestr))
1754 SvUTF8_on(sv);
3280af22 1755 PL_expect = XOPERATOR;
6154021b 1756 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1757 return THING;
1758 }
1759
3280af22
NIS
1760 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1761 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1762 return yylex();
79072805
LW
1763 }
1764
ffb4593c 1765 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1766 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1767 PL_linestr = PL_lex_repl;
1768 PL_lex_inpat = 0;
1769 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1770 PL_bufend += SvCUR(PL_linestr);
bd61b366 1771 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1772 SAVEFREESV(PL_linestr);
1773 PL_lex_dojoin = FALSE;
1774 PL_lex_brackets = 0;
3280af22
NIS
1775 PL_lex_casemods = 0;
1776 *PL_lex_casestack = '\0';
1777 PL_lex_starts = 0;
25da4f38 1778 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1779 PL_lex_state = LEX_INTERPNORMAL;
1780 PL_lex_starts++;
e9fa98b2
HS
1781 /* we don't clear PL_lex_repl here, so that we can check later
1782 whether this is an evalled subst; that means we rely on the
1783 logic to ensure sublex_done() is called again only via the
1784 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1785 }
e9fa98b2 1786 else {
3280af22 1787 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1788 PL_lex_repl = NULL;
e9fa98b2 1789 }
79072805 1790 return ',';
ffed7fef
LW
1791 }
1792 else {
5db06880
NC
1793#ifdef PERL_MAD
1794 if (PL_madskills) {
cd81e915
NC
1795 if (PL_thiswhite) {
1796 if (!PL_endwhite)
6b29d1f5 1797 PL_endwhite = newSVpvs("");
cd81e915
NC
1798 sv_catsv(PL_endwhite, PL_thiswhite);
1799 PL_thiswhite = 0;
1800 }
1801 if (PL_thistoken)
1802 sv_setpvn(PL_thistoken,"",0);
5db06880 1803 else
cd81e915 1804 PL_realtokenstart = -1;
5db06880
NC
1805 }
1806#endif
f46d017c 1807 LEAVE;
3280af22
NIS
1808 PL_bufend = SvPVX(PL_linestr);
1809 PL_bufend += SvCUR(PL_linestr);
1810 PL_expect = XOPERATOR;
09bef843 1811 PL_sublex_info.sub_inwhat = 0;
79072805 1812 return ')';
ffed7fef
LW
1813 }
1814}
1815
02aa26ce
NT
1816/*
1817 scan_const
1818
1819 Extracts a pattern, double-quoted string, or transliteration. This
1820 is terrifying code.
1821
94def140 1822 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1823 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1824 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1825
94def140
ST
1826 Returns a pointer to the character scanned up to. If this is
1827 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 1828 successfully parsed), will leave an OP for the substring scanned
6154021b 1829 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
1830 by looking at the next characters herself.
1831
02aa26ce
NT
1832 In patterns:
1833 backslashes:
1834 double-quoted style: \r and \n
1835 regexp special ones: \D \s
94def140
ST
1836 constants: \x31
1837 backrefs: \1
02aa26ce
NT
1838 case and quoting: \U \Q \E
1839 stops on @ and $, but not for $ as tail anchor
1840
1841 In transliterations:
1842 characters are VERY literal, except for - not at the start or end
94def140
ST
1843 of the string, which indicates a range. If the range is in bytes,
1844 scan_const expands the range to the full set of intermediate
1845 characters. If the range is in utf8, the hyphen is replaced with
1846 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1847
1848 In double-quoted strings:
1849 backslashes:
1850 double-quoted style: \r and \n
94def140
ST
1851 constants: \x31
1852 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1853 case and quoting: \U \Q \E
1854 stops on @ and $
1855
1856 scan_const does *not* construct ops to handle interpolated strings.
1857 It stops processing as soon as it finds an embedded $ or @ variable
1858 and leaves it to the caller to work out what's going on.
1859
94def140
ST
1860 embedded arrays (whether in pattern or not) could be:
1861 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1862
1863 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1864
1865 $ in pattern could be $foo or could be tail anchor. Assumption:
1866 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1867 followed by one of "()| \r\n\t"
02aa26ce
NT
1868
1869 \1 (backreferences) are turned into $1
1870
1871 The structure of the code is
1872 while (there's a character to process) {
94def140
ST
1873 handle transliteration ranges
1874 skip regexp comments /(?#comment)/ and codes /(?{code})/
1875 skip #-initiated comments in //x patterns
1876 check for embedded arrays
02aa26ce
NT
1877 check for embedded scalars
1878 if (backslash) {
94def140
ST
1879 leave intact backslashes from leaveit (below)
1880 deprecate \1 in substitution replacements
02aa26ce
NT
1881 handle string-changing backslashes \l \U \Q \E, etc.
1882 switch (what was escaped) {
94def140
ST
1883 handle \- in a transliteration (becomes a literal -)
1884 handle \132 (octal characters)
1885 handle \x15 and \x{1234} (hex characters)
1886 handle \N{name} (named characters)
1887 handle \cV (control characters)
1888 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1889 } (end switch)
1890 } (end if backslash)
1891 } (end while character to read)
4e553d73 1892
02aa26ce
NT
1893*/
1894
76e3520e 1895STATIC char *
cea2e8a9 1896S_scan_const(pTHX_ char *start)
79072805 1897{
97aff369 1898 dVAR;
3280af22 1899 register char *send = PL_bufend; /* end of the constant */
561b68a9 1900 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1901 register char *s = start; /* start of the constant */
1902 register char *d = SvPVX(sv); /* destination for copies */
1903 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1904 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1905 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1906 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1907 UV uv;
4c3a8340
ST
1908#ifdef EBCDIC
1909 UV literal_endpoint = 0;
e294cc5d 1910 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1911#endif
012bcf8d 1912
2b9d42f0
NIS
1913 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1914 /* If we are doing a trans and we know we want UTF8 set expectation */
1915 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1916 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1917 }
1918
1919
79072805 1920 while (s < send || dorange) {
02aa26ce 1921 /* get transliterations out of the way (they're most literal) */
3280af22 1922 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1923 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1924 if (dorange) {
1ba5c669
JH
1925 I32 i; /* current expanded character */
1926 I32 min; /* first character in range */
1927 I32 max; /* last character in range */
02aa26ce 1928
e294cc5d
JH
1929#ifdef EBCDIC
1930 UV uvmax = 0;
1931#endif
1932
1933 if (has_utf8
1934#ifdef EBCDIC
1935 && !native_range
1936#endif
1937 ) {
9d4ba2ae 1938 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1939 char *e = d++;
1940 while (e-- > c)
1941 *(e + 1) = *e;
25716404 1942 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1943 /* mark the range as done, and continue */
1944 dorange = FALSE;
1945 didrange = TRUE;
1946 continue;
1947 }
2b9d42f0 1948
95a20fc0 1949 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1950#ifdef EBCDIC
1951 SvGROW(sv,
1952 SvLEN(sv) + (has_utf8 ?
1953 (512 - UTF_CONTINUATION_MARK +
1954 UNISKIP(0x100))
1955 : 256));
1956 /* How many two-byte within 0..255: 128 in UTF-8,
1957 * 96 in UTF-8-mod. */
1958#else
9cbb5ea2 1959 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1960#endif
9cbb5ea2 1961 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1962#ifdef EBCDIC
1963 if (has_utf8) {
1964 int j;
1965 for (j = 0; j <= 1; j++) {
1966 char * const c = (char*)utf8_hop((U8*)d, -1);
1967 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1968 if (j)
1969 min = (U8)uv;
1970 else if (uv < 256)
1971 max = (U8)uv;
1972 else {
1973 max = (U8)0xff; /* only to \xff */
1974 uvmax = uv; /* \x{100} to uvmax */
1975 }
1976 d = c; /* eat endpoint chars */
1977 }
1978 }
1979 else {
1980#endif
1981 d -= 2; /* eat the first char and the - */
1982 min = (U8)*d; /* first char in range */
1983 max = (U8)d[1]; /* last char in range */
1984#ifdef EBCDIC
1985 }
1986#endif
8ada0baa 1987
c2e66d9e 1988 if (min > max) {
01ec43d0 1989 Perl_croak(aTHX_
d1573ac7 1990 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1991 (char)min, (char)max);
c2e66d9e
GS
1992 }
1993
c7f1f016 1994#ifdef EBCDIC
4c3a8340
ST
1995 if (literal_endpoint == 2 &&
1996 ((isLOWER(min) && isLOWER(max)) ||
1997 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1998 if (isLOWER(min)) {
1999 for (i = min; i <= max; i++)
2000 if (isLOWER(i))
db42d148 2001 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2002 } else {
2003 for (i = min; i <= max; i++)
2004 if (isUPPER(i))
db42d148 2005 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2006 }
2007 }
2008 else
2009#endif
2010 for (i = min; i <= max; i++)
e294cc5d
JH
2011#ifdef EBCDIC
2012 if (has_utf8) {
2013 const U8 ch = (U8)NATIVE_TO_UTF(i);
2014 if (UNI_IS_INVARIANT(ch))
2015 *d++ = (U8)i;
2016 else {
2017 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2018 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2019 }
2020 }
2021 else
2022#endif
2023 *d++ = (char)i;
2024
2025#ifdef EBCDIC
2026 if (uvmax) {
2027 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2028 if (uvmax > 0x101)
2029 *d++ = (char)UTF_TO_NATIVE(0xff);
2030 if (uvmax > 0x100)
2031 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2032 }
2033#endif
02aa26ce
NT
2034
2035 /* mark the range as done, and continue */
79072805 2036 dorange = FALSE;
01ec43d0 2037 didrange = TRUE;
4c3a8340
ST
2038#ifdef EBCDIC
2039 literal_endpoint = 0;
2040#endif
79072805 2041 continue;
4e553d73 2042 }
02aa26ce
NT
2043
2044 /* range begins (ignore - as first or last char) */
79072805 2045 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2046 if (didrange) {
1fafa243 2047 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2048 }
e294cc5d
JH
2049 if (has_utf8
2050#ifdef EBCDIC
2051 && !native_range
2052#endif
2053 ) {
25716404 2054 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2055 s++;
2056 continue;
2057 }
79072805
LW
2058 dorange = TRUE;
2059 s++;
01ec43d0
GS
2060 }
2061 else {
2062 didrange = FALSE;
4c3a8340
ST
2063#ifdef EBCDIC
2064 literal_endpoint = 0;
e294cc5d 2065 native_range = TRUE;
4c3a8340 2066#endif
01ec43d0 2067 }
79072805 2068 }
02aa26ce
NT
2069
2070 /* if we get here, we're not doing a transliteration */
2071
0f5d15d6
IZ
2072 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2073 except for the last char, which will be done separately. */
3280af22 2074 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2075 if (s[2] == '#') {
e994fd66 2076 while (s+1 < send && *s != ')')
db42d148 2077 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2078 }
2079 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2080 || (s[2] == '?' && s[3] == '{'))
155aba94 2081 {
cc6b7395 2082 I32 count = 1;
0f5d15d6 2083 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2084 char c;
2085
d9f97599
GS
2086 while (count && (c = *regparse)) {
2087 if (c == '\\' && regparse[1])
2088 regparse++;
4e553d73 2089 else if (c == '{')
cc6b7395 2090 count++;
4e553d73 2091 else if (c == '}')
cc6b7395 2092 count--;
d9f97599 2093 regparse++;
cc6b7395 2094 }
e994fd66 2095 if (*regparse != ')')
5bdf89e7 2096 regparse--; /* Leave one char for continuation. */
0f5d15d6 2097 while (s < regparse)
db42d148 2098 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2099 }
748a9306 2100 }
02aa26ce
NT
2101
2102 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2103 else if (*s == '#' && PL_lex_inpat &&
2104 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2105 while (s+1 < send && *s != '\n')
db42d148 2106 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2107 }
02aa26ce 2108
5d1d4326 2109 /* check for embedded arrays
da6eedaa 2110 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2111 */
1749ea0d
ST
2112 else if (*s == '@' && s[1]) {
2113 if (isALNUM_lazy_if(s+1,UTF))
2114 break;
2115 if (strchr(":'{$", s[1]))
2116 break;
2117 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2118 break; /* in regexp, neither @+ nor @- are interpolated */
2119 }
02aa26ce
NT
2120
2121 /* check for embedded scalars. only stop if we're sure it's a
2122 variable.
2123 */
79072805 2124 else if (*s == '$') {
3280af22 2125 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2126 break;
6002328a 2127 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2128 break; /* in regexp, $ might be tail anchor */
2129 }
02aa26ce 2130
2b9d42f0
NIS
2131 /* End of else if chain - OP_TRANS rejoin rest */
2132
02aa26ce 2133 /* backslashes */
79072805
LW
2134 if (*s == '\\' && s+1 < send) {
2135 s++;
02aa26ce 2136
02aa26ce 2137 /* deprecate \1 in strings and substitution replacements */
3280af22 2138 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2139 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2140 {
599cee73 2141 if (ckWARN(WARN_SYNTAX))
9014280d 2142 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2143 *--s = '$';
2144 break;
2145 }
02aa26ce
NT
2146
2147 /* string-change backslash escapes */
3280af22 2148 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2149 --s;
2150 break;
2151 }
cc74c5bd
ST
2152 /* skip any other backslash escapes in a pattern */
2153 else if (PL_lex_inpat) {
2154 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2155 goto default_action;
2156 }
02aa26ce
NT
2157
2158 /* if we get here, it's either a quoted -, or a digit */
79072805 2159 switch (*s) {
02aa26ce
NT
2160
2161 /* quoted - in transliterations */
79072805 2162 case '-':
3280af22 2163 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2164 *d++ = *s++;
2165 continue;
2166 }
2167 /* FALL THROUGH */
2168 default:
11b8faa4 2169 {
86f97054 2170 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2171 ckWARN(WARN_MISC))
9014280d 2172 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2173 "Unrecognized escape \\%c passed through",
2174 *s);
11b8faa4 2175 /* default action is to copy the quoted character */
f9a63242 2176 goto default_action;
11b8faa4 2177 }
02aa26ce
NT
2178
2179 /* \132 indicates an octal constant */
79072805
LW
2180 case '0': case '1': case '2': case '3':
2181 case '4': case '5': case '6': case '7':
ba210ebe 2182 {
53305cf1
NC
2183 I32 flags = 0;
2184 STRLEN len = 3;
2185 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2186 s += len;
2187 }
012bcf8d 2188 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2189
2190 /* \x24 indicates a hex constant */
79072805 2191 case 'x':
a0ed51b3
LW
2192 ++s;
2193 if (*s == '{') {
9d4ba2ae 2194 char* const e = strchr(s, '}');
a4c04bdc
NC
2195 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2196 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2197 STRLEN len;
355860ce 2198
53305cf1 2199 ++s;
adaeee49 2200 if (!e) {
a0ed51b3 2201 yyerror("Missing right brace on \\x{}");
355860ce 2202 continue;
ba210ebe 2203 }
53305cf1
NC
2204 len = e - s;
2205 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2206 s = e + 1;
a0ed51b3
LW
2207 }
2208 else {
ba210ebe 2209 {
53305cf1 2210 STRLEN len = 2;
a4c04bdc 2211 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2212 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2213 s += len;
2214 }
012bcf8d
GS
2215 }
2216
2217 NUM_ESCAPE_INSERT:
2218 /* Insert oct or hex escaped character.
301d3d20 2219 * There will always enough room in sv since such
db42d148 2220 * escapes will be longer than any UTF-8 sequence
301d3d20 2221 * they can end up as. */
ba7cea30 2222
c7f1f016
NIS
2223 /* We need to map to chars to ASCII before doing the tests
2224 to cover EBCDIC
2225 */
c4d5f83a 2226 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2227 if (!has_utf8 && uv > 255) {
301d3d20
JH
2228 /* Might need to recode whatever we have
2229 * accumulated so far if it contains any
2230 * hibit chars.
2231 *
2232 * (Can't we keep track of that and avoid
2233 * this rescan? --jhi)
012bcf8d 2234 */
c7f1f016 2235 int hicount = 0;
63cd0674
NIS
2236 U8 *c;
2237 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2238 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2239 hicount++;
db42d148 2240 }
012bcf8d 2241 }
63cd0674 2242 if (hicount) {
9d4ba2ae 2243 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2244 U8 *src, *dst;
2245 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2246 src = (U8 *)d - 1;
2247 dst = src+hicount;
2248 d += hicount;
cfd0369c 2249 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2250 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2251 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2252 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2253 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2254 }
2255 else {
63cd0674 2256 *dst-- = *src;
012bcf8d 2257 }
c7f1f016 2258 src--;
012bcf8d
GS
2259 }
2260 }
2261 }
2262
9aa983d2 2263 if (has_utf8 || uv > 255) {
9041c2e3 2264 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2265 has_utf8 = TRUE;
f9a63242
JH
2266 if (PL_lex_inwhat == OP_TRANS &&
2267 PL_sublex_info.sub_op) {
2268 PL_sublex_info.sub_op->op_private |=
2269 (PL_lex_repl ? OPpTRANS_FROM_UTF
2270 : OPpTRANS_TO_UTF);
f9a63242 2271 }
e294cc5d
JH
2272#ifdef EBCDIC
2273 if (uv > 255 && !dorange)
2274 native_range = FALSE;
2275#endif
012bcf8d 2276 }
a0ed51b3 2277 else {
012bcf8d 2278 *d++ = (char)uv;
a0ed51b3 2279 }
012bcf8d
GS
2280 }
2281 else {
c4d5f83a 2282 *d++ = (char) uv;
a0ed51b3 2283 }
79072805 2284 continue;
02aa26ce 2285
b239daa5 2286 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2287 case 'N':
55eda711 2288 ++s;
423cee85
JH
2289 if (*s == '{') {
2290 char* e = strchr(s, '}');
155aba94 2291 SV *res;
423cee85 2292 STRLEN len;
cfd0369c 2293 const char *str;
4e553d73 2294
423cee85 2295 if (!e) {
5777a3f7 2296 yyerror("Missing right brace on \\N{}");
423cee85
JH
2297 e = s - 1;
2298 goto cont_scan;
2299 }
dbc0d4f2
JH
2300 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2301 /* \N{U+...} */
2302 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2303 PERL_SCAN_DISALLOW_PREFIX;
2304 s += 3;
2305 len = e - s;
2306 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2307 if ( e > s && len != (STRLEN)(e - s) ) {
2308 uv = 0xFFFD;
fc8cd66c 2309 }
dbc0d4f2
JH
2310 s = e + 1;
2311 goto NUM_ESCAPE_INSERT;
2312 }
55eda711 2313 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2314 res = new_constant( NULL, 0, "charnames",
eb0d8d16 2315 res, NULL, s - 2, e - s + 3 );
f9a63242
JH
2316 if (has_utf8)
2317 sv_utf8_upgrade(res);
cfd0369c 2318 str = SvPV_const(res,len);
1c47067b
JH
2319#ifdef EBCDIC_NEVER_MIND
2320 /* charnames uses pack U and that has been
2321 * recently changed to do the below uni->native
2322 * mapping, so this would be redundant (and wrong,
2323 * the code point would be doubly converted).
2324 * But leave this in just in case the pack U change
2325 * gets revoked, but the semantics is still
2326 * desireable for charnames. --jhi */
cddc7ef4 2327 {
cfd0369c 2328 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2329
2330 if (uv < 0x100) {
89ebb4a3 2331 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2332
2333 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2334 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2335 str = SvPV_const(res, len);
cddc7ef4
JH
2336 }
2337 }
2338#endif
89491803 2339 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2340 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2341 SvCUR_set(sv, d - ostart);
2342 SvPOK_on(sv);
e4f3eed8 2343 *d = '\0';
f08d6ad9 2344 sv_utf8_upgrade(sv);
d2f449dd 2345 /* this just broke our allocation above... */
eb160463 2346 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2347 d = SvPVX(sv) + SvCUR(sv);
89491803 2348 has_utf8 = TRUE;
f08d6ad9 2349 }
eb160463 2350 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2351 const char * const odest = SvPVX_const(sv);
423cee85 2352
8973db79 2353 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2354 d = SvPVX(sv) + (d - odest);
2355 }
e294cc5d
JH
2356#ifdef EBCDIC
2357 if (!dorange)
2358 native_range = FALSE; /* \N{} is guessed to be Unicode */
2359#endif
423cee85
JH
2360 Copy(str, d, len, char);
2361 d += len;
2362 SvREFCNT_dec(res);
2363 cont_scan:
2364 s = e + 1;
2365 }
2366 else
5777a3f7 2367 yyerror("Missing braces on \\N{}");
423cee85
JH
2368 continue;
2369
02aa26ce 2370 /* \c is a control character */
79072805
LW
2371 case 'c':
2372 s++;
961ce445 2373 if (s < send) {
ba210ebe 2374 U8 c = *s++;
c7f1f016
NIS
2375#ifdef EBCDIC
2376 if (isLOWER(c))
2377 c = toUPPER(c);
2378#endif
db42d148 2379 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2380 }
961ce445
RGS
2381 else {
2382 yyerror("Missing control char name in \\c");
2383 }
79072805 2384 continue;
02aa26ce
NT
2385
2386 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2387 case 'b':
db42d148 2388 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2389 break;
2390 case 'n':
db42d148 2391 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2392 break;
2393 case 'r':
db42d148 2394 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2395 break;
2396 case 'f':
db42d148 2397 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2398 break;
2399 case 't':
db42d148 2400 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2401 break;
34a3fe2a 2402 case 'e':
db42d148 2403 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2404 break;
2405 case 'a':
db42d148 2406 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2407 break;
02aa26ce
NT
2408 } /* end switch */
2409
79072805
LW
2410 s++;
2411 continue;
02aa26ce 2412 } /* end if (backslash) */
4c3a8340
ST
2413#ifdef EBCDIC
2414 else
2415 literal_endpoint++;
2416#endif
02aa26ce 2417
f9a63242 2418 default_action:
2b9d42f0
NIS
2419 /* If we started with encoded form, or already know we want it
2420 and then encode the next character */
2421 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2422 STRLEN len = 1;
5f66b61c
AL
2423 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2424 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2425 s += len;
2426 if (need > len) {
2427 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2428 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2429 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2430 }
5f66b61c 2431 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2432 has_utf8 = TRUE;
e294cc5d
JH
2433#ifdef EBCDIC
2434 if (uv > 255 && !dorange)
2435 native_range = FALSE;
2436#endif
2b9d42f0
NIS
2437 }
2438 else {
2439 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2440 }
02aa26ce
NT
2441 } /* while loop to process each character */
2442
2443 /* terminate the string and set up the sv */
79072805 2444 *d = '\0';
95a20fc0 2445 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2446 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2447 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2448
79072805 2449 SvPOK_on(sv);
9f4817db 2450 if (PL_encoding && !has_utf8) {
d0063567
DK
2451 sv_recode_to_utf8(sv, PL_encoding);
2452 if (SvUTF8(sv))
2453 has_utf8 = TRUE;
9f4817db 2454 }
2b9d42f0 2455 if (has_utf8) {
7e2040f0 2456 SvUTF8_on(sv);
2b9d42f0 2457 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2458 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2459 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2460 }
2461 }
79072805 2462
02aa26ce 2463 /* shrink the sv if we allocated more than we used */
79072805 2464 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2465 SvPV_shrink_to_cur(sv);
79072805 2466 }
02aa26ce 2467
6154021b 2468 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 2469 if (s > PL_bufptr) {
eb0d8d16
NC
2470 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2471 const char *const key = PL_lex_inpat ? "qr" : "q";
2472 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2473 const char *type;
2474 STRLEN typelen;
2475
2476 if (PL_lex_inwhat == OP_TRANS) {
2477 type = "tr";
2478 typelen = 2;
2479 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2480 type = "s";
2481 typelen = 1;
2482 } else {
2483 type = "qq";
2484 typelen = 2;
2485 }
2486
2487 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2488 type, typelen);
2489 }
6154021b 2490 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2491 } else
8990e307 2492 SvREFCNT_dec(sv);
79072805
LW
2493 return s;
2494}
2495
ffb4593c
NT
2496/* S_intuit_more
2497 * Returns TRUE if there's more to the expression (e.g., a subscript),
2498 * FALSE otherwise.
ffb4593c
NT
2499 *
2500 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2501 *
2502 * ->[ and ->{ return TRUE
2503 * { and [ outside a pattern are always subscripts, so return TRUE
2504 * if we're outside a pattern and it's not { or [, then return FALSE
2505 * if we're in a pattern and the first char is a {
2506 * {4,5} (any digits around the comma) returns FALSE
2507 * if we're in a pattern and the first char is a [
2508 * [] returns FALSE
2509 * [SOMETHING] has a funky algorithm to decide whether it's a
2510 * character class or not. It has to deal with things like
2511 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2512 * anything else returns TRUE
2513 */
2514
9cbb5ea2
GS
2515/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2516
76e3520e 2517STATIC int
cea2e8a9 2518S_intuit_more(pTHX_ register char *s)
79072805 2519{
97aff369 2520 dVAR;
3280af22 2521 if (PL_lex_brackets)
79072805
LW
2522 return TRUE;
2523 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2524 return TRUE;
2525 if (*s != '{' && *s != '[')
2526 return FALSE;
3280af22 2527 if (!PL_lex_inpat)
79072805
LW
2528 return TRUE;
2529
2530 /* In a pattern, so maybe we have {n,m}. */
2531 if (*s == '{') {
2532 s++;
2533 if (!isDIGIT(*s))
2534 return TRUE;
2535 while (isDIGIT(*s))
2536 s++;
2537 if (*s == ',')
2538 s++;
2539 while (isDIGIT(*s))
2540 s++;
2541 if (*s == '}')
2542 return FALSE;
2543 return TRUE;
2544
2545 }
2546
2547 /* On the other hand, maybe we have a character class */
2548
2549 s++;
2550 if (*s == ']' || *s == '^')
2551 return FALSE;
2552 else {
ffb4593c 2553 /* this is terrifying, and it works */
79072805
LW
2554 int weight = 2; /* let's weigh the evidence */
2555 char seen[256];
f27ffc4a 2556 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2557 const char * const send = strchr(s,']');
3280af22 2558 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2559
2560 if (!send) /* has to be an expression */
2561 return TRUE;
2562
2563 Zero(seen,256,char);
2564 if (*s == '$')
2565 weight -= 3;
2566 else if (isDIGIT(*s)) {
2567 if (s[1] != ']') {
2568 if (isDIGIT(s[1]) && s[2] == ']')
2569 weight -= 10;
2570 }
2571 else
2572 weight -= 100;
2573 }
2574 for (; s < send; s++) {
2575 last_un_char = un_char;
2576 un_char = (unsigned char)*s;
2577 switch (*s) {
2578 case '@':
2579 case '&':
2580 case '$':
2581 weight -= seen[un_char] * 10;
7e2040f0 2582 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2583 int len;
8903cb82 2584 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2585 len = (int)strlen(tmpbuf);
2586 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2587 weight -= 100;
2588 else
2589 weight -= 10;
2590 }
2591 else if (*s == '$' && s[1] &&
93a17b20
LW
2592 strchr("[#!%*<>()-=",s[1])) {
2593 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2594 weight -= 10;
2595 else
2596 weight -= 1;
2597 }
2598 break;
2599 case '\\':
2600 un_char = 254;
2601 if (s[1]) {
93a17b20 2602 if (strchr("wds]",s[1]))
79072805 2603 weight += 100;
10edeb5d 2604 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2605 weight += 1;
93a17b20 2606 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2607 weight += 40;
2608 else if (isDIGIT(s[1])) {
2609 weight += 40;
2610 while (s[1] && isDIGIT(s[1]))
2611 s++;
2612 }
2613 }
2614 else
2615 weight += 100;
2616 break;
2617 case '-':
2618 if (s[1] == '\\')
2619 weight += 50;
93a17b20 2620 if (strchr("aA01! ",last_un_char))
79072805 2621 weight += 30;
93a17b20 2622 if (strchr("zZ79~",s[1]))
79072805 2623 weight += 30;
f27ffc4a
GS
2624 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2625 weight -= 5; /* cope with negative subscript */
79072805
LW
2626 break;
2627 default:
3792a11b
NC
2628 if (!isALNUM(last_un_char)
2629 && !(last_un_char == '$' || last_un_char == '@'
2630 || last_un_char == '&')
2631 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2632 char *d = tmpbuf;
2633 while (isALPHA(*s))
2634 *d++ = *s++;
2635 *d = '\0';
5458a98a 2636 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2637 weight -= 150;
2638 }
2639 if (un_char == last_un_char + 1)
2640 weight += 5;
2641 weight -= seen[un_char];
2642 break;
2643 }
2644 seen[un_char]++;
2645 }
2646 if (weight >= 0) /* probably a character class */
2647 return FALSE;
2648 }
2649
2650 return TRUE;
2651}
ffed7fef 2652
ffb4593c
NT
2653/*
2654 * S_intuit_method
2655 *
2656 * Does all the checking to disambiguate
2657 * foo bar
2658 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2659 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2660 *
2661 * First argument is the stuff after the first token, e.g. "bar".
2662 *
2663 * Not a method if bar is a filehandle.
2664 * Not a method if foo is a subroutine prototyped to take a filehandle.
2665 * Not a method if it's really "Foo $bar"
2666 * Method if it's "foo $bar"
2667 * Not a method if it's really "print foo $bar"
2668 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2669 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2670 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2671 * =>
2672 */
2673
76e3520e 2674STATIC int
62d55b22 2675S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2676{
97aff369 2677 dVAR;
a0d0e21e 2678 char *s = start + (*start == '$');
3280af22 2679 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2680 STRLEN len;
2681 GV* indirgv;
5db06880
NC
2682#ifdef PERL_MAD
2683 int soff;
2684#endif
a0d0e21e
LW
2685
2686 if (gv) {
62d55b22 2687 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2688 return 0;
62d55b22
NC
2689 if (cv) {
2690 if (SvPOK(cv)) {
2691 const char *proto = SvPVX_const(cv);
2692 if (proto) {
2693 if (*proto == ';')
2694 proto++;
2695 if (*proto == '*')
2696 return 0;
2697 }
b6c543e3
IZ
2698 }
2699 } else
c35e046a 2700 gv = NULL;
a0d0e21e 2701 }
8903cb82 2702 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2703 /* start is the beginning of the possible filehandle/object,
2704 * and s is the end of it
2705 * tmpbuf is a copy of it
2706 */
2707
a0d0e21e 2708 if (*start == '$') {
3ef1310e
RGS
2709 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2710 isUPPER(*PL_tokenbuf))
a0d0e21e 2711 return 0;
5db06880
NC
2712#ifdef PERL_MAD
2713 len = start - SvPVX(PL_linestr);
2714#endif
29595ff2 2715 s = PEEKSPACE(s);
f0092767 2716#ifdef PERL_MAD
5db06880
NC
2717 start = SvPVX(PL_linestr) + len;
2718#endif
3280af22
NIS
2719 PL_bufptr = start;
2720 PL_expect = XREF;
a0d0e21e
LW
2721 return *s == '(' ? FUNCMETH : METHOD;
2722 }
5458a98a 2723 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2724 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2725 len -= 2;
2726 tmpbuf[len] = '\0';
5db06880
NC
2727#ifdef PERL_MAD
2728 soff = s - SvPVX(PL_linestr);
2729#endif
c3e0f903
GS
2730 goto bare_package;
2731 }
90e5519e 2732 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2733 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2734 return 0;
2735 /* filehandle or package name makes it a method */
da51bb9b 2736 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2737#ifdef PERL_MAD
2738 soff = s - SvPVX(PL_linestr);
2739#endif
29595ff2 2740 s = PEEKSPACE(s);
3280af22 2741 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2742 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2743 bare_package:
cd81e915 2744 start_force(PL_curforce);
9ded7720 2745 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2746 newSVpvn(tmpbuf,len));
9ded7720 2747 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2748 if (PL_madskills)
2749 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2750 PL_expect = XTERM;
a0d0e21e 2751 force_next(WORD);
3280af22 2752 PL_bufptr = s;
5db06880
NC
2753#ifdef PERL_MAD
2754 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2755#endif
a0d0e21e
LW
2756 return *s == '(' ? FUNCMETH : METHOD;
2757 }
2758 }
2759 return 0;
2760}
2761
16d20bd9 2762/* Encoded script support. filter_add() effectively inserts a
4e553d73 2763 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2764 * Note that the filter function only applies to the current source file
2765 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2766 *
2767 * The datasv parameter (which may be NULL) can be used to pass
2768 * private data to this instance of the filter. The filter function
2769 * can recover the SV using the FILTER_DATA macro and use it to
2770 * store private buffers and state information.
2771 *
2772 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2773 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2774 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2775 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2776 * private use must be set using malloc'd pointers.
2777 */
16d20bd9
AD
2778
2779SV *
864dbfa3 2780Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2781{
97aff369 2782 dVAR;
f4c556ac 2783 if (!funcp)
a0714e2c 2784 return NULL;
f4c556ac 2785
5486870f
DM
2786 if (!PL_parser)
2787 return NULL;
2788
3280af22
NIS
2789 if (!PL_rsfp_filters)
2790 PL_rsfp_filters = newAV();
16d20bd9 2791 if (!datasv)
561b68a9 2792 datasv = newSV(0);
862a34c6 2793 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2794 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2795 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2796 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2797 FPTR2DPTR(void *, IoANY(datasv)),
2798 SvPV_nolen(datasv)));
3280af22
NIS
2799 av_unshift(PL_rsfp_filters, 1);
2800 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2801 return(datasv);
2802}
4e553d73 2803
16d20bd9
AD
2804
2805/* Delete most recently added instance of this filter function. */
a0d0e21e 2806void
864dbfa3 2807Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2808{
97aff369 2809 dVAR;
e0c19803 2810 SV *datasv;
24801a4b 2811
33073adb 2812#ifdef DEBUGGING
55662e27
JH
2813 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2814 FPTR2DPTR(void*, funcp)));
33073adb 2815#endif
5486870f 2816 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2817 return;
2818 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2819 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2820 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2821 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2822 IoANY(datasv) = (void *)NULL;
3280af22 2823 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2824
16d20bd9
AD
2825 return;
2826 }
2827 /* we need to search for the correct entry and clear it */
cea2e8a9 2828 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2829}
2830
2831
1de9afcd
RGS
2832/* Invoke the idxth filter function for the current rsfp. */
2833/* maxlen 0 = read one text line */
16d20bd9 2834I32
864dbfa3 2835Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2836{
97aff369 2837 dVAR;
16d20bd9
AD
2838 filter_t funcp;
2839 SV *datasv = NULL;
f482118e
NC
2840 /* This API is bad. It should have been using unsigned int for maxlen.
2841 Not sure if we want to change the API, but if not we should sanity
2842 check the value here. */
39cd7a59
NC
2843 const unsigned int correct_length
2844 = maxlen < 0 ?
2845#ifdef PERL_MICRO
2846 0x7FFFFFFF
2847#else
2848 INT_MAX
2849#endif
2850 : maxlen;
e50aee73 2851
5486870f 2852 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 2853 return -1;
1de9afcd 2854 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2855 /* Provide a default input filter to make life easy. */
2856 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2857 DEBUG_P(PerlIO_printf(Perl_debug_log,
2858 "filter_read %d: from rsfp\n", idx));
f482118e 2859 if (correct_length) {
16d20bd9
AD
2860 /* Want a block */
2861 int len ;
f54cb97a 2862 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2863
2864 /* ensure buf_sv is large enough */
f482118e
NC
2865 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2866 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2867 correct_length)) <= 0) {
3280af22 2868 if (PerlIO_error(PL_rsfp))
37120919
AD
2869 return -1; /* error */
2870 else
2871 return 0 ; /* end of file */
2872 }
16d20bd9
AD
2873 SvCUR_set(buf_sv, old_len + len) ;
2874 } else {
2875 /* Want a line */
3280af22
NIS
2876 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2877 if (PerlIO_error(PL_rsfp))
37120919
AD
2878 return -1; /* error */
2879 else
2880 return 0 ; /* end of file */
2881 }
16d20bd9
AD
2882 }
2883 return SvCUR(buf_sv);
2884 }
2885 /* Skip this filter slot if filter has been deleted */
1de9afcd 2886 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2887 DEBUG_P(PerlIO_printf(Perl_debug_log,
2888 "filter_read %d: skipped (filter deleted)\n",
2889 idx));
f482118e 2890 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2891 }
2892 /* Get function pointer hidden within datasv */
8141890a 2893 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2894 DEBUG_P(PerlIO_printf(Perl_debug_log,
2895 "filter_read %d: via function %p (%s)\n",
ca0270c4 2896 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2897 /* Call function. The function is expected to */
2898 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2899 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2900 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2901}
2902
76e3520e 2903STATIC char *
cea2e8a9 2904S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2905{
97aff369 2906 dVAR;
c39cd008 2907#ifdef PERL_CR_FILTER
3280af22 2908 if (!PL_rsfp_filters) {
c39cd008 2909 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2910 }
2911#endif
3280af22 2912 if (PL_rsfp_filters) {
55497cff
PP
2913 if (!append)
2914 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2915 if (FILTER_READ(0, sv, 0) > 0)
2916 return ( SvPVX(sv) ) ;
2917 else
bd61b366 2918 return NULL ;
16d20bd9 2919 }
9d116dd7 2920 else
fd049845 2921 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2922}
2923
01ec43d0 2924STATIC HV *
9bde8eb0 2925S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 2926{
97aff369 2927 dVAR;
def3634b
GS
2928 GV *gv;
2929
01ec43d0 2930 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2931 return PL_curstash;
2932
2933 if (len > 2 &&
2934 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2935 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2936 {
2937 return GvHV(gv); /* Foo:: */
def3634b
GS
2938 }
2939
2940 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2941 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2942 if (gv && GvCV(gv)) {
2943 SV * const sv = cv_const_sv(GvCV(gv));
2944 if (sv)
9bde8eb0 2945 pkgname = SvPV_const(sv, len);
def3634b
GS
2946 }
2947
9bde8eb0 2948 return gv_stashpvn(pkgname, len, 0);
def3634b 2949}
a0d0e21e 2950
e3f73d4e
RGS
2951/*
2952 * S_readpipe_override
2953 * Check whether readpipe() is overriden, and generates the appropriate
2954 * optree, provided sublex_start() is called afterwards.
2955 */
2956STATIC void
1d51329b 2957S_readpipe_override(pTHX)
e3f73d4e
RGS
2958{
2959 GV **gvp;
2960 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 2961 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
2962 if ((gv_readpipe
2963 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2964 ||
2965 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 2966 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
2967 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2968 {
2969 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2970 append_elem(OP_LIST,
2971 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2972 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2973 }
e3f73d4e
RGS
2974}
2975
5db06880
NC
2976#ifdef PERL_MAD
2977 /*
2978 * Perl_madlex
2979 * The intent of this yylex wrapper is to minimize the changes to the
2980 * tokener when we aren't interested in collecting madprops. It remains
2981 * to be seen how successful this strategy will be...
2982 */
2983
2984int
2985Perl_madlex(pTHX)
2986{
2987 int optype;
2988 char *s = PL_bufptr;
2989
cd81e915
NC
2990 /* make sure PL_thiswhite is initialized */
2991 PL_thiswhite = 0;
2992 PL_thismad = 0;
5db06880 2993
cd81e915 2994 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2995 if (PL_pending_ident)
2996 return S_pending_ident(aTHX);
2997
2998 /* previous token ate up our whitespace? */
cd81e915
NC
2999 if (!PL_lasttoke && PL_nextwhite) {
3000 PL_thiswhite = PL_nextwhite;
3001 PL_nextwhite = 0;
5db06880
NC
3002 }
3003
3004 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3005 PL_realtokenstart = -1;
3006 PL_thistoken = 0;
5db06880
NC
3007 optype = yylex();
3008 s = PL_bufptr;
cd81e915 3009 assert(PL_curforce < 0);
5db06880 3010
cd81e915
NC
3011 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3012 if (!PL_thistoken) {
3013 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3014 PL_thistoken = newSVpvs("");
5db06880 3015 else {
c35e046a 3016 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3017 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3018 }
3019 }
cd81e915
NC
3020 if (PL_thismad) /* install head */
3021 CURMAD('X', PL_thistoken);
5db06880
NC
3022 }
3023
3024 /* last whitespace of a sublex? */
cd81e915
NC
3025 if (optype == ')' && PL_endwhite) {
3026 CURMAD('X', PL_endwhite);
5db06880
NC
3027 }
3028
cd81e915 3029 if (!PL_thismad) {
5db06880
NC
3030
3031 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3032 if (!PL_thiswhite && !PL_endwhite && !optype) {
3033 sv_free(PL_thistoken);
3034 PL_thistoken = 0;
5db06880
NC
3035 return 0;
3036 }
3037
3038 /* put off final whitespace till peg */
3039 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3040 PL_nextwhite = PL_thiswhite;
3041 PL_thiswhite = 0;
5db06880 3042 }
cd81e915
NC
3043 else if (PL_thisopen) {
3044 CURMAD('q', PL_thisopen);
3045 if (PL_thistoken)
3046 sv_free(PL_thistoken);
3047 PL_thistoken = 0;
5db06880
NC
3048 }
3049 else {
3050 /* Store actual token text as madprop X */
cd81e915 3051 CURMAD('X', PL_thistoken);
5db06880
NC
3052 }
3053
cd81e915 3054 if (PL_thiswhite) {
5db06880 3055 /* add preceding whitespace as madprop _ */
cd81e915 3056 CURMAD('_', PL_thiswhite);
5db06880
NC
3057 }
3058
cd81e915 3059 if (PL_thisstuff) {
5db06880 3060 /* add quoted material as madprop = */
cd81e915 3061 CURMAD('=', PL_thisstuff);
5db06880
NC
3062 }
3063
cd81e915 3064 if (PL_thisclose) {
5db06880 3065 /* add terminating quote as madprop Q */
cd81e915 3066 CURMAD('Q', PL_thisclose);
5db06880
NC
3067 }
3068 }
3069
3070 /* special processing based on optype */
3071
3072 switch (optype) {
3073
3074 /* opval doesn't need a TOKEN since it can already store mp */
3075 case WORD:
3076 case METHOD:
3077 case FUNCMETH:
3078 case THING:
3079 case PMFUNC:
3080 case PRIVATEREF:
3081 case FUNC0SUB:
3082 case UNIOPSUB:
3083 case LSTOPSUB:
6154021b
RGS
3084 if (pl_yylval.opval)
3085 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 3086 PL_thismad = 0;
5db06880
NC
3087 return optype;
3088
3089 /* fake EOF */
3090 case 0:
3091 optype = PEG;
cd81e915
NC
3092 if (PL_endwhite) {
3093 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3094 PL_endwhite = 0;
5db06880
NC
3095 }
3096 break;
3097
3098 case ']':
3099 case '}':
cd81e915 3100 if (PL_faketokens)
5db06880
NC
3101 break;
3102 /* remember any fake bracket that lexer is about to discard */
3103 if (PL_lex_brackets == 1 &&
3104 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3105 {
3106 s = PL_bufptr;
3107 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3108 s++;
3109 if (*s == '}') {
cd81e915
NC
3110 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3111 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3112 PL_thiswhite = 0;
5db06880
NC
3113 PL_bufptr = s - 1;
3114 break; /* don't bother looking for trailing comment */
3115 }
3116 else
3117 s = PL_bufptr;
3118 }
3119 if (optype == ']')
3120 break;
3121 /* FALLTHROUGH */
3122
3123 /* attach a trailing comment to its statement instead of next token */
3124 case ';':
cd81e915 3125 if (PL_faketokens)
5db06880
NC
3126 break;
3127 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3128 s = PL_bufptr;
3129 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3130 s++;
3131 if (*s == '\n' || *s == '#') {
3132 while (s < PL_bufend && *s != '\n')
3133 s++;
3134 if (s < PL_bufend)
3135 s++;
cd81e915
NC
3136 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3137 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3138 PL_thiswhite = 0;
5db06880
NC
3139 PL_bufptr = s;
3140 }
3141 }
3142 break;
3143
3144 /* pval */
3145 case LABEL:
3146 break;
3147
3148 /* ival */
3149 default:
3150 break;
3151
3152 }
3153
3154 /* Create new token struct. Note: opvals return early above. */
6154021b 3155 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 3156 PL_thismad = 0;
5db06880
NC
3157 return optype;
3158}
3159#endif
3160
468aa647 3161STATIC char *
cc6ed77d 3162S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3163 dVAR;
468aa647
RGS
3164 if (PL_expect != XSTATE)
3165 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3166 is_use ? "use" : "no"));
29595ff2 3167 s = SKIPSPACE1(s);
468aa647
RGS
3168 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3169 s = force_version(s, TRUE);
29595ff2 3170 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3171 start_force(PL_curforce);
9ded7720 3172 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3173 force_next(WORD);
3174 }
3175 else if (*s == 'v') {
3176 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3177 s = force_version(s, FALSE);
3178 }
3179 }
3180 else {
3181 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3182 s = force_version(s, FALSE);
3183 }
6154021b 3184 pl_yylval.ival = is_use;
468aa647
RGS
3185 return s;
3186}
748a9306 3187#ifdef DEBUGGING
27da23d5 3188 static const char* const exp_name[] =
09bef843 3189 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3190 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3191 };
748a9306 3192#endif
463ee0b2 3193
02aa26ce
NT
3194/*
3195 yylex
3196
3197 Works out what to call the token just pulled out of the input
3198 stream. The yacc parser takes care of taking the ops we return and
3199 stitching them into a tree.
3200
3201 Returns:
3202 PRIVATEREF
3203
3204 Structure:
3205 if read an identifier
3206 if we're in a my declaration
3207 croak if they tried to say my($foo::bar)
3208 build the ops for a my() declaration
3209 if it's an access to a my() variable
3210 are we in a sort block?
3211 croak if my($a); $a <=> $b
3212 build ops for access to a my() variable
3213 if in a dq string, and they've said @foo and we can't find @foo
3214 croak
3215 build ops for a bareword
3216 if we already built the token before, use it.
3217*/
3218
20141f0e 3219
dba4d153
JH
3220#ifdef __SC__
3221#pragma segment Perl_yylex
3222#endif
dba4d153 3223int
dba4d153 3224Perl_yylex(pTHX)
20141f0e 3225{
97aff369 3226 dVAR;
3afc138a 3227 register char *s = PL_bufptr;
378cc40b 3228 register char *d;
463ee0b2 3229 STRLEN len;
aa7440fb 3230 bool bof = FALSE;
a687059c 3231
10edeb5d
JH
3232 /* orig_keyword, gvp, and gv are initialized here because
3233 * jump to the label just_a_word_zero can bypass their
3234 * initialization later. */
3235 I32 orig_keyword = 0;
3236 GV *gv = NULL;
3237 GV **gvp = NULL;
3238
bbf60fe6 3239 DEBUG_T( {
396482e1 3240 SV* tmp = newSVpvs("");
b6007c36
DM
3241 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3242 (IV)CopLINE(PL_curcop),
3243 lex_state_names[PL_lex_state],
3244 exp_name[PL_expect],
3245 pv_display(tmp, s, strlen(s), 0, 60));
3246 SvREFCNT_dec(tmp);
bbf60fe6 3247 } );
02aa26ce 3248 /* check if there's an identifier for us to look at */
ba979b31 3249 if (PL_pending_ident)
bbf60fe6 3250 return REPORT(S_pending_ident(aTHX));
bbce6d69 3251
02aa26ce
NT
3252 /* no identifier pending identification */
3253
3280af22 3254 switch (PL_lex_state) {
79072805
LW
3255#ifdef COMMENTARY
3256 case LEX_NORMAL: /* Some compilers will produce faster */
3257 case LEX_INTERPNORMAL: /* code if we comment these out. */
3258 break;
3259#endif
3260
09bef843 3261 /* when we've already built the next token, just pull it out of the queue */
79072805 3262 case LEX_KNOWNEXT:
5db06880
NC
3263#ifdef PERL_MAD
3264 PL_lasttoke--;
6154021b 3265 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 3266 if (PL_madskills) {
cd81e915 3267 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3268 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3269 if (PL_thismad && PL_thismad->mad_key == '_') {
3270 PL_thiswhite = (SV*)PL_thismad->mad_val;
3271 PL_thismad->mad_val = 0;
3272 mad_free(PL_thismad);
3273 PL_thismad = 0;
5db06880
NC
3274 }
3275 }
3276 if (!PL_lasttoke) {
3277 PL_lex_state = PL_lex_defer;
3278 PL_expect = PL_lex_expect;
3279 PL_lex_defer = LEX_NORMAL;
3280 if (!PL_nexttoke[PL_lasttoke].next_type)
3281 return yylex();
3282 }
3283#else
3280af22 3284 PL_nexttoke--;
6154021b 3285 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3286 if (!PL_nexttoke) {
3287 PL_lex_state = PL_lex_defer;
3288 PL_expect = PL_lex_expect;
3289 PL_lex_defer = LEX_NORMAL;
463ee0b2 3290 }
5db06880
NC
3291#endif
3292#ifdef PERL_MAD
3293 /* FIXME - can these be merged? */
3294 return(PL_nexttoke[PL_lasttoke].next_type);
3295#else
bbf60fe6 3296 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3297#endif
79072805 3298
02aa26ce 3299 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3300 when we get here, PL_bufptr is at the \
02aa26ce 3301 */
79072805
LW
3302 case LEX_INTERPCASEMOD:
3303#ifdef DEBUGGING
3280af22 3304 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3305 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3306#endif
02aa26ce 3307 /* handle \E or end of string */
3280af22 3308 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3309 /* if at a \E */
3280af22 3310 if (PL_lex_casemods) {
f54cb97a 3311 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3312 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3313
3792a11b
NC
3314 if (PL_bufptr != PL_bufend
3315 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3316 PL_bufptr += 2;
3317 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3318#ifdef PERL_MAD
3319 if (PL_madskills)
6b29d1f5 3320 PL_thistoken = newSVpvs("\\E");
5db06880 3321#endif
a0d0e21e 3322 }
bbf60fe6 3323 return REPORT(')');
79072805 3324 }
5db06880
NC
3325#ifdef PERL_MAD
3326 while (PL_bufptr != PL_bufend &&
3327 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3328 if (!PL_thiswhite)
6b29d1f5 3329 PL_thiswhite = newSVpvs("");
cd81e915 3330 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3331 PL_bufptr += 2;
3332 }
3333#else
3280af22
NIS
3334 if (PL_bufptr != PL_bufend)
3335 PL_bufptr += 2;
5db06880 3336#endif
3280af22 3337 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3338 return yylex();
79072805
LW
3339 }
3340 else {
607df283 3341 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3342 "### Saw case modifier\n"); });
3280af22 3343 s = PL_bufptr + 1;
6e909404 3344 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3345#ifdef PERL_MAD
cd81e915 3346 if (!PL_thiswhite)
6b29d1f5 3347 PL_thiswhite = newSVpvs("");
cd81e915 3348 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3349#endif
89122651 3350 PL_bufptr = s + 3;
6e909404
JH
3351 PL_lex_state = LEX_INTERPCONCAT;
3352 return yylex();
a0d0e21e 3353 }
6e909404 3354 else {
90771dc0 3355 I32 tmp;
5db06880
NC
3356 if (!PL_madskills) /* when just compiling don't need correct */
3357 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3358 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3359 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3360 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3361 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3362 return REPORT(')');
6e909404
JH
3363 }
3364 if (PL_lex_casemods > 10)
3365 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3366 PL_lex_casestack[PL_lex_casemods++] = *s;
3367 PL_lex_casestack[PL_lex_casemods] = '\0';
3368 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3369 start_force(PL_curforce);
9ded7720 3370 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3371 force_next('(');
cd81e915 3372 start_force(PL_curforce);
6e909404 3373 if (*s == 'l')
9ded7720 3374 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3375 else if (*s == 'u')
9ded7720 3376 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3377 else if (*s == 'L')
9ded7720 3378 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3379 else if (*s == 'U')
9ded7720 3380 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3381 else if (*s == 'Q')
9ded7720 3382 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3383 else
3384 Perl_croak(aTHX_ "panic: yylex");
5db06880 3385 if (PL_madskills) {
a5849ce5
NC
3386 SV* const tmpsv = newSVpvs("\\ ");
3387 /* replace the space with the character we want to escape
3388 */
3389 SvPVX(tmpsv)[1] = *s;
5db06880
NC
3390 curmad('_', tmpsv);
3391 }
6e909404 3392 PL_bufptr = s + 1;
a0d0e21e 3393 }
79072805 3394 force_next(FUNC);
3280af22
NIS
3395 if (PL_lex_starts) {
3396 s = PL_bufptr;
3397 PL_lex_starts = 0;
5db06880
NC
3398#ifdef PERL_MAD
3399 if (PL_madskills) {
cd81e915
NC
3400 if (PL_thistoken)
3401 sv_free(PL_thistoken);
6b29d1f5 3402 PL_thistoken = newSVpvs("");
5db06880
NC
3403 }
3404#endif
131b3ad0
DM
3405 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3406 if (PL_lex_casemods == 1 && PL_lex_inpat)
3407 OPERATOR(',');
3408 else
3409 Aop(OP_CONCAT);
79072805
LW
3410 }
3411 else
cea2e8a9 3412 return yylex();
79072805
LW
3413 }
3414
55497cff 3415 case LEX_INTERPPUSH:
bbf60fe6 3416 return REPORT(sublex_push());
55497cff 3417
79072805 3418 case LEX_INTERPSTART:
3280af22 3419 if (PL_bufptr == PL_bufend)
bbf60fe6 3420 return REPORT(sublex_done());
607df283 3421 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3422 "### Interpolated variable\n"); });
3280af22
NIS
3423 PL_expect = XTERM;
3424 PL_lex_dojoin = (*PL_bufptr == '@');
3425 PL_lex_state = LEX_INTERPNORMAL;
3426 if (PL_lex_dojoin) {
cd81e915 3427 start_force(PL_curforce);
9ded7720 3428 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3429 force_next(',');
cd81e915 3430 start_force(PL_curforce);
a0d0e21e 3431 force_ident("\"", '$');
cd81e915 3432 start_force(PL_curforce);
9ded7720 3433 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3434 force_next('$');
cd81e915 3435 start_force(PL_curforce);
9ded7720 3436 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3437 force_next('(');
cd81e915 3438 start_force(PL_curforce);
9ded7720 3439 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3440 force_next(FUNC);
3441 }
3280af22
NIS
3442 if (PL_lex_starts++) {
3443 s = PL_bufptr;
5db06880
NC
3444#ifdef PERL_MAD
3445 if (PL_madskills) {
cd81e915
NC
3446 if (PL_thistoken)
3447 sv_free(PL_thistoken);
6b29d1f5 3448 PL_thistoken = newSVpvs("");
5db06880
NC
3449 }
3450#endif
131b3ad0
DM
3451 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3452 if (!PL_lex_casemods && PL_lex_inpat)
3453 OPERATOR(',');
3454 else
3455 Aop(OP_CONCAT);
79072805 3456 }
cea2e8a9 3457 return yylex();
79072805
LW
3458
3459 case LEX_INTERPENDMAYBE:
3280af22
NIS
3460 if (intuit_more(PL_bufptr)) {
3461 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3462 break;
3463 }
3464 /* FALL THROUGH */
3465
3466 case LEX_INTERPEND:
3280af22
NIS
3467 if (PL_lex_dojoin) {
3468 PL_lex_dojoin = FALSE;
3469 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3470#ifdef PERL_MAD
3471 if (PL_madskills) {
cd81e915
NC
3472 if (PL_thistoken)
3473 sv_free(PL_thistoken);
6b29d1f5 3474 PL_thistoken = newSVpvs("");
5db06880
NC
3475 }
3476#endif
bbf60fe6 3477 return REPORT(')');
79072805 3478 }
43a16006 3479 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3480 && SvEVALED(PL_lex_repl))
43a16006 3481 {
e9fa98b2 3482 if (PL_bufptr != PL_bufend)
cea2e8a9 3483 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3484 PL_lex_repl = NULL;
e9fa98b2 3485 }
79072805
LW
3486 /* FALLTHROUGH */
3487 case LEX_INTERPCONCAT:
3488#ifdef DEBUGGING
3280af22 3489 if (PL_lex_brackets)
cea2e8a9 3490 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3491#endif
3280af22 3492 if (PL_bufptr == PL_bufend)
bbf60fe6 3493 return REPORT(sublex_done());
79072805 3494
3280af22
NIS
3495 if (SvIVX(PL_linestr) == '\'') {
3496 SV *sv = newSVsv(PL_linestr);
3497 if (!PL_lex_inpat)
76e3520e 3498 sv = tokeq(sv);
3280af22 3499 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 3500 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 3501 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3502 s = PL_bufend;
79072805
LW
3503 }
3504 else {
3280af22 3505 s = scan_const(PL_bufptr);
79072805 3506 if (*s == '\\')
3280af22 3507 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3508 else
3280af22 3509 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3510 }
3511
3280af22 3512 if (s != PL_bufptr) {
cd81e915 3513 start_force(PL_curforce);
5db06880
NC
3514 if (PL_madskills) {
3515 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3516 }
6154021b 3517 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 3518 PL_expect = XTERM;
79072805 3519 force_next(THING);
131b3ad0 3520 if (PL_lex_starts++) {
5db06880
NC
3521#ifdef PERL_MAD
3522 if (PL_madskills) {
cd81e915
NC
3523 if (PL_thistoken)
3524 sv_free(PL_thistoken);
6b29d1f5 3525 PL_thistoken = newSVpvs("");
5db06880
NC
3526 }
3527#endif
131b3ad0
DM