This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Three variables in S_scan_trans only hold flags for op_private, so can
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
6ef55633 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
eb0d8d16
NC
26#define new_constant(a,b,c,d,e,f,g) \
27 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
28
6154021b 29#define pl_yylval (PL_parser->yylval)
d3b6f988 30
acdf0a21
DM
31/* YYINITDEPTH -- initial size of the parser's stacks. */
32#define YYINITDEPTH 200
33
199e78b7
DM
34/* XXX temporary backwards compatibility */
35#define PL_lex_brackets (PL_parser->lex_brackets)
36#define PL_lex_brackstack (PL_parser->lex_brackstack)
37#define PL_lex_casemods (PL_parser->lex_casemods)
38#define PL_lex_casestack (PL_parser->lex_casestack)
39#define PL_lex_defer (PL_parser->lex_defer)
40#define PL_lex_dojoin (PL_parser->lex_dojoin)
41#define PL_lex_expect (PL_parser->lex_expect)
42#define PL_lex_formbrack (PL_parser->lex_formbrack)
43#define PL_lex_inpat (PL_parser->lex_inpat)
44#define PL_lex_inwhat (PL_parser->lex_inwhat)
45#define PL_lex_op (PL_parser->lex_op)
46#define PL_lex_repl (PL_parser->lex_repl)
47#define PL_lex_starts (PL_parser->lex_starts)
48#define PL_lex_stuff (PL_parser->lex_stuff)
49#define PL_multi_start (PL_parser->multi_start)
50#define PL_multi_open (PL_parser->multi_open)
51#define PL_multi_close (PL_parser->multi_close)
52#define PL_pending_ident (PL_parser->pending_ident)
53#define PL_preambled (PL_parser->preambled)
54#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 55#define PL_linestr (PL_parser->linestr)
c2598295
DM
56#define PL_expect (PL_parser->expect)
57#define PL_copline (PL_parser->copline)
f06b5848
DM
58#define PL_bufptr (PL_parser->bufptr)
59#define PL_oldbufptr (PL_parser->oldbufptr)
60#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
61#define PL_linestart (PL_parser->linestart)
62#define PL_bufend (PL_parser->bufend)
63#define PL_last_uni (PL_parser->last_uni)
64#define PL_last_lop (PL_parser->last_lop)
65#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 66#define PL_lex_state (PL_parser->lex_state)
2f9285f8 67#define PL_rsfp (PL_parser->rsfp)
5486870f 68#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
69#define PL_in_my (PL_parser->in_my)
70#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 71#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 72#define PL_multi_end (PL_parser->multi_end)
13765c85 73#define PL_error_count (PL_parser->error_count)
199e78b7
DM
74
75#ifdef PERL_MAD
76# define PL_endwhite (PL_parser->endwhite)
77# define PL_faketokens (PL_parser->faketokens)
78# define PL_lasttoke (PL_parser->lasttoke)
79# define PL_nextwhite (PL_parser->nextwhite)
80# define PL_realtokenstart (PL_parser->realtokenstart)
81# define PL_skipwhite (PL_parser->skipwhite)
82# define PL_thisclose (PL_parser->thisclose)
83# define PL_thismad (PL_parser->thismad)
84# define PL_thisopen (PL_parser->thisopen)
85# define PL_thisstuff (PL_parser->thisstuff)
86# define PL_thistoken (PL_parser->thistoken)
87# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
88# define PL_thiswhite (PL_parser->thiswhite)
89# define PL_nexttoke (PL_parser->nexttoke)
90# define PL_curforce (PL_parser->curforce)
91#else
92# define PL_nexttoke (PL_parser->nexttoke)
93# define PL_nexttype (PL_parser->nexttype)
94# define PL_nextval (PL_parser->nextval)
199e78b7
DM
95#endif
96
3cbf51f5
DM
97static int
98S_pending_ident(pTHX);
199e78b7 99
0bd48802 100static const char ident_too_long[] = "Identifier too long";
c445ea15 101static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 102
6e3aabd6 103#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
104static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
105static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 106#endif
51371543 107
29595ff2 108#ifdef PERL_MAD
29595ff2 109# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 110# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 111#else
5db06880 112# define CURMAD(slot,sv)
9ded7720 113# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
114#endif
115
9059aa12
LW
116#define XFAKEBRACK 128
117#define XENUMMASK 127
118
39e02b42
JH
119#ifdef USE_UTF8_SCRIPTS
120# define UTF (!IN_BYTES)
2b9d42f0 121#else
746b446a 122# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 123#endif
a0ed51b3 124
61f0cdd9 125/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
126 * 1999-02-27 mjd-perl-patch@plover.com */
127#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
128
bf4acbe4
GS
129/* On MacOS, respect nonbreaking spaces */
130#ifdef MACOS_TRADITIONAL
131#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
132#else
133#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
134#endif
135
ffb4593c
NT
136/* LEX_* are values for PL_lex_state, the state of the lexer.
137 * They are arranged oddly so that the guard on the switch statement
79072805
LW
138 * can get by with a single comparison (if the compiler is smart enough).
139 */
140
fb73857a 141/* #define LEX_NOTPARSING 11 is done in perl.h. */
142
b6007c36
DM
143#define LEX_NORMAL 10 /* normal code (ie not within "...") */
144#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
145#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
146#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
147#define LEX_INTERPSTART 6 /* expecting the start of a $var */
148
149 /* at end of code, eg "$x" followed by: */
150#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
151#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
152
153#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
154 string or after \E, $foo, etc */
155#define LEX_INTERPCONST 2 /* NOT USED */
156#define LEX_FORMLINE 1 /* expecting a format line */
157#define LEX_KNOWNEXT 0 /* next token known; just return it */
158
79072805 159
bbf60fe6 160#ifdef DEBUGGING
27da23d5 161static const char* const lex_state_names[] = {
bbf60fe6
DM
162 "KNOWNEXT",
163 "FORMLINE",
164 "INTERPCONST",
165 "INTERPCONCAT",
166 "INTERPENDMAYBE",
167 "INTERPEND",
168 "INTERPSTART",
169 "INTERPPUSH",
170 "INTERPCASEMOD",
171 "INTERPNORMAL",
172 "NORMAL"
173};
174#endif
175
79072805
LW
176#ifdef ff_next
177#undef ff_next
d48672a2
LW
178#endif
179
79072805 180#include "keywords.h"
fe14fcc3 181
ffb4593c
NT
182/* CLINE is a macro that ensures PL_copline has a sane value */
183
ae986130
LW
184#ifdef CLINE
185#undef CLINE
186#endif
57843af0 187#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 188
5db06880 189#ifdef PERL_MAD
29595ff2
NC
190# define SKIPSPACE0(s) skipspace0(s)
191# define SKIPSPACE1(s) skipspace1(s)
192# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
193# define PEEKSPACE(s) skipspace2(s,0)
194#else
195# define SKIPSPACE0(s) skipspace(s)
196# define SKIPSPACE1(s) skipspace(s)
197# define SKIPSPACE2(s,tsv) skipspace(s)
198# define PEEKSPACE(s) skipspace(s)
199#endif
200
ffb4593c
NT
201/*
202 * Convenience functions to return different tokens and prime the
9cbb5ea2 203 * lexer for the next token. They all take an argument.
ffb4593c
NT
204 *
205 * TOKEN : generic token (used for '(', DOLSHARP, etc)
206 * OPERATOR : generic operator
207 * AOPERATOR : assignment operator
208 * PREBLOCK : beginning the block after an if, while, foreach, ...
209 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
210 * PREREF : *EXPR where EXPR is not a simple identifier
211 * TERM : expression term
212 * LOOPX : loop exiting command (goto, last, dump, etc)
213 * FTST : file test operator
214 * FUN0 : zero-argument function
2d2e263d 215 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
216 * BOop : bitwise or or xor
217 * BAop : bitwise and
218 * SHop : shift operator
219 * PWop : power operator
9cbb5ea2 220 * PMop : pattern-matching operator
ffb4593c
NT
221 * Aop : addition-level operator
222 * Mop : multiplication-level operator
223 * Eop : equality-testing operator
e5edeb50 224 * Rop : relational operator <= != gt
ffb4593c
NT
225 *
226 * Also see LOP and lop() below.
227 */
228
998054bd 229#ifdef DEBUGGING /* Serve -DT. */
704d4215 230# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 231#else
bbf60fe6 232# define REPORT(retval) (retval)
998054bd
SC
233#endif
234
bbf60fe6
DM
235#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
236#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
237#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
238#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
239#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
240#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
241#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
242#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
243#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
244#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
245#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
246#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
247#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
248#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
249#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
250#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
251#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
252#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
253#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
254#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 255
a687059c
LW
256/* This bit of chicanery makes a unary function followed by
257 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
258 * The UNIDOR macro is for unary functions that can be followed by the //
259 * operator (such as C<shift // 0>).
a687059c 260 */
376fcdbf 261#define UNI2(f,x) { \
6154021b 262 pl_yylval.ival = f; \
376fcdbf
AL
263 PL_expect = x; \
264 PL_bufptr = s; \
265 PL_last_uni = PL_oldbufptr; \
266 PL_last_lop_op = f; \
267 if (*s == '(') \
268 return REPORT( (int)FUNC1 ); \
29595ff2 269 s = PEEKSPACE(s); \
376fcdbf
AL
270 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
271 }
6f33ba73
RGS
272#define UNI(f) UNI2(f,XTERM)
273#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 274
376fcdbf 275#define UNIBRACK(f) { \
6154021b 276 pl_yylval.ival = f; \
376fcdbf
AL
277 PL_bufptr = s; \
278 PL_last_uni = PL_oldbufptr; \
279 if (*s == '(') \
280 return REPORT( (int)FUNC1 ); \
29595ff2 281 s = PEEKSPACE(s); \
376fcdbf
AL
282 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
283 }
79072805 284
9f68db38 285/* grandfather return to old style */
6154021b 286#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 287
8fa7f367
JH
288#ifdef DEBUGGING
289
6154021b 290/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
291enum token_type {
292 TOKENTYPE_NONE,
293 TOKENTYPE_IVAL,
6154021b 294 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
295 TOKENTYPE_PVAL,
296 TOKENTYPE_OPVAL,
297 TOKENTYPE_GVVAL
298};
299
6d4a66ac
NC
300static struct debug_tokens {
301 const int token;
302 enum token_type type;
303 const char *name;
304} const debug_tokens[] =
9041c2e3 305{
bbf60fe6
DM
306 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
307 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
308 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
309 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
310 { ARROW, TOKENTYPE_NONE, "ARROW" },
311 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
312 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
313 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
314 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
315 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 316 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
317 { DO, TOKENTYPE_NONE, "DO" },
318 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
319 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
320 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
321 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
322 { ELSE, TOKENTYPE_NONE, "ELSE" },
323 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
324 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
325 { FOR, TOKENTYPE_IVAL, "FOR" },
326 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
330 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
331 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 332 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
333 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
334 { IF, TOKENTYPE_IVAL, "IF" },
335 { LABEL, TOKENTYPE_PVAL, "LABEL" },
336 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
337 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
338 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
339 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
340 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
341 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
342 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
343 { MY, TOKENTYPE_IVAL, "MY" },
344 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
351 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
352 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
353 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
354 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
355 { PREINC, TOKENTYPE_NONE, "PREINC" },
356 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
357 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
358 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
359 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
360 { SUB, TOKENTYPE_NONE, "SUB" },
361 { THING, TOKENTYPE_OPVAL, "THING" },
362 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
363 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
364 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
365 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
366 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
367 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 368 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
369 { WHILE, TOKENTYPE_IVAL, "WHILE" },
370 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 371 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
372};
373
6154021b 374/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 375
bbf60fe6 376STATIC int
704d4215 377S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 378{
97aff369 379 dVAR;
7918f24d
NC
380
381 PERL_ARGS_ASSERT_TOKEREPORT;
382
bbf60fe6 383 if (DEBUG_T_TEST) {
bd61b366 384 const char *name = NULL;
bbf60fe6 385 enum token_type type = TOKENTYPE_NONE;
f54cb97a 386 const struct debug_tokens *p;
396482e1 387 SV* const report = newSVpvs("<== ");
bbf60fe6 388
f54cb97a 389 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
390 if (p->token == (int)rv) {
391 name = p->name;
392 type = p->type;
393 break;
394 }
395 }
396 if (name)
54667de8 397 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
398 else if ((char)rv > ' ' && (char)rv < '~')
399 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
400 else if (!rv)
396482e1 401 sv_catpvs(report, "EOF");
bbf60fe6
DM
402 else
403 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
404 switch (type) {
405 case TOKENTYPE_NONE:
406 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
407 break;
408 case TOKENTYPE_IVAL:
704d4215 409 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
410 break;
411 case TOKENTYPE_OPNUM:
412 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 413 PL_op_name[lvalp->ival]);
bbf60fe6
DM
414 break;
415 case TOKENTYPE_PVAL:
704d4215 416 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
417 break;
418 case TOKENTYPE_OPVAL:
704d4215 419 if (lvalp->opval) {
401441c0 420 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
421 PL_op_name[lvalp->opval->op_type]);
422 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 423 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 424 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
425 }
426
427 }
401441c0 428 else
396482e1 429 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
430 break;
431 }
b6007c36 432 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
433 };
434 return (int)rv;
998054bd
SC
435}
436
b6007c36
DM
437
438/* print the buffer with suitable escapes */
439
440STATIC void
15f169a1 441S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 442{
396482e1 443 SV* const tmp = newSVpvs("");
7918f24d
NC
444
445 PERL_ARGS_ASSERT_PRINTBUF;
446
b6007c36
DM
447 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
448 SvREFCNT_dec(tmp);
449}
450
8fa7f367
JH
451#endif
452
ffb4593c
NT
453/*
454 * S_ao
455 *
c963b151
BD
456 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
457 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
458 */
459
76e3520e 460STATIC int
cea2e8a9 461S_ao(pTHX_ int toketype)
a0d0e21e 462{
97aff369 463 dVAR;
3280af22
NIS
464 if (*PL_bufptr == '=') {
465 PL_bufptr++;
a0d0e21e 466 if (toketype == ANDAND)
6154021b 467 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 468 else if (toketype == OROR)
6154021b 469 pl_yylval.ival = OP_ORASSIGN;
c963b151 470 else if (toketype == DORDOR)
6154021b 471 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
472 toketype = ASSIGNOP;
473 }
474 return toketype;
475}
476
ffb4593c
NT
477/*
478 * S_no_op
479 * When Perl expects an operator and finds something else, no_op
480 * prints the warning. It always prints "<something> found where
481 * operator expected. It prints "Missing semicolon on previous line?"
482 * if the surprise occurs at the start of the line. "do you need to
483 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
484 * where the compiler doesn't know if foo is a method call or a function.
485 * It prints "Missing operator before end of line" if there's nothing
486 * after the missing operator, or "... before <...>" if there is something
487 * after the missing operator.
488 */
489
76e3520e 490STATIC void
15f169a1 491S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 492{
97aff369 493 dVAR;
9d4ba2ae
AL
494 char * const oldbp = PL_bufptr;
495 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 496
7918f24d
NC
497 PERL_ARGS_ASSERT_NO_OP;
498
1189a94a
GS
499 if (!s)
500 s = oldbp;
07c798fb 501 else
1189a94a 502 PL_bufptr = s;
cea2e8a9 503 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
504 if (ckWARN_d(WARN_SYNTAX)) {
505 if (is_first)
506 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
507 "\t(Missing semicolon on previous line?)\n");
508 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 509 const char *t;
c35e046a
AL
510 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
511 NOOP;
56da5a46
RGS
512 if (t < PL_bufptr && isSPACE(*t))
513 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
514 "\t(Do you need to predeclare %.*s?)\n",
551405c4 515 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
516 }
517 else {
518 assert(s >= oldbp);
519 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 520 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 521 }
07c798fb 522 }
3280af22 523 PL_bufptr = oldbp;
8990e307
LW
524}
525
ffb4593c
NT
526/*
527 * S_missingterm
528 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 529 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
530 * If we're in a delimited string and the delimiter is a control
531 * character, it's reformatted into a two-char sequence like ^C.
532 * This is fatal.
533 */
534
76e3520e 535STATIC void
cea2e8a9 536S_missingterm(pTHX_ char *s)
8990e307 537{
97aff369 538 dVAR;
8990e307
LW
539 char tmpbuf[3];
540 char q;
541 if (s) {
9d4ba2ae 542 char * const nl = strrchr(s,'\n');
d2719217 543 if (nl)
8990e307
LW
544 *nl = '\0';
545 }
9d116dd7
JH
546 else if (
547#ifdef EBCDIC
548 iscntrl(PL_multi_close)
549#else
550 PL_multi_close < 32 || PL_multi_close == 127
551#endif
552 ) {
8990e307 553 *tmpbuf = '^';
585ec06d 554 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
555 tmpbuf[2] = '\0';
556 s = tmpbuf;
557 }
558 else {
eb160463 559 *tmpbuf = (char)PL_multi_close;
8990e307
LW
560 tmpbuf[1] = '\0';
561 s = tmpbuf;
562 }
563 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 564 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 565}
79072805 566
ef89dcc3 567#define FEATURE_IS_ENABLED(name) \
0d863452 568 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 569 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b
NC
570/* The longest string we pass in. */
571#define MAX_FEATURE_LEN (sizeof("switch")-1)
572
0d863452
RH
573/*
574 * S_feature_is_enabled
575 * Check whether the named feature is enabled.
576 */
577STATIC bool
15f169a1 578S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 579{
97aff369 580 dVAR;
0d863452 581 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 582 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
583
584 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
585
4a731d7b
NC
586 assert(namelen <= MAX_FEATURE_LEN);
587 memcpy(&he_name[8], name, namelen);
d4c19fe8 588
7b9ef140 589 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
590}
591
ffb4593c
NT
592/*
593 * Perl_deprecate
ffb4593c
NT
594 */
595
79072805 596void
15f169a1 597Perl_deprecate(pTHX_ const char *const s)
a0d0e21e 598{
7918f24d
NC
599 PERL_ARGS_ASSERT_DEPRECATE;
600
599cee73 601 if (ckWARN(WARN_DEPRECATED))
9014280d 602 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
603}
604
12bcd1a6 605void
15f169a1 606Perl_deprecate_old(pTHX_ const char *const s)
12bcd1a6
PM
607{
608 /* This function should NOT be called for any new deprecated warnings */
609 /* Use Perl_deprecate instead */
610 /* */
611 /* It is here to maintain backward compatibility with the pre-5.8 */
612 /* warnings category hierarchy. The "deprecated" category used to */
613 /* live under the "syntax" category. It is now a top-level category */
614 /* in its own right. */
615
7918f24d
NC
616 PERL_ARGS_ASSERT_DEPRECATE_OLD;
617
12bcd1a6 618 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 619 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
620 "Use of %s is deprecated", s);
621}
622
ffb4593c 623/*
9cbb5ea2
GS
624 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
625 * utf16-to-utf8-reversed.
ffb4593c
NT
626 */
627
c39cd008
GS
628#ifdef PERL_CR_FILTER
629static void
630strip_return(SV *sv)
631{
95a20fc0 632 register const char *s = SvPVX_const(sv);
9d4ba2ae 633 register const char * const e = s + SvCUR(sv);
7918f24d
NC
634
635 PERL_ARGS_ASSERT_STRIP_RETURN;
636
c39cd008
GS
637 /* outer loop optimized to do nothing if there are no CR-LFs */
638 while (s < e) {
639 if (*s++ == '\r' && *s == '\n') {
640 /* hit a CR-LF, need to copy the rest */
641 register char *d = s - 1;
642 *d++ = *s++;
643 while (s < e) {
644 if (*s == '\r' && s[1] == '\n')
645 s++;
646 *d++ = *s++;
647 }
648 SvCUR(sv) -= s - d;
649 return;
650 }
651 }
652}
a868473f 653
76e3520e 654STATIC I32
c39cd008 655S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 656{
f54cb97a 657 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
658 if (count > 0 && !maxlen)
659 strip_return(sv);
660 return count;
a868473f
NIS
661}
662#endif
663
199e78b7
DM
664
665
ffb4593c
NT
666/*
667 * Perl_lex_start
5486870f 668 *
e3abe207 669 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
670 *
671 * rsfp is the opened file handle to read from (if any),
672 *
673 * line holds any initial content already read from the file (or in
674 * the case of no file, such as an eval, the whole contents);
675 *
676 * new_filter indicates that this is a new file and it shouldn't inherit
677 * the filters from the current parser (ie require).
ffb4593c
NT
678 */
679
a0d0e21e 680void
5486870f 681Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 682{
97aff369 683 dVAR;
6ef55633 684 const char *s = NULL;
8990e307 685 STRLEN len;
5486870f 686 yy_parser *parser, *oparser;
acdf0a21
DM
687
688 /* create and initialise a parser */
689
199e78b7 690 Newxz(parser, 1, yy_parser);
5486870f 691 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
692 PL_parser = parser;
693
694 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
695 parser->ps = parser->stack;
696 parser->stack_size = YYINITDEPTH;
697
698 parser->stack->state = 0;
699 parser->yyerrstatus = 0;
700 parser->yychar = YYEMPTY; /* Cause a token to be read. */
701
e3abe207
DM
702 /* on scope exit, free this parser and restore any outer one */
703 SAVEPARSER(parser);
7c4baf47 704 parser->saved_curcop = PL_curcop;
e3abe207 705
acdf0a21 706 /* initialise lexer state */
8990e307 707
fb205e7a
DM
708#ifdef PERL_MAD
709 parser->curforce = -1;
710#else
711 parser->nexttoke = 0;
712#endif
ca4cfd28 713 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 714 parser->copline = NOLINE;
5afb0a62 715 parser->lex_state = LEX_NORMAL;
c2598295 716 parser->expect = XSTATE;
2f9285f8 717 parser->rsfp = rsfp;
56b27c9a 718 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
5486870f 719 : (AV*)SvREFCNT_inc(oparser->rsfp_filters);
2f9285f8 720
199e78b7
DM
721 Newx(parser->lex_brackstack, 120, char);
722 Newx(parser->lex_casestack, 12, char);
723 *parser->lex_casestack = '\0';
02b34bbe 724
10efb74f
NC
725 if (line) {
726 s = SvPV_const(line, len);
727 } else {
728 len = 0;
729 }
bdc0bf6f 730
10efb74f 731 if (!len) {
bdc0bf6f 732 parser->linestr = newSVpvs("\n;");
10efb74f 733 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 734 parser->linestr = newSVsv(line);
10efb74f 735 if (s[len-1] != ';')
bdc0bf6f 736 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
737 } else {
738 SvTEMP_off(line);
739 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 740 parser->linestr = line;
8990e307 741 }
f06b5848
DM
742 parser->oldoldbufptr =
743 parser->oldbufptr =
744 parser->bufptr =
745 parser->linestart = SvPVX(parser->linestr);
746 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
747 parser->last_lop = parser->last_uni = NULL;
79072805 748}
a687059c 749
e3abe207
DM
750
751/* delete a parser object */
752
753void
754Perl_parser_free(pTHX_ const yy_parser *parser)
755{
7918f24d
NC
756 PERL_ARGS_ASSERT_PARSER_FREE;
757
7c4baf47 758 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
759 SvREFCNT_dec(parser->linestr);
760
2f9285f8
DM
761 if (parser->rsfp == PerlIO_stdin())
762 PerlIO_clearerr(parser->rsfp);
763 else if (parser->rsfp && parser->old_parser
764 && parser->rsfp != parser->old_parser->rsfp)
765 PerlIO_close(parser->rsfp);
5486870f 766 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 767
e3abe207
DM
768 Safefree(parser->stack);
769 Safefree(parser->lex_brackstack);
770 Safefree(parser->lex_casestack);
771 PL_parser = parser->old_parser;
772 Safefree(parser);
773}
774
775
ffb4593c
NT
776/*
777 * Perl_lex_end
9cbb5ea2
GS
778 * Finalizer for lexing operations. Must be called when the parser is
779 * done with the lexer.
ffb4593c
NT
780 */
781
463ee0b2 782void
864dbfa3 783Perl_lex_end(pTHX)
463ee0b2 784{
97aff369 785 dVAR;
3280af22 786 PL_doextract = FALSE;
463ee0b2
LW
787}
788
ffb4593c
NT
789/*
790 * S_incline
791 * This subroutine has nothing to do with tilting, whether at windmills
792 * or pinball tables. Its name is short for "increment line". It
57843af0 793 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 794 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
795 * # line 500 "foo.pm"
796 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
797 */
798
76e3520e 799STATIC void
d9095cec 800S_incline(pTHX_ const char *s)
463ee0b2 801{
97aff369 802 dVAR;
d9095cec
NC
803 const char *t;
804 const char *n;
805 const char *e;
463ee0b2 806
7918f24d
NC
807 PERL_ARGS_ASSERT_INCLINE;
808
57843af0 809 CopLINE_inc(PL_curcop);
463ee0b2
LW
810 if (*s++ != '#')
811 return;
d4c19fe8
AL
812 while (SPACE_OR_TAB(*s))
813 s++;
73659bf1
GS
814 if (strnEQ(s, "line", 4))
815 s += 4;
816 else
817 return;
084592ab 818 if (SPACE_OR_TAB(*s))
73659bf1 819 s++;
4e553d73 820 else
73659bf1 821 return;
d4c19fe8
AL
822 while (SPACE_OR_TAB(*s))
823 s++;
463ee0b2
LW
824 if (!isDIGIT(*s))
825 return;
d4c19fe8 826
463ee0b2
LW
827 n = s;
828 while (isDIGIT(*s))
829 s++;
bf4acbe4 830 while (SPACE_OR_TAB(*s))
463ee0b2 831 s++;
73659bf1 832 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 833 s++;
73659bf1
GS
834 e = t + 1;
835 }
463ee0b2 836 else {
c35e046a
AL
837 t = s;
838 while (!isSPACE(*t))
839 t++;
73659bf1 840 e = t;
463ee0b2 841 }
bf4acbe4 842 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
843 e++;
844 if (*e != '\n' && *e != '\0')
845 return; /* false alarm */
846
f4dd75d9 847 if (t - s > 0) {
d9095cec 848 const STRLEN len = t - s;
8a5ee598 849#ifndef USE_ITHREADS
19bad673
NC
850 SV *const temp_sv = CopFILESV(PL_curcop);
851 const char *cf;
852 STRLEN tmplen;
853
854 if (temp_sv) {
855 cf = SvPVX(temp_sv);
856 tmplen = SvCUR(temp_sv);
857 } else {
858 cf = NULL;
859 tmplen = 0;
860 }
861
42d9b98d 862 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
863 /* must copy *{"::_<(eval N)[oldfilename:L]"}
864 * to *{"::_<newfilename"} */
44867030
NC
865 /* However, the long form of evals is only turned on by the
866 debugger - usually they're "(eval %lu)" */
867 char smallbuf[128];
868 char *tmpbuf;
869 GV **gvp;
d9095cec 870 STRLEN tmplen2 = len;
798b63bc 871 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
872 tmpbuf = smallbuf;
873 else
2ae0db35 874 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
875 tmpbuf[0] = '_';
876 tmpbuf[1] = '<';
2ae0db35 877 memcpy(tmpbuf + 2, cf, tmplen);
44867030 878 tmplen += 2;
8a5ee598
RGS
879 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
880 if (gvp) {
44867030
NC
881 char *tmpbuf2;
882 GV *gv2;
883
884 if (tmplen2 + 2 <= sizeof smallbuf)
885 tmpbuf2 = smallbuf;
886 else
887 Newx(tmpbuf2, tmplen2 + 2, char);
888
889 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
890 /* Either they malloc'd it, or we malloc'd it,
891 so no prefix is present in ours. */
892 tmpbuf2[0] = '_';
893 tmpbuf2[1] = '<';
894 }
895
896 memcpy(tmpbuf2 + 2, s, tmplen2);
897 tmplen2 += 2;
898
8a5ee598 899 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 900 if (!isGV(gv2)) {
8a5ee598 901 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
902 /* adjust ${"::_<newfilename"} to store the new file name */
903 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
904 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
905 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
906 }
44867030
NC
907
908 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 909 }
e66cf94c 910 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 911 }
8a5ee598 912#endif
05ec9bb3 913 CopFILE_free(PL_curcop);
d9095cec 914 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 915 }
57843af0 916 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
917}
918
29595ff2 919#ifdef PERL_MAD
cd81e915 920/* skip space before PL_thistoken */
29595ff2
NC
921
922STATIC char *
923S_skipspace0(pTHX_ register char *s)
924{
7918f24d
NC
925 PERL_ARGS_ASSERT_SKIPSPACE0;
926
29595ff2
NC
927 s = skipspace(s);
928 if (!PL_madskills)
929 return s;
cd81e915
NC
930 if (PL_skipwhite) {
931 if (!PL_thiswhite)
6b29d1f5 932 PL_thiswhite = newSVpvs("");
cd81e915
NC
933 sv_catsv(PL_thiswhite, PL_skipwhite);
934 sv_free(PL_skipwhite);
935 PL_skipwhite = 0;
936 }
937 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
938 return s;
939}
940
cd81e915 941/* skip space after PL_thistoken */
29595ff2
NC
942
943STATIC char *
944S_skipspace1(pTHX_ register char *s)
945{
d4c19fe8 946 const char *start = s;
29595ff2
NC
947 I32 startoff = start - SvPVX(PL_linestr);
948
7918f24d
NC
949 PERL_ARGS_ASSERT_SKIPSPACE1;
950
29595ff2
NC
951 s = skipspace(s);
952 if (!PL_madskills)
953 return s;
954 start = SvPVX(PL_linestr) + startoff;
cd81e915 955 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 956 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
957 PL_thistoken = newSVpvn(tstart, start - tstart);
958 }
959 PL_realtokenstart = -1;
960 if (PL_skipwhite) {
961 if (!PL_nextwhite)
6b29d1f5 962 PL_nextwhite = newSVpvs("");
cd81e915
NC
963 sv_catsv(PL_nextwhite, PL_skipwhite);
964 sv_free(PL_skipwhite);
965 PL_skipwhite = 0;
29595ff2
NC
966 }
967 return s;
968}
969
970STATIC char *
971S_skipspace2(pTHX_ register char *s, SV **svp)
972{
c35e046a
AL
973 char *start;
974 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
975 const I32 startoff = s - SvPVX(PL_linestr);
976
7918f24d
NC
977 PERL_ARGS_ASSERT_SKIPSPACE2;
978
29595ff2
NC
979 s = skipspace(s);
980 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
981 if (!PL_madskills || !svp)
982 return s;
983 start = SvPVX(PL_linestr) + startoff;
cd81e915 984 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 985 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
986 PL_thistoken = newSVpvn(tstart, start - tstart);
987 PL_realtokenstart = -1;
29595ff2 988 }
cd81e915 989 if (PL_skipwhite) {
29595ff2 990 if (!*svp)
6b29d1f5 991 *svp = newSVpvs("");
cd81e915
NC
992 sv_setsv(*svp, PL_skipwhite);
993 sv_free(PL_skipwhite);
994 PL_skipwhite = 0;
29595ff2
NC
995 }
996
997 return s;
998}
999#endif
1000
80a702cd 1001STATIC void
15f169a1 1002S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1003{
1004 AV *av = CopFILEAVx(PL_curcop);
1005 if (av) {
b9f83d2f 1006 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1007 if (orig_sv)
1008 sv_setsv(sv, orig_sv);
1009 else
1010 sv_setpvn(sv, buf, len);
80a702cd
RGS
1011 (void)SvIOK_on(sv);
1012 SvIV_set(sv, 0);
1013 av_store(av, (I32)CopLINE(PL_curcop), sv);
1014 }
1015}
1016
ffb4593c
NT
1017/*
1018 * S_skipspace
1019 * Called to gobble the appropriate amount and type of whitespace.
1020 * Skips comments as well.
1021 */
1022
76e3520e 1023STATIC char *
cea2e8a9 1024S_skipspace(pTHX_ register char *s)
a687059c 1025{
97aff369 1026 dVAR;
5db06880
NC
1027#ifdef PERL_MAD
1028 int curoff;
1029 int startoff = s - SvPVX(PL_linestr);
1030
7918f24d
NC
1031 PERL_ARGS_ASSERT_SKIPSPACE;
1032
cd81e915
NC
1033 if (PL_skipwhite) {
1034 sv_free(PL_skipwhite);
1035 PL_skipwhite = 0;
5db06880
NC
1036 }
1037#endif
7918f24d 1038 PERL_ARGS_ASSERT_SKIPSPACE;
5db06880 1039
3280af22 1040 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1041 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1042 s++;
5db06880
NC
1043#ifdef PERL_MAD
1044 goto done;
1045#else
463ee0b2 1046 return s;
5db06880 1047#endif
463ee0b2
LW
1048 }
1049 for (;;) {
fd049845 1050 STRLEN prevlen;
09bef843 1051 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 1052 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
1053 while (s < PL_bufend && isSPACE(*s)) {
1054 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1055 incline(s);
1056 }
ffb4593c
NT
1057
1058 /* comment */
3280af22
NIS
1059 if (s < PL_bufend && *s == '#') {
1060 while (s < PL_bufend && *s != '\n')
463ee0b2 1061 s++;
60e6418e 1062 if (s < PL_bufend) {
463ee0b2 1063 s++;
60e6418e
GS
1064 if (PL_in_eval && !PL_rsfp) {
1065 incline(s);
1066 continue;
1067 }
1068 }
463ee0b2 1069 }
ffb4593c
NT
1070
1071 /* only continue to recharge the buffer if we're at the end
1072 * of the buffer, we're not reading from a source filter, and
1073 * we're in normal lexing mode
1074 */
09bef843
SB
1075 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1076 PL_lex_state == LEX_FORMLINE)
5db06880
NC
1077#ifdef PERL_MAD
1078 goto done;
1079#else
463ee0b2 1080 return s;
5db06880 1081#endif
ffb4593c
NT
1082
1083 /* try to recharge the buffer */
5db06880
NC
1084#ifdef PERL_MAD
1085 curoff = s - SvPVX(PL_linestr);
1086#endif
1087
9cbb5ea2 1088 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1089 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1090 {
5db06880
NC
1091#ifdef PERL_MAD
1092 if (PL_madskills && curoff != startoff) {
cd81e915 1093 if (!PL_skipwhite)
6b29d1f5 1094 PL_skipwhite = newSVpvs("");
cd81e915 1095 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1096 curoff - startoff);
1097 }
1098
1099 /* mustn't throw out old stuff yet if madpropping */
1100 SvCUR(PL_linestr) = curoff;
1101 s = SvPVX(PL_linestr) + curoff;
1102 *s = 0;
1103 if (curoff && s[-1] == '\n')
1104 s[-1] = ' ';
1105#endif
1106
9cbb5ea2 1107 /* end of file. Add on the -p or -n magic */
cd81e915 1108 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1109 if (PL_minus_p) {
5db06880 1110#ifdef PERL_MAD
6502358f 1111 sv_catpvs(PL_linestr,
5db06880
NC
1112 ";}continue{print or die qq(-p destination: $!\\n);}");
1113#else
6502358f 1114 sv_setpvs(PL_linestr,
01a19ab0 1115 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1116#endif
3280af22 1117 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1118 }
01a19ab0 1119 else if (PL_minus_n) {
5db06880
NC
1120#ifdef PERL_MAD
1121 sv_catpvn(PL_linestr, ";}", 2);
1122#else
01a19ab0 1123 sv_setpvn(PL_linestr, ";}", 2);
5db06880 1124#endif
01a19ab0
NC
1125 PL_minus_n = 0;
1126 }
a0d0e21e 1127 else
5db06880
NC
1128#ifdef PERL_MAD
1129 sv_catpvn(PL_linestr,";", 1);
1130#else
4147a61b 1131 sv_setpvn(PL_linestr,";", 1);
5db06880 1132#endif
ffb4593c
NT
1133
1134 /* reset variables for next time we lex */
9cbb5ea2 1135 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1136 = SvPVX(PL_linestr)
1137#ifdef PERL_MAD
1138 + curoff
1139#endif
1140 ;
3280af22 1141 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1142 PL_last_lop = PL_last_uni = NULL;
ffb4593c 1143
4c84d7f2 1144 /* Close the filehandle. Could be from
ffb4593c
NT
1145 * STDIN, or a regular file. If we were reading code from
1146 * STDIN (because the commandline held no -e or filename)
1147 * then we don't close it, we reset it so the code can
1148 * read from STDIN too.
1149 */
1150
4c84d7f2 1151 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
3280af22 1152 PerlIO_clearerr(PL_rsfp);
8990e307 1153 else
3280af22 1154 (void)PerlIO_close(PL_rsfp);
4608196e 1155 PL_rsfp = NULL;
463ee0b2
LW
1156 return s;
1157 }
ffb4593c
NT
1158
1159 /* not at end of file, so we only read another line */
09bef843
SB
1160 /* make corresponding updates to old pointers, for yyerror() */
1161 oldprevlen = PL_oldbufptr - PL_bufend;
1162 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1163 if (PL_last_uni)
1164 oldunilen = PL_last_uni - PL_bufend;
1165 if (PL_last_lop)
1166 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1167 PL_linestart = PL_bufptr = s + prevlen;
1168 PL_bufend = s + SvCUR(PL_linestr);
1169 s = PL_bufptr;
09bef843
SB
1170 PL_oldbufptr = s + oldprevlen;
1171 PL_oldoldbufptr = s + oldoldprevlen;
1172 if (PL_last_uni)
1173 PL_last_uni = s + oldunilen;
1174 if (PL_last_lop)
1175 PL_last_lop = s + oldloplen;
a0d0e21e 1176 incline(s);
ffb4593c
NT
1177
1178 /* debugger active and we're not compiling the debugger code,
1179 * so store the line into the debugger's array of lines
1180 */
80a702cd 1181 if (PERLDB_LINE && PL_curstash != PL_debstash)
5fa550fb 1182 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1183 }
5db06880
NC
1184
1185#ifdef PERL_MAD
1186 done:
1187 if (PL_madskills) {
cd81e915 1188 if (!PL_skipwhite)
6b29d1f5 1189 PL_skipwhite = newSVpvs("");
5db06880
NC
1190 curoff = s - SvPVX(PL_linestr);
1191 if (curoff - startoff)
cd81e915 1192 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1193 curoff - startoff);
1194 }
1195 return s;
1196#endif
a687059c 1197}
378cc40b 1198
ffb4593c
NT
1199/*
1200 * S_check_uni
1201 * Check the unary operators to ensure there's no ambiguity in how they're
1202 * used. An ambiguous piece of code would be:
1203 * rand + 5
1204 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1205 * the +5 is its argument.
1206 */
1207
76e3520e 1208STATIC void
cea2e8a9 1209S_check_uni(pTHX)
ba106d47 1210{
97aff369 1211 dVAR;
d4c19fe8
AL
1212 const char *s;
1213 const char *t;
2f3197b3 1214
3280af22 1215 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1216 return;
3280af22
NIS
1217 while (isSPACE(*PL_last_uni))
1218 PL_last_uni++;
c35e046a
AL
1219 s = PL_last_uni;
1220 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1221 s++;
3280af22 1222 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1223 return;
6136c704 1224
0453d815 1225 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1226 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1227 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1228 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1229 }
2f3197b3
LW
1230}
1231
ffb4593c
NT
1232/*
1233 * LOP : macro to build a list operator. Its behaviour has been replaced
1234 * with a subroutine, S_lop() for which LOP is just another name.
1235 */
1236
a0d0e21e
LW
1237#define LOP(f,x) return lop(f,x,s)
1238
ffb4593c
NT
1239/*
1240 * S_lop
1241 * Build a list operator (or something that might be one). The rules:
1242 * - if we have a next token, then it's a list operator [why?]
1243 * - if the next thing is an opening paren, then it's a function
1244 * - else it's a list operator
1245 */
1246
76e3520e 1247STATIC I32
a0be28da 1248S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1249{
97aff369 1250 dVAR;
7918f24d
NC
1251
1252 PERL_ARGS_ASSERT_LOP;
1253
6154021b 1254 pl_yylval.ival = f;
35c8bce7 1255 CLINE;
3280af22
NIS
1256 PL_expect = x;
1257 PL_bufptr = s;
1258 PL_last_lop = PL_oldbufptr;
eb160463 1259 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1260#ifdef PERL_MAD
1261 if (PL_lasttoke)
1262 return REPORT(LSTOP);
1263#else
3280af22 1264 if (PL_nexttoke)
bbf60fe6 1265 return REPORT(LSTOP);
5db06880 1266#endif
79072805 1267 if (*s == '(')
bbf60fe6 1268 return REPORT(FUNC);
29595ff2 1269 s = PEEKSPACE(s);
79072805 1270 if (*s == '(')
bbf60fe6 1271 return REPORT(FUNC);
79072805 1272 else
bbf60fe6 1273 return REPORT(LSTOP);
79072805
LW
1274}
1275
5db06880
NC
1276#ifdef PERL_MAD
1277 /*
1278 * S_start_force
1279 * Sets up for an eventual force_next(). start_force(0) basically does
1280 * an unshift, while start_force(-1) does a push. yylex removes items
1281 * on the "pop" end.
1282 */
1283
1284STATIC void
1285S_start_force(pTHX_ int where)
1286{
1287 int i;
1288
cd81e915 1289 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1290 where = PL_lasttoke;
cd81e915
NC
1291 assert(PL_curforce < 0 || PL_curforce == where);
1292 if (PL_curforce != where) {
5db06880
NC
1293 for (i = PL_lasttoke; i > where; --i) {
1294 PL_nexttoke[i] = PL_nexttoke[i-1];
1295 }
1296 PL_lasttoke++;
1297 }
cd81e915 1298 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1299 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1300 PL_curforce = where;
1301 if (PL_nextwhite) {
5db06880 1302 if (PL_madskills)
6b29d1f5 1303 curmad('^', newSVpvs(""));
cd81e915 1304 CURMAD('_', PL_nextwhite);
5db06880
NC
1305 }
1306}
1307
1308STATIC void
1309S_curmad(pTHX_ char slot, SV *sv)
1310{
1311 MADPROP **where;
1312
1313 if (!sv)
1314 return;
cd81e915
NC
1315 if (PL_curforce < 0)
1316 where = &PL_thismad;
5db06880 1317 else
cd81e915 1318 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1319
cd81e915 1320 if (PL_faketokens)
5db06880
NC
1321 sv_setpvn(sv, "", 0);
1322 else {
1323 if (!IN_BYTES) {
1324 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1325 SvUTF8_on(sv);
1326 else if (PL_encoding) {
1327 sv_recode_to_utf8(sv, PL_encoding);
1328 }
1329 }
1330 }
1331
1332 /* keep a slot open for the head of the list? */
1333 if (slot != '_' && *where && (*where)->mad_key == '^') {
1334 (*where)->mad_key = slot;
035e2bcc 1335 sv_free((SV*)((*where)->mad_val));
5db06880
NC
1336 (*where)->mad_val = (void*)sv;
1337 }
1338 else
1339 addmad(newMADsv(slot, sv), where, 0);
1340}
1341#else
b3f24c00
MHM
1342# define start_force(where) NOOP
1343# define curmad(slot, sv) NOOP
5db06880
NC
1344#endif
1345
ffb4593c
NT
1346/*
1347 * S_force_next
9cbb5ea2 1348 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1349 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1350 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1351 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1352 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1353 */
1354
4e553d73 1355STATIC void
cea2e8a9 1356S_force_next(pTHX_ I32 type)
79072805 1357{
97aff369 1358 dVAR;
704d4215
GG
1359#ifdef DEBUGGING
1360 if (DEBUG_T_TEST) {
1361 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1362 tokereport(THING, &NEXTVAL_NEXTTOKE);
1363 }
1364#endif
5db06880 1365#ifdef PERL_MAD
cd81e915 1366 if (PL_curforce < 0)
5db06880 1367 start_force(PL_lasttoke);
cd81e915 1368 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1369 if (PL_lex_state != LEX_KNOWNEXT)
1370 PL_lex_defer = PL_lex_state;
1371 PL_lex_state = LEX_KNOWNEXT;
1372 PL_lex_expect = PL_expect;
cd81e915 1373 PL_curforce = -1;
5db06880 1374#else
3280af22
NIS
1375 PL_nexttype[PL_nexttoke] = type;
1376 PL_nexttoke++;
1377 if (PL_lex_state != LEX_KNOWNEXT) {
1378 PL_lex_defer = PL_lex_state;
1379 PL_lex_expect = PL_expect;
1380 PL_lex_state = LEX_KNOWNEXT;
79072805 1381 }
5db06880 1382#endif
79072805
LW
1383}
1384
d0a148a6 1385STATIC SV *
15f169a1 1386S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1387{
97aff369 1388 dVAR;
740cce10
NC
1389 SV * const sv = newSVpvn_utf8(start, len,
1390 UTF && !IN_BYTES
1391 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1392 return sv;
1393}
1394
ffb4593c
NT
1395/*
1396 * S_force_word
1397 * When the lexer knows the next thing is a word (for instance, it has
1398 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1399 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1400 * lookahead.
ffb4593c
NT
1401 *
1402 * Arguments:
b1b65b59 1403 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1404 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1405 * int check_keyword : if true, Perl checks to make sure the word isn't
1406 * a keyword (do this if the word is a label, e.g. goto FOO)
1407 * int allow_pack : if true, : characters will also be allowed (require,
1408 * use, etc. do this)
9cbb5ea2 1409 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1410 */
1411
76e3520e 1412STATIC char *
cea2e8a9 1413S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1414{
97aff369 1415 dVAR;
463ee0b2
LW
1416 register char *s;
1417 STRLEN len;
4e553d73 1418
7918f24d
NC
1419 PERL_ARGS_ASSERT_FORCE_WORD;
1420
29595ff2 1421 start = SKIPSPACE1(start);
463ee0b2 1422 s = start;
7e2040f0 1423 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1424 (allow_pack && *s == ':') ||
15f0808c 1425 (allow_initial_tick && *s == '\'') )
a0d0e21e 1426 {
3280af22 1427 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1428 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1429 return start;
cd81e915 1430 start_force(PL_curforce);
5db06880
NC
1431 if (PL_madskills)
1432 curmad('X', newSVpvn(start,s-start));
463ee0b2 1433 if (token == METHOD) {
29595ff2 1434 s = SKIPSPACE1(s);
463ee0b2 1435 if (*s == '(')
3280af22 1436 PL_expect = XTERM;
463ee0b2 1437 else {
3280af22 1438 PL_expect = XOPERATOR;
463ee0b2 1439 }
79072805 1440 }
e74e6b3d 1441 if (PL_madskills)
63575281 1442 curmad('g', newSVpvs( "forced" ));
9ded7720 1443 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1444 = (OP*)newSVOP(OP_CONST,0,
1445 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1446 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1447 force_next(token);
1448 }
1449 return s;
1450}
1451
ffb4593c
NT
1452/*
1453 * S_force_ident
9cbb5ea2 1454 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1455 * text only contains the "foo" portion. The first argument is a pointer
1456 * to the "foo", and the second argument is the type symbol to prefix.
1457 * Forces the next token to be a "WORD".
9cbb5ea2 1458 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1459 */
1460
76e3520e 1461STATIC void
bfed75c6 1462S_force_ident(pTHX_ register const char *s, int kind)
79072805 1463{
97aff369 1464 dVAR;
7918f24d
NC
1465
1466 PERL_ARGS_ASSERT_FORCE_IDENT;
1467
c35e046a 1468 if (*s) {
90e5519e
NC
1469 const STRLEN len = strlen(s);
1470 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1471 start_force(PL_curforce);
9ded7720 1472 NEXTVAL_NEXTTOKE.opval = o;
79072805 1473 force_next(WORD);
748a9306 1474 if (kind) {
11343788 1475 o->op_private = OPpCONST_ENTERED;
55497cff 1476 /* XXX see note in pp_entereval() for why we forgo typo
1477 warnings if the symbol must be introduced in an eval.
1478 GSAR 96-10-12 */
90e5519e
NC
1479 gv_fetchpvn_flags(s, len,
1480 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1481 : GV_ADD,
1482 kind == '$' ? SVt_PV :
1483 kind == '@' ? SVt_PVAV :
1484 kind == '%' ? SVt_PVHV :
a0d0e21e 1485 SVt_PVGV
90e5519e 1486 );
748a9306 1487 }
79072805
LW
1488 }
1489}
1490
1571675a
GS
1491NV
1492Perl_str_to_version(pTHX_ SV *sv)
1493{
1494 NV retval = 0.0;
1495 NV nshift = 1.0;
1496 STRLEN len;
cfd0369c 1497 const char *start = SvPV_const(sv,len);
9d4ba2ae 1498 const char * const end = start + len;
504618e9 1499 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
1500
1501 PERL_ARGS_ASSERT_STR_TO_VERSION;
1502
1571675a 1503 while (start < end) {
ba210ebe 1504 STRLEN skip;
1571675a
GS
1505 UV n;
1506 if (utf)
9041c2e3 1507 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1508 else {
1509 n = *(U8*)start;
1510 skip = 1;
1511 }
1512 retval += ((NV)n)/nshift;
1513 start += skip;
1514 nshift *= 1000;
1515 }
1516 return retval;
1517}
1518
4e553d73 1519/*
ffb4593c
NT
1520 * S_force_version
1521 * Forces the next token to be a version number.
e759cc13
RGS
1522 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1523 * and if "guessing" is TRUE, then no new token is created (and the caller
1524 * must use an alternative parsing method).
ffb4593c
NT
1525 */
1526
76e3520e 1527STATIC char *
e759cc13 1528S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1529{
97aff369 1530 dVAR;
5f66b61c 1531 OP *version = NULL;
44dcb63b 1532 char *d;
5db06880
NC
1533#ifdef PERL_MAD
1534 I32 startoff = s - SvPVX(PL_linestr);
1535#endif
89bfa8cd 1536
7918f24d
NC
1537 PERL_ARGS_ASSERT_FORCE_VERSION;
1538
29595ff2 1539 s = SKIPSPACE1(s);
89bfa8cd 1540
44dcb63b 1541 d = s;
dd629d5b 1542 if (*d == 'v')
44dcb63b 1543 d++;
44dcb63b 1544 if (isDIGIT(*d)) {
e759cc13
RGS
1545 while (isDIGIT(*d) || *d == '_' || *d == '.')
1546 d++;
5db06880
NC
1547#ifdef PERL_MAD
1548 if (PL_madskills) {
cd81e915 1549 start_force(PL_curforce);
5db06880
NC
1550 curmad('X', newSVpvn(s,d-s));
1551 }
1552#endif
9f3d182e 1553 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1554 SV *ver;
6154021b
RGS
1555 s = scan_num(s, &pl_yylval);
1556 version = pl_yylval.opval;
dd629d5b
GS
1557 ver = cSVOPx(version)->op_sv;
1558 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1559 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1560 SvNV_set(ver, str_to_version(ver));
1571675a 1561 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1562 }
89bfa8cd 1563 }
5db06880
NC
1564 else if (guessing) {
1565#ifdef PERL_MAD
1566 if (PL_madskills) {
cd81e915
NC
1567 sv_free(PL_nextwhite); /* let next token collect whitespace */
1568 PL_nextwhite = 0;
5db06880
NC
1569 s = SvPVX(PL_linestr) + startoff;
1570 }
1571#endif
e759cc13 1572 return s;
5db06880 1573 }
89bfa8cd 1574 }
1575
5db06880
NC
1576#ifdef PERL_MAD
1577 if (PL_madskills && !version) {
cd81e915
NC
1578 sv_free(PL_nextwhite); /* let next token collect whitespace */
1579 PL_nextwhite = 0;
5db06880
NC
1580 s = SvPVX(PL_linestr) + startoff;
1581 }
1582#endif
89bfa8cd 1583 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1584 start_force(PL_curforce);
9ded7720 1585 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1586 force_next(WORD);
89bfa8cd 1587
e759cc13 1588 return s;
89bfa8cd 1589}
1590
ffb4593c
NT
1591/*
1592 * S_tokeq
1593 * Tokenize a quoted string passed in as an SV. It finds the next
1594 * chunk, up to end of string or a backslash. It may make a new
1595 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1596 * turns \\ into \.
1597 */
1598
76e3520e 1599STATIC SV *
cea2e8a9 1600S_tokeq(pTHX_ SV *sv)
79072805 1601{
97aff369 1602 dVAR;
79072805
LW
1603 register char *s;
1604 register char *send;
1605 register char *d;
b3ac6de7
IZ
1606 STRLEN len = 0;
1607 SV *pv = sv;
79072805 1608
7918f24d
NC
1609 PERL_ARGS_ASSERT_TOKEQ;
1610
79072805 1611 if (!SvLEN(sv))
b3ac6de7 1612 goto finish;
79072805 1613
a0d0e21e 1614 s = SvPV_force(sv, len);
21a311ee 1615 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1616 goto finish;
463ee0b2 1617 send = s + len;
79072805
LW
1618 while (s < send && *s != '\\')
1619 s++;
1620 if (s == send)
b3ac6de7 1621 goto finish;
79072805 1622 d = s;
be4731d2 1623 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 1624 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 1625 }
79072805
LW
1626 while (s < send) {
1627 if (*s == '\\') {
a0d0e21e 1628 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1629 s++; /* all that, just for this */
1630 }
1631 *d++ = *s++;
1632 }
1633 *d = '\0';
95a20fc0 1634 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1635 finish:
3280af22 1636 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 1637 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
1638 return sv;
1639}
1640
ffb4593c
NT
1641/*
1642 * Now come three functions related to double-quote context,
1643 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1644 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1645 * interact with PL_lex_state, and create fake ( ... ) argument lists
1646 * to handle functions and concatenation.
1647 * They assume that whoever calls them will be setting up a fake
1648 * join call, because each subthing puts a ',' after it. This lets
1649 * "lower \luPpEr"
1650 * become
1651 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1652 *
1653 * (I'm not sure whether the spurious commas at the end of lcfirst's
1654 * arguments and join's arguments are created or not).
1655 */
1656
1657/*
1658 * S_sublex_start
6154021b 1659 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
1660 *
1661 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 1662 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
1663 *
1664 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1665 *
1666 * Everything else becomes a FUNC.
1667 *
1668 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1669 * had an OP_CONST or OP_READLINE). This just sets us up for a
1670 * call to S_sublex_push().
1671 */
1672
76e3520e 1673STATIC I32
cea2e8a9 1674S_sublex_start(pTHX)
79072805 1675{
97aff369 1676 dVAR;
6154021b 1677 register const I32 op_type = pl_yylval.ival;
79072805
LW
1678
1679 if (op_type == OP_NULL) {
6154021b 1680 pl_yylval.opval = PL_lex_op;
5f66b61c 1681 PL_lex_op = NULL;
79072805
LW
1682 return THING;
1683 }
1684 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1685 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1686
1687 if (SvTYPE(sv) == SVt_PVIV) {
1688 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1689 STRLEN len;
96a5add6 1690 const char * const p = SvPV_const(sv, len);
740cce10 1691 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
1692 SvREFCNT_dec(sv);
1693 sv = nsv;
4e553d73 1694 }
6154021b 1695 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1696 PL_lex_stuff = NULL;
6f33ba73
RGS
1697 /* Allow <FH> // "foo" */
1698 if (op_type == OP_READLINE)
1699 PL_expect = XTERMORDORDOR;
79072805
LW
1700 return THING;
1701 }
e3f73d4e
RGS
1702 else if (op_type == OP_BACKTICK && PL_lex_op) {
1703 /* readpipe() vas overriden */
1704 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 1705 pl_yylval.opval = PL_lex_op;
9b201d7d 1706 PL_lex_op = NULL;
e3f73d4e
RGS
1707 PL_lex_stuff = NULL;
1708 return THING;
1709 }
79072805 1710
3280af22 1711 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 1712 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
1713 PL_sublex_info.sub_op = PL_lex_op;
1714 PL_lex_state = LEX_INTERPPUSH;
55497cff 1715
3280af22
NIS
1716 PL_expect = XTERM;
1717 if (PL_lex_op) {
6154021b 1718 pl_yylval.opval = PL_lex_op;
5f66b61c 1719 PL_lex_op = NULL;
55497cff 1720 return PMFUNC;
1721 }
1722 else
1723 return FUNC;
1724}
1725
ffb4593c
NT
1726/*
1727 * S_sublex_push
1728 * Create a new scope to save the lexing state. The scope will be
1729 * ended in S_sublex_done. Returns a '(', starting the function arguments
1730 * to the uc, lc, etc. found before.
1731 * Sets PL_lex_state to LEX_INTERPCONCAT.
1732 */
1733
76e3520e 1734STATIC I32
cea2e8a9 1735S_sublex_push(pTHX)
55497cff 1736{
27da23d5 1737 dVAR;
f46d017c 1738 ENTER;
55497cff 1739
3280af22 1740 PL_lex_state = PL_sublex_info.super_state;
651b5b28 1741 SAVEBOOL(PL_lex_dojoin);
3280af22 1742 SAVEI32(PL_lex_brackets);
3280af22
NIS
1743 SAVEI32(PL_lex_casemods);
1744 SAVEI32(PL_lex_starts);
651b5b28 1745 SAVEI8(PL_lex_state);
7766f137 1746 SAVEVPTR(PL_lex_inpat);
98246f1e 1747 SAVEI16(PL_lex_inwhat);
57843af0 1748 SAVECOPLINE(PL_curcop);
3280af22 1749 SAVEPPTR(PL_bufptr);
8452ff4b 1750 SAVEPPTR(PL_bufend);
3280af22
NIS
1751 SAVEPPTR(PL_oldbufptr);
1752 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1753 SAVEPPTR(PL_last_lop);
1754 SAVEPPTR(PL_last_uni);
3280af22
NIS
1755 SAVEPPTR(PL_linestart);
1756 SAVESPTR(PL_linestr);
8edd5f42
RGS
1757 SAVEGENERICPV(PL_lex_brackstack);
1758 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1759
1760 PL_linestr = PL_lex_stuff;
a0714e2c 1761 PL_lex_stuff = NULL;
3280af22 1762
9cbb5ea2
GS
1763 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1764 = SvPVX(PL_linestr);
3280af22 1765 PL_bufend += SvCUR(PL_linestr);
bd61b366 1766 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1767 SAVEFREESV(PL_linestr);
1768
1769 PL_lex_dojoin = FALSE;
1770 PL_lex_brackets = 0;
a02a5408
JC
1771 Newx(PL_lex_brackstack, 120, char);
1772 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1773 PL_lex_casemods = 0;
1774 *PL_lex_casestack = '\0';
1775 PL_lex_starts = 0;
1776 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1777 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1778
1779 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1780 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1781 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1782 else
5f66b61c 1783 PL_lex_inpat = NULL;
79072805 1784
55497cff 1785 return '(';
79072805
LW
1786}
1787
ffb4593c
NT
1788/*
1789 * S_sublex_done
1790 * Restores lexer state after a S_sublex_push.
1791 */
1792
76e3520e 1793STATIC I32
cea2e8a9 1794S_sublex_done(pTHX)
79072805 1795{
27da23d5 1796 dVAR;
3280af22 1797 if (!PL_lex_starts++) {
396482e1 1798 SV * const sv = newSVpvs("");
9aa983d2
JH
1799 if (SvUTF8(PL_linestr))
1800 SvUTF8_on(sv);
3280af22 1801 PL_expect = XOPERATOR;
6154021b 1802 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1803 return THING;
1804 }
1805
3280af22
NIS
1806 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1807 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1808 return yylex();
79072805
LW
1809 }
1810
ffb4593c 1811 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1812 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1813 PL_linestr = PL_lex_repl;
1814 PL_lex_inpat = 0;
1815 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1816 PL_bufend += SvCUR(PL_linestr);
bd61b366 1817 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1818 SAVEFREESV(PL_linestr);
1819 PL_lex_dojoin = FALSE;
1820 PL_lex_brackets = 0;
3280af22
NIS
1821 PL_lex_casemods = 0;
1822 *PL_lex_casestack = '\0';
1823 PL_lex_starts = 0;
25da4f38 1824 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1825 PL_lex_state = LEX_INTERPNORMAL;
1826 PL_lex_starts++;
e9fa98b2
HS
1827 /* we don't clear PL_lex_repl here, so that we can check later
1828 whether this is an evalled subst; that means we rely on the
1829 logic to ensure sublex_done() is called again only via the
1830 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1831 }
e9fa98b2 1832 else {
3280af22 1833 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1834 PL_lex_repl = NULL;
e9fa98b2 1835 }
79072805 1836 return ',';
ffed7fef
LW
1837 }
1838 else {
5db06880
NC
1839#ifdef PERL_MAD
1840 if (PL_madskills) {
cd81e915
NC
1841 if (PL_thiswhite) {
1842 if (!PL_endwhite)
6b29d1f5 1843 PL_endwhite = newSVpvs("");
cd81e915
NC
1844 sv_catsv(PL_endwhite, PL_thiswhite);
1845 PL_thiswhite = 0;
1846 }
1847 if (PL_thistoken)
1848 sv_setpvn(PL_thistoken,"",0);
5db06880 1849 else
cd81e915 1850 PL_realtokenstart = -1;
5db06880
NC
1851 }
1852#endif
f46d017c 1853 LEAVE;
3280af22
NIS
1854 PL_bufend = SvPVX(PL_linestr);
1855 PL_bufend += SvCUR(PL_linestr);
1856 PL_expect = XOPERATOR;
09bef843 1857 PL_sublex_info.sub_inwhat = 0;
79072805 1858 return ')';
ffed7fef
LW
1859 }
1860}
1861
02aa26ce
NT
1862/*
1863 scan_const
1864
1865 Extracts a pattern, double-quoted string, or transliteration. This
1866 is terrifying code.
1867
94def140 1868 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1869 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1870 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1871
94def140
TS
1872 Returns a pointer to the character scanned up to. If this is
1873 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 1874 successfully parsed), will leave an OP for the substring scanned
6154021b 1875 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
1876 by looking at the next characters herself.
1877
02aa26ce
NT
1878 In patterns:
1879 backslashes:
1880 double-quoted style: \r and \n
1881 regexp special ones: \D \s
94def140
TS
1882 constants: \x31
1883 backrefs: \1
02aa26ce
NT
1884 case and quoting: \U \Q \E
1885 stops on @ and $, but not for $ as tail anchor
1886
1887 In transliterations:
1888 characters are VERY literal, except for - not at the start or end
94def140
TS
1889 of the string, which indicates a range. If the range is in bytes,
1890 scan_const expands the range to the full set of intermediate
1891 characters. If the range is in utf8, the hyphen is replaced with
1892 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1893
1894 In double-quoted strings:
1895 backslashes:
1896 double-quoted style: \r and \n
94def140
TS
1897 constants: \x31
1898 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1899 case and quoting: \U \Q \E
1900 stops on @ and $
1901
1902 scan_const does *not* construct ops to handle interpolated strings.
1903 It stops processing as soon as it finds an embedded $ or @ variable
1904 and leaves it to the caller to work out what's going on.
1905
94def140
TS
1906 embedded arrays (whether in pattern or not) could be:
1907 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1908
1909 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1910
1911 $ in pattern could be $foo or could be tail anchor. Assumption:
1912 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1913 followed by one of "()| \r\n\t"
02aa26ce
NT
1914
1915 \1 (backreferences) are turned into $1
1916
1917 The structure of the code is
1918 while (there's a character to process) {
94def140
TS
1919 handle transliteration ranges
1920 skip regexp comments /(?#comment)/ and codes /(?{code})/
1921 skip #-initiated comments in //x patterns
1922 check for embedded arrays
02aa26ce
NT
1923 check for embedded scalars
1924 if (backslash) {
94def140
TS
1925 leave intact backslashes from leaveit (below)
1926 deprecate \1 in substitution replacements
02aa26ce
NT
1927 handle string-changing backslashes \l \U \Q \E, etc.
1928 switch (what was escaped) {
94def140
TS
1929 handle \- in a transliteration (becomes a literal -)
1930 handle \132 (octal characters)
1931 handle \x15 and \x{1234} (hex characters)
1932 handle \N{name} (named characters)
1933 handle \cV (control characters)
1934 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1935 } (end switch)
1936 } (end if backslash)
1937 } (end while character to read)
4e553d73 1938
02aa26ce
NT
1939*/
1940
76e3520e 1941STATIC char *
cea2e8a9 1942S_scan_const(pTHX_ char *start)
79072805 1943{
97aff369 1944 dVAR;
3280af22 1945 register char *send = PL_bufend; /* end of the constant */
561b68a9 1946 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1947 register char *s = start; /* start of the constant */
1948 register char *d = SvPVX(sv); /* destination for copies */
1949 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1950 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1951 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1952 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1953 UV uv;
4c3a8340
TS
1954#ifdef EBCDIC
1955 UV literal_endpoint = 0;
e294cc5d 1956 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1957#endif
012bcf8d 1958
7918f24d
NC
1959 PERL_ARGS_ASSERT_SCAN_CONST;
1960
2b9d42f0
NIS
1961 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1962 /* If we are doing a trans and we know we want UTF8 set expectation */
1963 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1964 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1965 }
1966
1967
79072805 1968 while (s < send || dorange) {
02aa26ce 1969 /* get transliterations out of the way (they're most literal) */
3280af22 1970 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1971 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1972 if (dorange) {
1ba5c669
JH
1973 I32 i; /* current expanded character */
1974 I32 min; /* first character in range */
1975 I32 max; /* last character in range */
02aa26ce 1976
e294cc5d
JH
1977#ifdef EBCDIC
1978 UV uvmax = 0;
1979#endif
1980
1981 if (has_utf8
1982#ifdef EBCDIC
1983 && !native_range
1984#endif
1985 ) {
9d4ba2ae 1986 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1987 char *e = d++;
1988 while (e-- > c)
1989 *(e + 1) = *e;
25716404 1990 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1991 /* mark the range as done, and continue */
1992 dorange = FALSE;
1993 didrange = TRUE;
1994 continue;
1995 }
2b9d42f0 1996
95a20fc0 1997 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1998#ifdef EBCDIC
1999 SvGROW(sv,
2000 SvLEN(sv) + (has_utf8 ?
2001 (512 - UTF_CONTINUATION_MARK +
2002 UNISKIP(0x100))
2003 : 256));
2004 /* How many two-byte within 0..255: 128 in UTF-8,
2005 * 96 in UTF-8-mod. */
2006#else
9cbb5ea2 2007 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2008#endif
9cbb5ea2 2009 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2010#ifdef EBCDIC
2011 if (has_utf8) {
2012 int j;
2013 for (j = 0; j <= 1; j++) {
2014 char * const c = (char*)utf8_hop((U8*)d, -1);
2015 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2016 if (j)
2017 min = (U8)uv;
2018 else if (uv < 256)
2019 max = (U8)uv;
2020 else {
2021 max = (U8)0xff; /* only to \xff */
2022 uvmax = uv; /* \x{100} to uvmax */
2023 }
2024 d = c; /* eat endpoint chars */
2025 }
2026 }
2027 else {
2028#endif
2029 d -= 2; /* eat the first char and the - */
2030 min = (U8)*d; /* first char in range */
2031 max = (U8)d[1]; /* last char in range */
2032#ifdef EBCDIC
2033 }
2034#endif
8ada0baa 2035
c2e66d9e 2036 if (min > max) {
01ec43d0 2037 Perl_croak(aTHX_
d1573ac7 2038 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2039 (char)min, (char)max);
c2e66d9e
GS
2040 }
2041
c7f1f016 2042#ifdef EBCDIC
4c3a8340
TS
2043 if (literal_endpoint == 2 &&
2044 ((isLOWER(min) && isLOWER(max)) ||
2045 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2046 if (isLOWER(min)) {
2047 for (i = min; i <= max; i++)
2048 if (isLOWER(i))
db42d148 2049 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2050 } else {
2051 for (i = min; i <= max; i++)
2052 if (isUPPER(i))
db42d148 2053 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2054 }
2055 }
2056 else
2057#endif
2058 for (i = min; i <= max; i++)
e294cc5d
JH
2059#ifdef EBCDIC
2060 if (has_utf8) {
2061 const U8 ch = (U8)NATIVE_TO_UTF(i);
2062 if (UNI_IS_INVARIANT(ch))
2063 *d++ = (U8)i;
2064 else {
2065 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2066 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2067 }
2068 }
2069 else
2070#endif
2071 *d++ = (char)i;
2072
2073#ifdef EBCDIC
2074 if (uvmax) {
2075 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2076 if (uvmax > 0x101)
2077 *d++ = (char)UTF_TO_NATIVE(0xff);
2078 if (uvmax > 0x100)
2079 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2080 }
2081#endif
02aa26ce
NT
2082
2083 /* mark the range as done, and continue */
79072805 2084 dorange = FALSE;
01ec43d0 2085 didrange = TRUE;
4c3a8340
TS
2086#ifdef EBCDIC
2087 literal_endpoint = 0;
2088#endif
79072805 2089 continue;
4e553d73 2090 }
02aa26ce
NT
2091
2092 /* range begins (ignore - as first or last char) */
79072805 2093 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2094 if (didrange) {
1fafa243 2095 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2096 }
e294cc5d
JH
2097 if (has_utf8
2098#ifdef EBCDIC
2099 && !native_range
2100#endif
2101 ) {
25716404 2102 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2103 s++;
2104 continue;
2105 }
79072805
LW
2106 dorange = TRUE;
2107 s++;
01ec43d0
GS
2108 }
2109 else {
2110 didrange = FALSE;
4c3a8340
TS
2111#ifdef EBCDIC
2112 literal_endpoint = 0;
e294cc5d 2113 native_range = TRUE;
4c3a8340 2114#endif
01ec43d0 2115 }
79072805 2116 }
02aa26ce
NT
2117
2118 /* if we get here, we're not doing a transliteration */
2119
0f5d15d6
IZ
2120 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2121 except for the last char, which will be done separately. */
3280af22 2122 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2123 if (s[2] == '#') {
e994fd66 2124 while (s+1 < send && *s != ')')
db42d148 2125 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2126 }
2127 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2128 || (s[2] == '?' && s[3] == '{'))
155aba94 2129 {
cc6b7395 2130 I32 count = 1;
0f5d15d6 2131 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2132 char c;
2133
d9f97599
GS
2134 while (count && (c = *regparse)) {
2135 if (c == '\\' && regparse[1])
2136 regparse++;
4e553d73 2137 else if (c == '{')
cc6b7395 2138 count++;
4e553d73 2139 else if (c == '}')
cc6b7395 2140 count--;
d9f97599 2141 regparse++;
cc6b7395 2142 }
e994fd66 2143 if (*regparse != ')')
5bdf89e7 2144 regparse--; /* Leave one char for continuation. */
0f5d15d6 2145 while (s < regparse)
db42d148 2146 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2147 }
748a9306 2148 }
02aa26ce
NT
2149
2150 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2151 else if (*s == '#' && PL_lex_inpat &&
2152 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2153 while (s+1 < send && *s != '\n')
db42d148 2154 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2155 }
02aa26ce 2156
5d1d4326 2157 /* check for embedded arrays
da6eedaa 2158 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2159 */
1749ea0d
TS
2160 else if (*s == '@' && s[1]) {
2161 if (isALNUM_lazy_if(s+1,UTF))
2162 break;
2163 if (strchr(":'{$", s[1]))
2164 break;
2165 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2166 break; /* in regexp, neither @+ nor @- are interpolated */
2167 }
02aa26ce
NT
2168
2169 /* check for embedded scalars. only stop if we're sure it's a
2170 variable.
2171 */
79072805 2172 else if (*s == '$') {
3280af22 2173 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2174 break;
6002328a 2175 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2176 break; /* in regexp, $ might be tail anchor */
2177 }
02aa26ce 2178
2b9d42f0
NIS
2179 /* End of else if chain - OP_TRANS rejoin rest */
2180
02aa26ce 2181 /* backslashes */
79072805
LW
2182 if (*s == '\\' && s+1 < send) {
2183 s++;
02aa26ce 2184
02aa26ce 2185 /* deprecate \1 in strings and substitution replacements */
3280af22 2186 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2187 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2188 {
599cee73 2189 if (ckWARN(WARN_SYNTAX))
9014280d 2190 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2191 *--s = '$';
2192 break;
2193 }
02aa26ce
NT
2194
2195 /* string-change backslash escapes */
3280af22 2196 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2197 --s;
2198 break;
2199 }
cc74c5bd
TS
2200 /* skip any other backslash escapes in a pattern */
2201 else if (PL_lex_inpat) {
2202 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2203 goto default_action;
2204 }
02aa26ce
NT
2205
2206 /* if we get here, it's either a quoted -, or a digit */
79072805 2207 switch (*s) {
02aa26ce
NT
2208
2209 /* quoted - in transliterations */
79072805 2210 case '-':
3280af22 2211 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2212 *d++ = *s++;
2213 continue;
2214 }
2215 /* FALL THROUGH */
2216 default:
11b8faa4 2217 {
86f97054 2218 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2219 ckWARN(WARN_MISC))
9014280d 2220 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2221 "Unrecognized escape \\%c passed through",
2222 *s);
11b8faa4 2223 /* default action is to copy the quoted character */
f9a63242 2224 goto default_action;
11b8faa4 2225 }
02aa26ce
NT
2226
2227 /* \132 indicates an octal constant */
79072805
LW
2228 case '0': case '1': case '2': case '3':
2229 case '4': case '5': case '6': case '7':
ba210ebe 2230 {
53305cf1
NC
2231 I32 flags = 0;
2232 STRLEN len = 3;
2233 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2234 s += len;
2235 }
012bcf8d 2236 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2237
2238 /* \x24 indicates a hex constant */
79072805 2239 case 'x':
a0ed51b3
LW
2240 ++s;
2241 if (*s == '{') {
9d4ba2ae 2242 char* const e = strchr(s, '}');
a4c04bdc
NC
2243 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2244 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2245 STRLEN len;
355860ce 2246
53305cf1 2247 ++s;
adaeee49 2248 if (!e) {
a0ed51b3 2249 yyerror("Missing right brace on \\x{}");
355860ce 2250 continue;
ba210ebe 2251 }
53305cf1
NC
2252 len = e - s;
2253 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2254 s = e + 1;
a0ed51b3
LW
2255 }
2256 else {
ba210ebe 2257 {
53305cf1 2258 STRLEN len = 2;
a4c04bdc 2259 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2260 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2261 s += len;
2262 }
012bcf8d
GS
2263 }
2264
2265 NUM_ESCAPE_INSERT:
2266 /* Insert oct or hex escaped character.
301d3d20 2267 * There will always enough room in sv since such
db42d148 2268 * escapes will be longer than any UTF-8 sequence
301d3d20 2269 * they can end up as. */
ba7cea30 2270
c7f1f016
NIS
2271 /* We need to map to chars to ASCII before doing the tests
2272 to cover EBCDIC
2273 */
c4d5f83a 2274 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2275 if (!has_utf8 && uv > 255) {
301d3d20
JH
2276 /* Might need to recode whatever we have
2277 * accumulated so far if it contains any
2278 * hibit chars.
2279 *
2280 * (Can't we keep track of that and avoid
2281 * this rescan? --jhi)
012bcf8d 2282 */
c7f1f016 2283 int hicount = 0;
63cd0674
NIS
2284 U8 *c;
2285 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2286 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2287 hicount++;
db42d148 2288 }
012bcf8d 2289 }
63cd0674 2290 if (hicount) {
9d4ba2ae 2291 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2292 U8 *src, *dst;
2293 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2294 src = (U8 *)d - 1;
2295 dst = src+hicount;
2296 d += hicount;
cfd0369c 2297 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2298 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2299 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2300 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2301 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2302 }
2303 else {
63cd0674 2304 *dst-- = *src;
012bcf8d 2305 }
c7f1f016 2306 src--;
012bcf8d
GS
2307 }
2308 }
2309 }
2310
9aa983d2 2311 if (has_utf8 || uv > 255) {
9041c2e3 2312 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2313 has_utf8 = TRUE;
f9a63242
JH
2314 if (PL_lex_inwhat == OP_TRANS &&
2315 PL_sublex_info.sub_op) {
2316 PL_sublex_info.sub_op->op_private |=
2317 (PL_lex_repl ? OPpTRANS_FROM_UTF
2318 : OPpTRANS_TO_UTF);
f9a63242 2319 }
e294cc5d
JH
2320#ifdef EBCDIC
2321 if (uv > 255 && !dorange)
2322 native_range = FALSE;
2323#endif
012bcf8d 2324 }
a0ed51b3 2325 else {
012bcf8d 2326 *d++ = (char)uv;
a0ed51b3 2327 }
012bcf8d
GS
2328 }
2329 else {
c4d5f83a 2330 *d++ = (char) uv;
a0ed51b3 2331 }
79072805 2332 continue;
02aa26ce 2333
b239daa5 2334 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2335 case 'N':
55eda711 2336 ++s;
423cee85
JH
2337 if (*s == '{') {
2338 char* e = strchr(s, '}');
155aba94 2339 SV *res;
423cee85 2340 STRLEN len;
cfd0369c 2341 const char *str;
4e553d73 2342
423cee85 2343 if (!e) {
5777a3f7 2344 yyerror("Missing right brace on \\N{}");
423cee85
JH
2345 e = s - 1;
2346 goto cont_scan;
2347 }
dbc0d4f2
JH
2348 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2349 /* \N{U+...} */
2350 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2351 PERL_SCAN_DISALLOW_PREFIX;
2352 s += 3;
2353 len = e - s;
2354 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2355 if ( e > s && len != (STRLEN)(e - s) ) {
2356 uv = 0xFFFD;
fc8cd66c 2357 }
dbc0d4f2
JH
2358 s = e + 1;
2359 goto NUM_ESCAPE_INSERT;
2360 }
55eda711 2361 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2362 res = new_constant( NULL, 0, "charnames",
eb0d8d16 2363 res, NULL, s - 2, e - s + 3 );
f9a63242
JH
2364 if (has_utf8)
2365 sv_utf8_upgrade(res);
cfd0369c 2366 str = SvPV_const(res,len);
1c47067b
JH
2367#ifdef EBCDIC_NEVER_MIND
2368 /* charnames uses pack U and that has been
2369 * recently changed to do the below uni->native
2370 * mapping, so this would be redundant (and wrong,
2371 * the code point would be doubly converted).
2372 * But leave this in just in case the pack U change
2373 * gets revoked, but the semantics is still
2374 * desireable for charnames. --jhi */
cddc7ef4 2375 {
cfd0369c 2376 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2377
2378 if (uv < 0x100) {
89ebb4a3 2379 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2380
2381 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2382 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2383 str = SvPV_const(res, len);
cddc7ef4
JH
2384 }
2385 }
2386#endif
89491803 2387 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2388 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2389 SvCUR_set(sv, d - ostart);
2390 SvPOK_on(sv);
e4f3eed8 2391 *d = '\0';
f08d6ad9 2392 sv_utf8_upgrade(sv);
d2f449dd 2393 /* this just broke our allocation above... */
eb160463 2394 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2395 d = SvPVX(sv) + SvCUR(sv);
89491803 2396 has_utf8 = TRUE;
f08d6ad9 2397 }
eb160463 2398 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2399 const char * const odest = SvPVX_const(sv);
423cee85 2400
8973db79 2401 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2402 d = SvPVX(sv) + (d - odest);
2403 }
e294cc5d
JH
2404#ifdef EBCDIC
2405 if (!dorange)
2406 native_range = FALSE; /* \N{} is guessed to be Unicode */
2407#endif
423cee85
JH
2408 Copy(str, d, len, char);
2409 d += len;
2410 SvREFCNT_dec(res);
2411 cont_scan:
2412 s = e + 1;
2413 }
2414 else
5777a3f7 2415 yyerror("Missing braces on \\N{}");
423cee85
JH
2416 continue;
2417
02aa26ce 2418 /* \c is a control character */
79072805
LW
2419 case 'c':
2420 s++;
961ce445 2421 if (s < send) {
ba210ebe 2422 U8 c = *s++;
c7f1f016
NIS
2423#ifdef EBCDIC
2424 if (isLOWER(c))
2425 c = toUPPER(c);
2426#endif
db42d148 2427 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2428 }
961ce445
RGS
2429 else {
2430 yyerror("Missing control char name in \\c");
2431 }
79072805 2432 continue;
02aa26ce
NT
2433
2434 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2435 case 'b':
db42d148 2436 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2437 break;
2438 case 'n':
db42d148 2439 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2440 break;
2441 case 'r':
db42d148 2442 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2443 break;
2444 case 'f':
db42d148 2445 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2446 break;
2447 case 't':
db42d148 2448 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2449 break;
34a3fe2a 2450 case 'e':
db42d148 2451 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2452 break;
2453 case 'a':
db42d148 2454 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2455 break;
02aa26ce
NT
2456 } /* end switch */
2457
79072805
LW
2458 s++;
2459 continue;
02aa26ce 2460 } /* end if (backslash) */
4c3a8340
TS
2461#ifdef EBCDIC
2462 else
2463 literal_endpoint++;
2464#endif
02aa26ce 2465
f9a63242 2466 default_action:
2b9d42f0
NIS
2467 /* If we started with encoded form, or already know we want it
2468 and then encode the next character */
2469 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2470 STRLEN len = 1;
5f66b61c
AL
2471 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2472 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2473 s += len;
2474 if (need > len) {
2475 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2476 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2477 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2478 }
5f66b61c 2479 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2480 has_utf8 = TRUE;
e294cc5d
JH
2481#ifdef EBCDIC
2482 if (uv > 255 && !dorange)
2483 native_range = FALSE;
2484#endif
2b9d42f0
NIS
2485 }
2486 else {
2487 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2488 }
02aa26ce
NT
2489 } /* while loop to process each character */
2490
2491 /* terminate the string and set up the sv */
79072805 2492 *d = '\0';
95a20fc0 2493 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2494 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2495 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2496
79072805 2497 SvPOK_on(sv);
9f4817db 2498 if (PL_encoding && !has_utf8) {
d0063567
DK
2499 sv_recode_to_utf8(sv, PL_encoding);
2500 if (SvUTF8(sv))
2501 has_utf8 = TRUE;
9f4817db 2502 }
2b9d42f0 2503 if (has_utf8) {
7e2040f0 2504 SvUTF8_on(sv);
2b9d42f0 2505 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2506 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2507 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2508 }
2509 }
79072805 2510
02aa26ce 2511 /* shrink the sv if we allocated more than we used */
79072805 2512 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2513 SvPV_shrink_to_cur(sv);
79072805 2514 }
02aa26ce 2515
6154021b 2516 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 2517 if (s > PL_bufptr) {
eb0d8d16
NC
2518 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2519 const char *const key = PL_lex_inpat ? "qr" : "q";
2520 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2521 const char *type;
2522 STRLEN typelen;
2523
2524 if (PL_lex_inwhat == OP_TRANS) {
2525 type = "tr";
2526 typelen = 2;
2527 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2528 type = "s";
2529 typelen = 1;
2530 } else {
2531 type = "qq";
2532 typelen = 2;
2533 }
2534
2535 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2536 type, typelen);
2537 }
6154021b 2538 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2539 } else
8990e307 2540 SvREFCNT_dec(sv);
79072805
LW
2541 return s;
2542}
2543
ffb4593c
NT
2544/* S_intuit_more
2545 * Returns TRUE if there's more to the expression (e.g., a subscript),
2546 * FALSE otherwise.
ffb4593c
NT
2547 *
2548 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2549 *
2550 * ->[ and ->{ return TRUE
2551 * { and [ outside a pattern are always subscripts, so return TRUE
2552 * if we're outside a pattern and it's not { or [, then return FALSE
2553 * if we're in a pattern and the first char is a {
2554 * {4,5} (any digits around the comma) returns FALSE
2555 * if we're in a pattern and the first char is a [
2556 * [] returns FALSE
2557 * [SOMETHING] has a funky algorithm to decide whether it's a
2558 * character class or not. It has to deal with things like
2559 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2560 * anything else returns TRUE
2561 */
2562
9cbb5ea2
GS
2563/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2564
76e3520e 2565STATIC int
cea2e8a9 2566S_intuit_more(pTHX_ register char *s)
79072805 2567{
97aff369 2568 dVAR;
7918f24d
NC
2569
2570 PERL_ARGS_ASSERT_INTUIT_MORE;
2571
3280af22 2572 if (PL_lex_brackets)
79072805
LW
2573 return TRUE;
2574 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2575 return TRUE;
2576 if (*s != '{' && *s != '[')
2577 return FALSE;
3280af22 2578 if (!PL_lex_inpat)
79072805
LW
2579 return TRUE;
2580
2581 /* In a pattern, so maybe we have {n,m}. */
2582 if (*s == '{') {
2583 s++;
2584 if (!isDIGIT(*s))
2585 return TRUE;
2586 while (isDIGIT(*s))
2587 s++;
2588 if (*s == ',')
2589 s++;
2590 while (isDIGIT(*s))
2591 s++;
2592 if (*s == '}')
2593 return FALSE;
2594 return TRUE;
2595
2596 }
2597
2598 /* On the other hand, maybe we have a character class */
2599
2600 s++;
2601 if (*s == ']' || *s == '^')
2602 return FALSE;
2603 else {
ffb4593c 2604 /* this is terrifying, and it works */
79072805
LW
2605 int weight = 2; /* let's weigh the evidence */
2606 char seen[256];
f27ffc4a 2607 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2608 const char * const send = strchr(s,']');
3280af22 2609 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2610
2611 if (!send) /* has to be an expression */
2612 return TRUE;
2613
2614 Zero(seen,256,char);
2615 if (*s == '$')
2616 weight -= 3;
2617 else if (isDIGIT(*s)) {
2618 if (s[1] != ']') {
2619 if (isDIGIT(s[1]) && s[2] == ']')
2620 weight -= 10;
2621 }
2622 else
2623 weight -= 100;
2624 }
2625 for (; s < send; s++) {
2626 last_un_char = un_char;
2627 un_char = (unsigned char)*s;
2628 switch (*s) {
2629 case '@':
2630 case '&':
2631 case '$':
2632 weight -= seen[un_char] * 10;
7e2040f0 2633 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2634 int len;
8903cb82 2635 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2636 len = (int)strlen(tmpbuf);
2637 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2638 weight -= 100;
2639 else
2640 weight -= 10;
2641 }
2642 else if (*s == '$' && s[1] &&
93a17b20
LW
2643 strchr("[#!%*<>()-=",s[1])) {
2644 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2645 weight -= 10;
2646 else
2647 weight -= 1;
2648 }
2649 break;
2650 case '\\':
2651 un_char = 254;
2652 if (s[1]) {
93a17b20 2653 if (strchr("wds]",s[1]))
79072805 2654 weight += 100;
10edeb5d 2655 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2656 weight += 1;
93a17b20 2657 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2658 weight += 40;
2659 else if (isDIGIT(s[1])) {
2660 weight += 40;
2661 while (s[1] && isDIGIT(s[1]))
2662 s++;
2663 }
2664 }
2665 else
2666 weight += 100;
2667 break;
2668 case '-':
2669 if (s[1] == '\\')
2670 weight += 50;
93a17b20 2671 if (strchr("aA01! ",last_un_char))
79072805 2672 weight += 30;
93a17b20 2673 if (strchr("zZ79~",s[1]))
79072805 2674 weight += 30;
f27ffc4a
GS
2675 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2676 weight -= 5; /* cope with negative subscript */
79072805
LW
2677 break;
2678 default:
3792a11b
NC
2679 if (!isALNUM(last_un_char)
2680 && !(last_un_char == '$' || last_un_char == '@'
2681 || last_un_char == '&')
2682 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2683 char *d = tmpbuf;
2684 while (isALPHA(*s))
2685 *d++ = *s++;
2686 *d = '\0';
5458a98a 2687 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2688 weight -= 150;
2689 }
2690 if (un_char == last_un_char + 1)
2691 weight += 5;
2692 weight -= seen[un_char];
2693 break;
2694 }
2695 seen[un_char]++;
2696 }
2697 if (weight >= 0) /* probably a character class */
2698 return FALSE;
2699 }
2700
2701 return TRUE;
2702}
ffed7fef 2703
ffb4593c
NT
2704/*
2705 * S_intuit_method
2706 *
2707 * Does all the checking to disambiguate
2708 * foo bar
2709 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2710 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2711 *
2712 * First argument is the stuff after the first token, e.g. "bar".
2713 *
2714 * Not a method if bar is a filehandle.
2715 * Not a method if foo is a subroutine prototyped to take a filehandle.
2716 * Not a method if it's really "Foo $bar"
2717 * Method if it's "foo $bar"
2718 * Not a method if it's really "print foo $bar"
2719 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2720 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2721 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2722 * =>
2723 */
2724
76e3520e 2725STATIC int
62d55b22 2726S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2727{
97aff369 2728 dVAR;
a0d0e21e 2729 char *s = start + (*start == '$');
3280af22 2730 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2731 STRLEN len;
2732 GV* indirgv;
5db06880
NC
2733#ifdef PERL_MAD
2734 int soff;
2735#endif
a0d0e21e 2736
7918f24d
NC
2737 PERL_ARGS_ASSERT_INTUIT_METHOD;
2738
a0d0e21e 2739 if (gv) {
62d55b22 2740 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2741 return 0;
62d55b22
NC
2742 if (cv) {
2743 if (SvPOK(cv)) {
2744 const char *proto = SvPVX_const(cv);
2745 if (proto) {
2746 if (*proto == ';')
2747 proto++;
2748 if (*proto == '*')
2749 return 0;
2750 }
b6c543e3
IZ
2751 }
2752 } else
c35e046a 2753 gv = NULL;
a0d0e21e 2754 }
8903cb82 2755 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2756 /* start is the beginning of the possible filehandle/object,
2757 * and s is the end of it
2758 * tmpbuf is a copy of it
2759 */
2760
a0d0e21e 2761 if (*start == '$') {
3ef1310e
RGS
2762 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2763 isUPPER(*PL_tokenbuf))
a0d0e21e 2764 return 0;
5db06880
NC
2765#ifdef PERL_MAD
2766 len = start - SvPVX(PL_linestr);
2767#endif
29595ff2 2768 s = PEEKSPACE(s);
f0092767 2769#ifdef PERL_MAD
5db06880
NC
2770 start = SvPVX(PL_linestr) + len;
2771#endif
3280af22
NIS
2772 PL_bufptr = start;
2773 PL_expect = XREF;
a0d0e21e
LW
2774 return *s == '(' ? FUNCMETH : METHOD;
2775 }
5458a98a 2776 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2777 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2778 len -= 2;
2779 tmpbuf[len] = '\0';
5db06880
NC
2780#ifdef PERL_MAD
2781 soff = s - SvPVX(PL_linestr);
2782#endif
c3e0f903
GS
2783 goto bare_package;
2784 }
90e5519e 2785 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2786 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2787 return 0;
2788 /* filehandle or package name makes it a method */
da51bb9b 2789 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2790#ifdef PERL_MAD
2791 soff = s - SvPVX(PL_linestr);
2792#endif
29595ff2 2793 s = PEEKSPACE(s);
3280af22 2794 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2795 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2796 bare_package:
cd81e915 2797 start_force(PL_curforce);
9ded7720 2798 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2799 newSVpvn(tmpbuf,len));
9ded7720 2800 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2801 if (PL_madskills)
2802 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2803 PL_expect = XTERM;
a0d0e21e 2804 force_next(WORD);
3280af22 2805 PL_bufptr = s;
5db06880
NC
2806#ifdef PERL_MAD
2807 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2808#endif
a0d0e21e
LW
2809 return *s == '(' ? FUNCMETH : METHOD;
2810 }
2811 }
2812 return 0;
2813}
2814
16d20bd9 2815/* Encoded script support. filter_add() effectively inserts a
4e553d73 2816 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2817 * Note that the filter function only applies to the current source file
2818 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2819 *
2820 * The datasv parameter (which may be NULL) can be used to pass
2821 * private data to this instance of the filter. The filter function
2822 * can recover the SV using the FILTER_DATA macro and use it to
2823 * store private buffers and state information.
2824 *
2825 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2826 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2827 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2828 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2829 * private use must be set using malloc'd pointers.
2830 */
16d20bd9
AD
2831
2832SV *
864dbfa3 2833Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2834{
97aff369 2835 dVAR;
f4c556ac 2836 if (!funcp)
a0714e2c 2837 return NULL;
f4c556ac 2838
5486870f
DM
2839 if (!PL_parser)
2840 return NULL;
2841
3280af22
NIS
2842 if (!PL_rsfp_filters)
2843 PL_rsfp_filters = newAV();
16d20bd9 2844 if (!datasv)
561b68a9 2845 datasv = newSV(0);
862a34c6 2846 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2847 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2848 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2849 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2850 FPTR2DPTR(void *, IoANY(datasv)),
2851 SvPV_nolen(datasv)));
3280af22
NIS
2852 av_unshift(PL_rsfp_filters, 1);
2853 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2854 return(datasv);
2855}
4e553d73 2856
16d20bd9
AD
2857
2858/* Delete most recently added instance of this filter function. */
a0d0e21e 2859void
864dbfa3 2860Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2861{
97aff369 2862 dVAR;
e0c19803 2863 SV *datasv;
24801a4b 2864
7918f24d
NC
2865 PERL_ARGS_ASSERT_FILTER_DEL;
2866
33073adb 2867#ifdef DEBUGGING
55662e27
JH
2868 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2869 FPTR2DPTR(void*, funcp)));
33073adb 2870#endif
5486870f 2871 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2872 return;
2873 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2874 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2875 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2876 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2877 IoANY(datasv) = (void *)NULL;
3280af22 2878 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2879
16d20bd9
AD
2880 return;
2881 }
2882 /* we need to search for the correct entry and clear it */
cea2e8a9 2883 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2884}
2885
2886
1de9afcd
RGS
2887/* Invoke the idxth filter function for the current rsfp. */
2888/* maxlen 0 = read one text line */
16d20bd9 2889I32
864dbfa3 2890Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2891{
97aff369 2892 dVAR;
16d20bd9
AD
2893 filter_t funcp;
2894 SV *datasv = NULL;
f482118e
NC
2895 /* This API is bad. It should have been using unsigned int for maxlen.
2896 Not sure if we want to change the API, but if not we should sanity
2897 check the value here. */
39cd7a59
NC
2898 const unsigned int correct_length
2899 = maxlen < 0 ?
2900#ifdef PERL_MICRO
2901 0x7FFFFFFF
2902#else
2903 INT_MAX
2904#endif
2905 : maxlen;
e50aee73 2906
7918f24d
NC
2907 PERL_ARGS_ASSERT_FILTER_READ;
2908
5486870f 2909 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 2910 return -1;
1de9afcd 2911 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2912 /* Provide a default input filter to make life easy. */
2913 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2914 DEBUG_P(PerlIO_printf(Perl_debug_log,
2915 "filter_read %d: from rsfp\n", idx));
f482118e 2916 if (correct_length) {
16d20bd9
AD
2917 /* Want a block */
2918 int len ;
f54cb97a 2919 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2920
2921 /* ensure buf_sv is large enough */
f482118e
NC
2922 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2923 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2924 correct_length)) <= 0) {
3280af22 2925 if (PerlIO_error(PL_rsfp))
37120919
AD
2926 return -1; /* error */
2927 else
2928 return 0 ; /* end of file */
2929 }
16d20bd9
AD
2930 SvCUR_set(buf_sv, old_len + len) ;
2931 } else {
2932 /* Want a line */
3280af22
NIS
2933 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2934 if (PerlIO_error(PL_rsfp))
37120919
AD
2935 return -1; /* error */
2936 else
2937 return 0 ; /* end of file */
2938 }
16d20bd9
AD
2939 }
2940 return SvCUR(buf_sv);
2941 }
2942 /* Skip this filter slot if filter has been deleted */
1de9afcd 2943 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2944 DEBUG_P(PerlIO_printf(Perl_debug_log,
2945 "filter_read %d: skipped (filter deleted)\n",
2946 idx));
f482118e 2947 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2948 }
2949 /* Get function pointer hidden within datasv */
8141890a 2950 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2951 DEBUG_P(PerlIO_printf(Perl_debug_log,
2952 "filter_read %d: via function %p (%s)\n",
ca0270c4 2953 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2954 /* Call function. The function is expected to */
2955 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2956 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2957 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2958}
2959
76e3520e 2960STATIC char *
cea2e8a9 2961S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2962{
97aff369 2963 dVAR;
7918f24d
NC
2964
2965 PERL_ARGS_ASSERT_FILTER_GETS;
2966
c39cd008 2967#ifdef PERL_CR_FILTER
3280af22 2968 if (!PL_rsfp_filters) {
c39cd008 2969 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2970 }
2971#endif
3280af22 2972 if (PL_rsfp_filters) {
55497cff 2973 if (!append)
2974 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2975 if (FILTER_READ(0, sv, 0) > 0)
2976 return ( SvPVX(sv) ) ;
2977 else
bd61b366 2978 return NULL ;
16d20bd9 2979 }
9d116dd7 2980 else
fd049845 2981 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2982}
2983
01ec43d0 2984STATIC HV *
9bde8eb0 2985S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 2986{
97aff369 2987 dVAR;
def3634b
GS
2988 GV *gv;
2989
7918f24d
NC
2990 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
2991
01ec43d0 2992 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2993 return PL_curstash;
2994
2995 if (len > 2 &&
2996 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2997 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2998 {
2999 return GvHV(gv); /* Foo:: */
def3634b
GS
3000 }
3001
3002 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3003 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3004 if (gv && GvCV(gv)) {
3005 SV * const sv = cv_const_sv(GvCV(gv));
3006 if (sv)
9bde8eb0 3007 pkgname = SvPV_const(sv, len);
def3634b
GS
3008 }
3009
9bde8eb0 3010 return gv_stashpvn(pkgname, len, 0);
def3634b 3011}
a0d0e21e 3012
e3f73d4e
RGS
3013/*
3014 * S_readpipe_override
3015 * Check whether readpipe() is overriden, and generates the appropriate
3016 * optree, provided sublex_start() is called afterwards.
3017 */
3018STATIC void
1d51329b 3019S_readpipe_override(pTHX)
e3f73d4e
RGS
3020{
3021 GV **gvp;
3022 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3023 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3024 if ((gv_readpipe
3025 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3026 ||
3027 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3028 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3029 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3030 {
3031 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3032 append_elem(OP_LIST,
3033 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3034 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3035 }
e3f73d4e
RGS
3036}
3037
5db06880
NC
3038#ifdef PERL_MAD
3039 /*
3040 * Perl_madlex
3041 * The intent of this yylex wrapper is to minimize the changes to the
3042 * tokener when we aren't interested in collecting madprops. It remains
3043 * to be seen how successful this strategy will be...
3044 */
3045
3046int
3047Perl_madlex(pTHX)
3048{
3049 int optype;
3050 char *s = PL_bufptr;
3051
cd81e915
NC
3052 /* make sure PL_thiswhite is initialized */
3053 PL_thiswhite = 0;
3054 PL_thismad = 0;
5db06880 3055
cd81e915 3056 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3057 if (PL_pending_ident)
3058 return S_pending_ident(aTHX);
3059
3060 /* previous token ate up our whitespace? */
cd81e915
NC
3061 if (!PL_lasttoke && PL_nextwhite) {
3062 PL_thiswhite = PL_nextwhite;
3063 PL_nextwhite = 0;
5db06880
NC
3064 }
3065
3066 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3067 PL_realtokenstart = -1;
3068 PL_thistoken = 0;
5db06880
NC
3069 optype = yylex();
3070 s = PL_bufptr;
cd81e915 3071 assert(PL_curforce < 0);
5db06880 3072
cd81e915
NC
3073 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3074 if (!PL_thistoken) {
3075 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3076 PL_thistoken = newSVpvs("");
5db06880 3077 else {
c35e046a 3078 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3079 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3080 }
3081 }
cd81e915
NC
3082 if (PL_thismad) /* install head */
3083 CURMAD('X', PL_thistoken);
5db06880
NC
3084 }
3085
3086 /* last whitespace of a sublex? */
cd81e915
NC
3087 if (optype == ')' && PL_endwhite) {
3088 CURMAD('X', PL_endwhite);
5db06880
NC
3089 }
3090
cd81e915 3091 if (!PL_thismad) {
5db06880
NC
3092
3093 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3094 if (!PL_thiswhite && !PL_endwhite && !optype) {
3095 sv_free(PL_thistoken);
3096 PL_thistoken = 0;
5db06880
NC
3097 return 0;
3098 }
3099
3100 /* put off final whitespace till peg */
3101 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3102 PL_nextwhite = PL_thiswhite;
3103 PL_thiswhite = 0;
5db06880 3104 }
cd81e915
NC
3105 else if (PL_thisopen) {
3106 CURMAD('q', PL_thisopen);
3107 if (PL_thistoken)
3108 sv_free(PL_thistoken);
3109 PL_thistoken = 0;
5db06880
NC
3110 }
3111 else {
3112 /* Store actual token text as madprop X */
cd81e915 3113 CURMAD('X', PL_thistoken);
5db06880
NC
3114 }
3115
cd81e915 3116 if (PL_thiswhite) {
5db06880 3117 /* add preceding whitespace as madprop _ */
cd81e915 3118 CURMAD('_', PL_thiswhite);
5db06880
NC
3119 }
3120
cd81e915 3121 if (PL_thisstuff) {
5db06880 3122 /* add quoted material as madprop = */
cd81e915 3123 CURMAD('=', PL_thisstuff);
5db06880
NC
3124 }
3125
cd81e915 3126 if (PL_thisclose) {
5db06880 3127 /* add terminating quote as madprop Q */
cd81e915 3128 CURMAD('Q', PL_thisclose);
5db06880
NC
3129 }
3130 }
3131
3132 /* special processing based on optype */
3133
3134 switch (optype) {
3135
3136 /* opval doesn't need a TOKEN since it can already store mp */
3137 case WORD:
3138 case METHOD:
3139 case FUNCMETH:
3140 case THING:
3141 case PMFUNC:
3142 case PRIVATEREF:
3143 case FUNC0SUB:
3144 case UNIOPSUB:
3145 case LSTOPSUB:
6154021b
RGS
3146 if (pl_yylval.opval)
3147 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 3148 PL_thismad = 0;
5db06880
NC
3149 return optype;
3150
3151 /* fake EOF */
3152 case 0:
3153 optype = PEG;
cd81e915
NC
3154 if (PL_endwhite) {
3155 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3156 PL_endwhite = 0;
5db06880
NC
3157 }
3158 break;
3159
3160 case ']':
3161 case '}':
cd81e915 3162 if (PL_faketokens)
5db06880
NC
3163 break;
3164 /* remember any fake bracket that lexer is about to discard */
3165 if (PL_lex_brackets == 1 &&
3166 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3167 {
3168 s = PL_bufptr;
3169 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3170 s++;
3171 if (*s == '}') {
cd81e915
NC
3172 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3173 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3174 PL_thiswhite = 0;
5db06880
NC
3175 PL_bufptr = s - 1;
3176 break; /* don't bother looking for trailing comment */
3177 }
3178 else
3179 s = PL_bufptr;
3180 }
3181 if (optype == ']')
3182 break;
3183 /* FALLTHROUGH */
3184
3185 /* attach a trailing comment to its statement instead of next token */
3186 case ';':
cd81e915 3187 if (PL_faketokens)
5db06880
NC
3188 break;
3189 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3190 s = PL_bufptr;
3191 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3192 s++;
3193 if (*s == '\n' || *s == '#') {
3194 while (s < PL_bufend && *s != '\n')
3195 s++;
3196 if (s < PL_bufend)
3197 s++;
cd81e915
NC
3198 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3199 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3200 PL_thiswhite = 0;
5db06880
NC
3201 PL_bufptr = s;
3202 }
3203 }
3204 break;
3205
3206 /* pval */
3207 case LABEL:
3208 break;
3209
3210 /* ival */
3211 default:
3212 break;
3213
3214 }
3215
3216 /* Create new token struct. Note: opvals return early above. */
6154021b 3217 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 3218 PL_thismad = 0;
5db06880
NC
3219 return optype;
3220}
3221#endif
3222
468aa647 3223STATIC char *
cc6ed77d 3224S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3225 dVAR;
7918f24d
NC
3226
3227 PERL_ARGS_ASSERT_TOKENIZE_USE;
3228
468aa647
RGS
3229 if (PL_expect != XSTATE)
3230 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3231 is_use ? "use" : "no"));
29595ff2 3232 s = SKIPSPACE1(s);
468aa647
RGS
3233 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3234 s = force_version(s, TRUE);
29595ff2 3235 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3236 start_force(PL_curforce);
9ded7720 3237 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3238 force_next(WORD);
3239 }
3240 else if (*s == 'v') {
3241 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3242 s = force_version(s, FALSE);
3243 }
3244 }
3245 else {
3246 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3247 s = force_version(s, FALSE);
3248 }
6154021b 3249 pl_yylval.ival = is_use;
468aa647
RGS
3250 return s;
3251}
748a9306 3252#ifdef DEBUGGING
27da23d5 3253 static const char* const exp_name[] =
09bef843 3254 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3255 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3256 };
748a9306 3257#endif
463ee0b2 3258
02aa26ce
NT
3259/*
3260 yylex
3261
3262 Works out what to call the token just pulled out of the input
3263 stream. The yacc parser takes care of taking the ops we return and
3264 stitching them into a tree.
3265
3266 Returns:
3267 PRIVATEREF
3268
3269 Structure:
3270 if read an identifier
3271 if we're in a my declaration
3272 croak if they tried to say my($foo::bar)
3273 build the ops for a my() declaration
3274 if it's an access to a my() variable
3275 are we in a sort block?
3276 croak if my($a); $a <=> $b
3277 build ops for access to a my() variable
3278 if in a dq string, and they've said @foo and we can't find @foo
3279 croak
3280 build ops for a bareword
3281 if we already built the token before, use it.
3282*/
3283
20141f0e 3284
dba4d153
JH
3285#ifdef __SC__
3286#pragma segment Perl_yylex
3287#endif
dba4d153 3288int
dba4d153 3289Perl_yylex(pTHX)
20141f0e 3290{
97aff369 3291 dVAR;
3afc138a 3292 register char *s = PL_bufptr;
378cc40b 3293 register char *d;
463ee0b2 3294 STRLEN len;
aa7440fb 3295 bool bof = FALSE;
a687059c 3296
10edeb5d
JH
3297 /* orig_keyword, gvp, and gv are initialized here because
3298 * jump to the label just_a_word_zero can bypass their
3299 * initialization later. */
3300 I32 orig_keyword = 0;
3301 GV *gv = NULL;
3302 GV **gvp = NULL;
3303
bbf60fe6 3304 DEBUG_T( {
396482e1 3305 SV* tmp = newSVpvs("");
b6007c36
DM
3306 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3307 (IV)CopLINE(PL_curcop),
3308 lex_state_names[PL_lex_state],
3309 exp_name[PL_expect],
3310 pv_display(tmp, s, strlen(s), 0, 60));
3311 SvREFCNT_dec(tmp);
bbf60fe6 3312 } );
02aa26ce 3313 /* check if there's an identifier for us to look at */
ba979b31 3314 if (PL_pending_ident)
bbf60fe6 3315 return REPORT(S_pending_ident(aTHX));
bbce6d69 3316
02aa26ce
NT
3317 /* no identifier pending identification */
3318
3280af22 3319 switch (PL_lex_state) {
79072805
LW
3320#ifdef COMMENTARY
3321 case LEX_NORMAL: /* Some compilers will produce faster */
3322 case LEX_INTERPNORMAL: /* code if we comment these out. */
3323 break;
3324#endif
3325
09bef843 3326 /* when we've already built the next token, just pull it out of the queue */
79072805 3327 case LEX_KNOWNEXT:
5db06880
NC
3328#ifdef PERL_MAD
3329 PL_lasttoke--;
6154021b 3330 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 3331 if (PL_madskills) {
cd81e915 3332 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3333 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3334 if (PL_thismad && PL_thismad->mad_key == '_') {
3335 PL_thiswhite = (SV*)PL_thismad->mad_val;
3336 PL_thismad->mad_val = 0;
3337 mad_free(PL_thismad);
3338 PL_thismad = 0;
5db06880
NC
3339 }
3340 }
3341 if (!PL_lasttoke) {
3342 PL_lex_state = PL_lex_defer;
3343 PL_expect = PL_lex_expect;
3344 PL_lex_defer = LEX_NORMAL;
3345 if (!PL_nexttoke[PL_lasttoke].next_type)
3346 return yylex();
3347 }
3348#else
3280af22 3349 PL_nexttoke--;
6154021b 3350 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3351 if (!PL_nexttoke) {
3352 PL_lex_state = PL_lex_defer;
3353 PL_expect = PL_lex_expect;
3354 PL_lex_defer = LEX_NORMAL;
463ee0b2 3355 }
5db06880
NC
3356#endif
3357#ifdef PERL_MAD
3358 /* FIXME - can these be merged? */
3359 return(PL_nexttoke[PL_lasttoke].next_type);
3360#else
bbf60fe6 3361 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3362#endif
79072805 3363
02aa26ce 3364 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3365 when we get here, PL_bufptr is at the \
02aa26ce 3366 */
79072805
LW
3367 case LEX_INTERPCASEMOD:
3368#ifdef DEBUGGING
3280af22 3369 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3370 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3371#endif
02aa26ce 3372 /* handle \E or end of string */
3280af22 3373 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3374 /* if at a \E */
3280af22 3375 if (PL_lex_casemods) {
f54cb97a 3376 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3377 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3378
3792a11b
NC
3379 if (PL_bufptr != PL_bufend
3380 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3381 PL_bufptr += 2;
3382 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3383#ifdef PERL_MAD
3384 if (PL_madskills)
6b29d1f5 3385 PL_thistoken = newSVpvs("\\E");
5db06880 3386#endif
a0d0e21e 3387 }
bbf60fe6 3388 return REPORT(')');
79072805 3389 }
5db06880
NC
3390#ifdef PERL_MAD
3391 while (PL_bufptr != PL_bufend &&
3392 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3393 if (!PL_thiswhite)
6b29d1f5 3394 PL_thiswhite = newSVpvs("");
cd81e915 3395 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3396 PL_bufptr += 2;
3397 }
3398#else
3280af22
NIS
3399 if (PL_bufptr != PL_bufend)
3400 PL_bufptr += 2;
5db06880 3401#endif
3280af22 3402 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3403 return yylex();
79072805
LW
3404 }
3405 else {
607df283 3406 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3407 "### Saw case modifier\n"); });
3280af22 3408 s = PL_bufptr + 1;
6e909404 3409 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3410#ifdef PERL_MAD
cd81e915 3411 if (!PL_thiswhite)
6b29d1f5 3412 PL_thiswhite = newSVpvs("");
cd81e915 3413 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3414#endif
89122651 3415 PL_bufptr = s + 3;
6e909404
JH
3416 PL_lex_state = LEX_INTERPCONCAT;
3417 return yylex();
a0d0e21e 3418 }
6e909404 3419 else {
90771dc0 3420 I32 tmp;
5db06880
NC
3421 if (!PL_madskills) /* when just compiling don't need correct */
3422 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3423 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3424 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3425 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3426 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3427 return REPORT(')');
6e909404
JH
3428 }
3429 if (PL_lex_casemods > 10)
3430 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3431 PL_lex_casestack[PL_lex_casemods++] = *s;
3432 PL_lex_casestack[PL_lex_casemods] = '\0';
3433 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3434 start_force(PL_curforce);
9ded7720 3435 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3436 force_next('(');
cd81e915 3437 start_force(PL_curforce);
6e909404 3438 if (*s == 'l')
9ded7720 3439 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3440 else if (*s == 'u')
9ded7720 3441 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3442 else if (*s == 'L')
9ded7720 3443 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3444 else if (*s == 'U')
9ded7720 3445 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3446 else if (*s == 'Q')
9ded7720 3447 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3448 else
3449 Perl_croak(aTHX_ "panic: yylex");
5db06880 3450 if (PL_madskills) {
a5849ce5
NC
3451 SV* const tmpsv = newSVpvs("\\ ");
3452 /* replace the space with the character we want to escape
3453 */
3454 SvPVX(tmpsv)[1] = *s;
5db06880
NC
3455 curmad('_', tmpsv);
3456 }
6e909404 3457 PL_bufptr = s + 1;
a0d0e21e 3458 }
79072805 3459 force_next(FUNC);
3280af22
NIS
3460 if (PL_lex_starts) {
3461 s = PL_bufptr;
3462 PL_lex_starts = 0;
5db06880
NC
3463#ifdef PERL_MAD
3464 if (PL_madskills) {
cd81e915
NC
3465 if (PL_thistoken)
3466 sv_free(PL_thistoken);
6b29d1f5 3467 PL_thistoken = newSVpvs("");
5db06880
NC
3468 }
3469#endif
131b3ad0
DM
3470 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3471 if (PL_lex_casemods == 1 && PL_lex_inpat)
3472 OPERATOR(',');
3473 else
3474 Aop(OP_CONCAT);
79072805
LW
3475 }
3476 else
cea2e8a9 3477 return yylex();
79072805
LW
3478 }
3479
55497cff 3480 case LEX_INTERPPUSH:
bbf60fe6 3481 return REPORT(sublex_push());
55497cff 3482
79072805 3483 case LEX_INTERPSTART:
3280af22 3484 if (PL_bufptr == PL_bufend)
bbf60fe6 3485 return REPORT(sublex_done());
607df283 3486 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3487 "### Interpolated variable\n"); });
3280af22
NIS
3488 PL_expect = XTERM;
3489 PL_lex_dojoin = (*PL_bufptr == '@');
3490 PL_lex_state = LEX_INTERPNORMAL;
3491 if (PL_lex_dojoin) {
cd81e915 3492 start_force(PL_curforce);
9ded7720 3493 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3494 force_next(',');
cd81e915 3495 start_force(PL_curforce);
a0d0e21e 3496 force_ident("\"", '$');
cd81e915 3497 start_force(PL_curforce);
9ded7720 3498 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3499 force_next('$');
cd81e915 3500 start_force(PL_curforce);
9ded7720 3501 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3502 force_next('(');
cd81e915 3503 start_force(PL_curforce);
9ded7720 3504 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3505 force_next(FUNC);
3506 }
3280af22
NIS
3507 if (PL_lex_starts++) {
3508 s = PL_bufptr;
5db06880
NC
3509#ifdef PERL_MAD
3510 if (PL_madskills) {
cd81e915
NC
3511 if (PL_thistoken)
3512 sv_free(PL_thistoken);
6b29d1f5 3513 PL_thistoken = newSVpvs("");
5db06880
NC
3514 }
3515#endif
131b3ad0
DM
3516 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3517 if (!PL_lex_casemods && PL_lex_inpat)
3518 OPERATOR(',');
3519 else
3520 Aop(OP_CONCAT);
79072805 3521 }
cea2e8a9 3522 return yylex();
79072805
LW
3523
3524 case LEX_INTERPENDMAYBE:
3280af22
NIS
3525 if (intuit_more(PL_bufptr)) {
3526 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3527 break;
3528 }
3529 /* FALL THROUGH */
3530
3531 case LEX_INTERPEND:
3280af22
NIS
3532 if (PL_lex_dojoin) {
3533 PL_lex_dojoin = FALSE;
3534 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3535#ifdef PERL_MAD
3536 if (PL_madskills) {
cd81e915
NC</