This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add new error "Can't use keyword '%s' as a label"
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
378cc40b 24#include "EXTERN.h"
864dbfa3 25#define PERL_IN_TOKE_C
378cc40b 26#include "perl.h"
378cc40b 27
eb0d8d16
NC
28#define new_constant(a,b,c,d,e,f,g) \
29 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
30
6154021b 31#define pl_yylval (PL_parser->yylval)
d3b6f988 32
acdf0a21
DM
33/* YYINITDEPTH -- initial size of the parser's stacks. */
34#define YYINITDEPTH 200
35
199e78b7
DM
36/* XXX temporary backwards compatibility */
37#define PL_lex_brackets (PL_parser->lex_brackets)
38#define PL_lex_brackstack (PL_parser->lex_brackstack)
39#define PL_lex_casemods (PL_parser->lex_casemods)
40#define PL_lex_casestack (PL_parser->lex_casestack)
41#define PL_lex_defer (PL_parser->lex_defer)
42#define PL_lex_dojoin (PL_parser->lex_dojoin)
43#define PL_lex_expect (PL_parser->lex_expect)
44#define PL_lex_formbrack (PL_parser->lex_formbrack)
45#define PL_lex_inpat (PL_parser->lex_inpat)
46#define PL_lex_inwhat (PL_parser->lex_inwhat)
47#define PL_lex_op (PL_parser->lex_op)
48#define PL_lex_repl (PL_parser->lex_repl)
49#define PL_lex_starts (PL_parser->lex_starts)
50#define PL_lex_stuff (PL_parser->lex_stuff)
51#define PL_multi_start (PL_parser->multi_start)
52#define PL_multi_open (PL_parser->multi_open)
53#define PL_multi_close (PL_parser->multi_close)
54#define PL_pending_ident (PL_parser->pending_ident)
55#define PL_preambled (PL_parser->preambled)
56#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 57#define PL_linestr (PL_parser->linestr)
c2598295
DM
58#define PL_expect (PL_parser->expect)
59#define PL_copline (PL_parser->copline)
f06b5848
DM
60#define PL_bufptr (PL_parser->bufptr)
61#define PL_oldbufptr (PL_parser->oldbufptr)
62#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
63#define PL_linestart (PL_parser->linestart)
64#define PL_bufend (PL_parser->bufend)
65#define PL_last_uni (PL_parser->last_uni)
66#define PL_last_lop (PL_parser->last_lop)
67#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 68#define PL_lex_state (PL_parser->lex_state)
2f9285f8 69#define PL_rsfp (PL_parser->rsfp)
5486870f 70#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
71#define PL_in_my (PL_parser->in_my)
72#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 73#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 74#define PL_multi_end (PL_parser->multi_end)
13765c85 75#define PL_error_count (PL_parser->error_count)
199e78b7
DM
76
77#ifdef PERL_MAD
78# define PL_endwhite (PL_parser->endwhite)
79# define PL_faketokens (PL_parser->faketokens)
80# define PL_lasttoke (PL_parser->lasttoke)
81# define PL_nextwhite (PL_parser->nextwhite)
82# define PL_realtokenstart (PL_parser->realtokenstart)
83# define PL_skipwhite (PL_parser->skipwhite)
84# define PL_thisclose (PL_parser->thisclose)
85# define PL_thismad (PL_parser->thismad)
86# define PL_thisopen (PL_parser->thisopen)
87# define PL_thisstuff (PL_parser->thisstuff)
88# define PL_thistoken (PL_parser->thistoken)
89# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
90# define PL_thiswhite (PL_parser->thiswhite)
91# define PL_nexttoke (PL_parser->nexttoke)
92# define PL_curforce (PL_parser->curforce)
93#else
94# define PL_nexttoke (PL_parser->nexttoke)
95# define PL_nexttype (PL_parser->nexttype)
96# define PL_nextval (PL_parser->nextval)
199e78b7
DM
97#endif
98
3cbf51f5
DM
99static int
100S_pending_ident(pTHX);
199e78b7 101
0bd48802 102static const char ident_too_long[] = "Identifier too long";
c445ea15 103static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 104
6e3aabd6 105#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
106static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
107static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 108#endif
51371543 109
29595ff2 110#ifdef PERL_MAD
29595ff2 111# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 112# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 113#else
5db06880 114# define CURMAD(slot,sv)
9ded7720 115# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
116#endif
117
9059aa12
LW
118#define XFAKEBRACK 128
119#define XENUMMASK 127
120
39e02b42
JH
121#ifdef USE_UTF8_SCRIPTS
122# define UTF (!IN_BYTES)
2b9d42f0 123#else
746b446a 124# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 125#endif
a0ed51b3 126
b1fc3636
CJ
127/* The maximum number of characters preceding the unrecognized one to display */
128#define UNRECOGNIZED_PRECEDE_COUNT 10
129
61f0cdd9 130/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
131 * 1999-02-27 mjd-perl-patch@plover.com */
132#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
133
bf4acbe4 134#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 135
ffb4593c
NT
136/* LEX_* are values for PL_lex_state, the state of the lexer.
137 * They are arranged oddly so that the guard on the switch statement
79072805
LW
138 * can get by with a single comparison (if the compiler is smart enough).
139 */
140
fb73857a
PP
141/* #define LEX_NOTPARSING 11 is done in perl.h. */
142
b6007c36
DM
143#define LEX_NORMAL 10 /* normal code (ie not within "...") */
144#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
145#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
146#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
147#define LEX_INTERPSTART 6 /* expecting the start of a $var */
148
149 /* at end of code, eg "$x" followed by: */
150#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
151#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
152
153#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
154 string or after \E, $foo, etc */
155#define LEX_INTERPCONST 2 /* NOT USED */
156#define LEX_FORMLINE 1 /* expecting a format line */
157#define LEX_KNOWNEXT 0 /* next token known; just return it */
158
79072805 159
bbf60fe6 160#ifdef DEBUGGING
27da23d5 161static const char* const lex_state_names[] = {
bbf60fe6
DM
162 "KNOWNEXT",
163 "FORMLINE",
164 "INTERPCONST",
165 "INTERPCONCAT",
166 "INTERPENDMAYBE",
167 "INTERPEND",
168 "INTERPSTART",
169 "INTERPPUSH",
170 "INTERPCASEMOD",
171 "INTERPNORMAL",
172 "NORMAL"
173};
174#endif
175
79072805
LW
176#ifdef ff_next
177#undef ff_next
d48672a2
LW
178#endif
179
79072805 180#include "keywords.h"
fe14fcc3 181
ffb4593c
NT
182/* CLINE is a macro that ensures PL_copline has a sane value */
183
ae986130
LW
184#ifdef CLINE
185#undef CLINE
186#endif
57843af0 187#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 188
5db06880 189#ifdef PERL_MAD
29595ff2
NC
190# define SKIPSPACE0(s) skipspace0(s)
191# define SKIPSPACE1(s) skipspace1(s)
192# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
193# define PEEKSPACE(s) skipspace2(s,0)
194#else
195# define SKIPSPACE0(s) skipspace(s)
196# define SKIPSPACE1(s) skipspace(s)
197# define SKIPSPACE2(s,tsv) skipspace(s)
198# define PEEKSPACE(s) skipspace(s)
199#endif
200
ffb4593c
NT
201/*
202 * Convenience functions to return different tokens and prime the
9cbb5ea2 203 * lexer for the next token. They all take an argument.
ffb4593c
NT
204 *
205 * TOKEN : generic token (used for '(', DOLSHARP, etc)
206 * OPERATOR : generic operator
207 * AOPERATOR : assignment operator
208 * PREBLOCK : beginning the block after an if, while, foreach, ...
209 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
210 * PREREF : *EXPR where EXPR is not a simple identifier
211 * TERM : expression term
212 * LOOPX : loop exiting command (goto, last, dump, etc)
213 * FTST : file test operator
214 * FUN0 : zero-argument function
2d2e263d 215 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
216 * BOop : bitwise or or xor
217 * BAop : bitwise and
218 * SHop : shift operator
219 * PWop : power operator
9cbb5ea2 220 * PMop : pattern-matching operator
ffb4593c
NT
221 * Aop : addition-level operator
222 * Mop : multiplication-level operator
223 * Eop : equality-testing operator
e5edeb50 224 * Rop : relational operator <= != gt
ffb4593c
NT
225 *
226 * Also see LOP and lop() below.
227 */
228
998054bd 229#ifdef DEBUGGING /* Serve -DT. */
704d4215 230# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 231#else
bbf60fe6 232# define REPORT(retval) (retval)
998054bd
SC
233#endif
234
bbf60fe6
DM
235#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
236#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
237#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
238#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
239#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
240#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
241#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
242#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
243#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
244#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
245#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
246#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
247#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
248#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
249#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
250#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
251#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
252#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
253#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
254#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 255
a687059c
LW
256/* This bit of chicanery makes a unary function followed by
257 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
258 * The UNIDOR macro is for unary functions that can be followed by the //
259 * operator (such as C<shift // 0>).
a687059c 260 */
376fcdbf 261#define UNI2(f,x) { \
6154021b 262 pl_yylval.ival = f; \
376fcdbf
AL
263 PL_expect = x; \
264 PL_bufptr = s; \
265 PL_last_uni = PL_oldbufptr; \
266 PL_last_lop_op = f; \
267 if (*s == '(') \
268 return REPORT( (int)FUNC1 ); \
29595ff2 269 s = PEEKSPACE(s); \
376fcdbf
AL
270 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
271 }
6f33ba73
RGS
272#define UNI(f) UNI2(f,XTERM)
273#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 274
376fcdbf 275#define UNIBRACK(f) { \
6154021b 276 pl_yylval.ival = f; \
376fcdbf
AL
277 PL_bufptr = s; \
278 PL_last_uni = PL_oldbufptr; \
279 if (*s == '(') \
280 return REPORT( (int)FUNC1 ); \
29595ff2 281 s = PEEKSPACE(s); \
376fcdbf
AL
282 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
283 }
79072805 284
9f68db38 285/* grandfather return to old style */
6154021b 286#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 287
8fa7f367
JH
288#ifdef DEBUGGING
289
6154021b 290/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
291enum token_type {
292 TOKENTYPE_NONE,
293 TOKENTYPE_IVAL,
6154021b 294 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
295 TOKENTYPE_PVAL,
296 TOKENTYPE_OPVAL,
297 TOKENTYPE_GVVAL
298};
299
6d4a66ac
NC
300static struct debug_tokens {
301 const int token;
302 enum token_type type;
303 const char *name;
304} const debug_tokens[] =
9041c2e3 305{
bbf60fe6
DM
306 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
307 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
308 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
309 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
310 { ARROW, TOKENTYPE_NONE, "ARROW" },
311 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
312 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
313 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
314 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
315 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 316 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
317 { DO, TOKENTYPE_NONE, "DO" },
318 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
319 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
320 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
321 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
322 { ELSE, TOKENTYPE_NONE, "ELSE" },
323 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
324 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
325 { FOR, TOKENTYPE_IVAL, "FOR" },
326 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
330 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
331 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 332 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
333 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
334 { IF, TOKENTYPE_IVAL, "IF" },
335 { LABEL, TOKENTYPE_PVAL, "LABEL" },
336 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
337 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
338 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
339 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
340 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
341 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
342 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
343 { MY, TOKENTYPE_IVAL, "MY" },
344 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
351 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
352 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
353 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
354 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
355 { PREINC, TOKENTYPE_NONE, "PREINC" },
356 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
357 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
358 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
359 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
360 { SUB, TOKENTYPE_NONE, "SUB" },
361 { THING, TOKENTYPE_OPVAL, "THING" },
362 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
363 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
364 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
365 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
366 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
367 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 368 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
369 { WHILE, TOKENTYPE_IVAL, "WHILE" },
370 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 371 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 372 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
373};
374
6154021b 375/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 376
bbf60fe6 377STATIC int
704d4215 378S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 379{
97aff369 380 dVAR;
7918f24d
NC
381
382 PERL_ARGS_ASSERT_TOKEREPORT;
383
bbf60fe6 384 if (DEBUG_T_TEST) {
bd61b366 385 const char *name = NULL;
bbf60fe6 386 enum token_type type = TOKENTYPE_NONE;
f54cb97a 387 const struct debug_tokens *p;
396482e1 388 SV* const report = newSVpvs("<== ");
bbf60fe6 389
f54cb97a 390 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
391 if (p->token == (int)rv) {
392 name = p->name;
393 type = p->type;
394 break;
395 }
396 }
397 if (name)
54667de8 398 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
399 else if ((char)rv > ' ' && (char)rv < '~')
400 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
401 else if (!rv)
396482e1 402 sv_catpvs(report, "EOF");
bbf60fe6
DM
403 else
404 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
405 switch (type) {
406 case TOKENTYPE_NONE:
407 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
408 break;
409 case TOKENTYPE_IVAL:
704d4215 410 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
411 break;
412 case TOKENTYPE_OPNUM:
413 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 414 PL_op_name[lvalp->ival]);
bbf60fe6
DM
415 break;
416 case TOKENTYPE_PVAL:
704d4215 417 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
418 break;
419 case TOKENTYPE_OPVAL:
704d4215 420 if (lvalp->opval) {
401441c0 421 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
422 PL_op_name[lvalp->opval->op_type]);
423 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 424 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 425 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
426 }
427
428 }
401441c0 429 else
396482e1 430 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
431 break;
432 }
b6007c36 433 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
434 };
435 return (int)rv;
998054bd
SC
436}
437
b6007c36
DM
438
439/* print the buffer with suitable escapes */
440
441STATIC void
15f169a1 442S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 443{
396482e1 444 SV* const tmp = newSVpvs("");
7918f24d
NC
445
446 PERL_ARGS_ASSERT_PRINTBUF;
447
b6007c36
DM
448 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
449 SvREFCNT_dec(tmp);
450}
451
8fa7f367
JH
452#endif
453
ffb4593c
NT
454/*
455 * S_ao
456 *
c963b151
BD
457 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
458 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
459 */
460
76e3520e 461STATIC int
cea2e8a9 462S_ao(pTHX_ int toketype)
a0d0e21e 463{
97aff369 464 dVAR;
3280af22
NIS
465 if (*PL_bufptr == '=') {
466 PL_bufptr++;
a0d0e21e 467 if (toketype == ANDAND)
6154021b 468 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 469 else if (toketype == OROR)
6154021b 470 pl_yylval.ival = OP_ORASSIGN;
c963b151 471 else if (toketype == DORDOR)
6154021b 472 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
473 toketype = ASSIGNOP;
474 }
475 return toketype;
476}
477
ffb4593c
NT
478/*
479 * S_no_op
480 * When Perl expects an operator and finds something else, no_op
481 * prints the warning. It always prints "<something> found where
482 * operator expected. It prints "Missing semicolon on previous line?"
483 * if the surprise occurs at the start of the line. "do you need to
484 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
485 * where the compiler doesn't know if foo is a method call or a function.
486 * It prints "Missing operator before end of line" if there's nothing
487 * after the missing operator, or "... before <...>" if there is something
488 * after the missing operator.
489 */
490
76e3520e 491STATIC void
15f169a1 492S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 493{
97aff369 494 dVAR;
9d4ba2ae
AL
495 char * const oldbp = PL_bufptr;
496 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 497
7918f24d
NC
498 PERL_ARGS_ASSERT_NO_OP;
499
1189a94a
GS
500 if (!s)
501 s = oldbp;
07c798fb 502 else
1189a94a 503 PL_bufptr = s;
cea2e8a9 504 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
505 if (ckWARN_d(WARN_SYNTAX)) {
506 if (is_first)
507 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
508 "\t(Missing semicolon on previous line?)\n");
509 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 510 const char *t;
c35e046a
AL
511 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
512 NOOP;
56da5a46
RGS
513 if (t < PL_bufptr && isSPACE(*t))
514 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
515 "\t(Do you need to predeclare %.*s?)\n",
551405c4 516 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
517 }
518 else {
519 assert(s >= oldbp);
520 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 521 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 522 }
07c798fb 523 }
3280af22 524 PL_bufptr = oldbp;
8990e307
LW
525}
526
ffb4593c
NT
527/*
528 * S_missingterm
529 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 530 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
531 * If we're in a delimited string and the delimiter is a control
532 * character, it's reformatted into a two-char sequence like ^C.
533 * This is fatal.
534 */
535
76e3520e 536STATIC void
cea2e8a9 537S_missingterm(pTHX_ char *s)
8990e307 538{
97aff369 539 dVAR;
8990e307
LW
540 char tmpbuf[3];
541 char q;
542 if (s) {
9d4ba2ae 543 char * const nl = strrchr(s,'\n');
d2719217 544 if (nl)
8990e307
LW
545 *nl = '\0';
546 }
463559e7 547 else if (isCNTRL(PL_multi_close)) {
8990e307 548 *tmpbuf = '^';
585ec06d 549 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
550 tmpbuf[2] = '\0';
551 s = tmpbuf;
552 }
553 else {
eb160463 554 *tmpbuf = (char)PL_multi_close;
8990e307
LW
555 tmpbuf[1] = '\0';
556 s = tmpbuf;
557 }
558 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 559 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 560}
79072805 561
ef89dcc3 562#define FEATURE_IS_ENABLED(name) \
0d863452 563 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 564 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b
NC
565/* The longest string we pass in. */
566#define MAX_FEATURE_LEN (sizeof("switch")-1)
567
0d863452
RH
568/*
569 * S_feature_is_enabled
570 * Check whether the named feature is enabled.
571 */
572STATIC bool
15f169a1 573S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 574{
97aff369 575 dVAR;
0d863452 576 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 577 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
578
579 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
580
4a731d7b
NC
581 assert(namelen <= MAX_FEATURE_LEN);
582 memcpy(&he_name[8], name, namelen);
d4c19fe8 583
7b9ef140 584 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
585}
586
ffb4593c
NT
587/*
588 * Perl_deprecate
ffb4593c
NT
589 */
590
79072805 591void
15f169a1 592Perl_deprecate(pTHX_ const char *const s)
a0d0e21e 593{
7918f24d
NC
594 PERL_ARGS_ASSERT_DEPRECATE;
595
599cee73 596 if (ckWARN(WARN_DEPRECATED))
9014280d 597 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
598}
599
12bcd1a6 600void
15f169a1 601Perl_deprecate_old(pTHX_ const char *const s)
12bcd1a6
PM
602{
603 /* This function should NOT be called for any new deprecated warnings */
604 /* Use Perl_deprecate instead */
605 /* */
606 /* It is here to maintain backward compatibility with the pre-5.8 */
607 /* warnings category hierarchy. The "deprecated" category used to */
608 /* live under the "syntax" category. It is now a top-level category */
609 /* in its own right. */
610
7918f24d
NC
611 PERL_ARGS_ASSERT_DEPRECATE_OLD;
612
12bcd1a6 613 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 614 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
615 "Use of %s is deprecated", s);
616}
617
ffb4593c 618/*
9cbb5ea2
GS
619 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
620 * utf16-to-utf8-reversed.
ffb4593c
NT
621 */
622
c39cd008
GS
623#ifdef PERL_CR_FILTER
624static void
625strip_return(SV *sv)
626{
95a20fc0 627 register const char *s = SvPVX_const(sv);
9d4ba2ae 628 register const char * const e = s + SvCUR(sv);
7918f24d
NC
629
630 PERL_ARGS_ASSERT_STRIP_RETURN;
631
c39cd008
GS
632 /* outer loop optimized to do nothing if there are no CR-LFs */
633 while (s < e) {
634 if (*s++ == '\r' && *s == '\n') {
635 /* hit a CR-LF, need to copy the rest */
636 register char *d = s - 1;
637 *d++ = *s++;
638 while (s < e) {
639 if (*s == '\r' && s[1] == '\n')
640 s++;
641 *d++ = *s++;
642 }
643 SvCUR(sv) -= s - d;
644 return;
645 }
646 }
647}
a868473f 648
76e3520e 649STATIC I32
c39cd008 650S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 651{
f54cb97a 652 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
653 if (count > 0 && !maxlen)
654 strip_return(sv);
655 return count;
a868473f
NIS
656}
657#endif
658
199e78b7
DM
659
660
ffb4593c
NT
661/*
662 * Perl_lex_start
5486870f 663 *
e3abe207 664 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
665 *
666 * rsfp is the opened file handle to read from (if any),
667 *
668 * line holds any initial content already read from the file (or in
669 * the case of no file, such as an eval, the whole contents);
670 *
671 * new_filter indicates that this is a new file and it shouldn't inherit
672 * the filters from the current parser (ie require).
ffb4593c
NT
673 */
674
a0d0e21e 675void
5486870f 676Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 677{
97aff369 678 dVAR;
6ef55633 679 const char *s = NULL;
8990e307 680 STRLEN len;
5486870f 681 yy_parser *parser, *oparser;
acdf0a21
DM
682
683 /* create and initialise a parser */
684
199e78b7 685 Newxz(parser, 1, yy_parser);
5486870f 686 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
687 PL_parser = parser;
688
689 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
690 parser->ps = parser->stack;
691 parser->stack_size = YYINITDEPTH;
692
693 parser->stack->state = 0;
694 parser->yyerrstatus = 0;
695 parser->yychar = YYEMPTY; /* Cause a token to be read. */
696
e3abe207
DM
697 /* on scope exit, free this parser and restore any outer one */
698 SAVEPARSER(parser);
7c4baf47 699 parser->saved_curcop = PL_curcop;
e3abe207 700
acdf0a21 701 /* initialise lexer state */
8990e307 702
fb205e7a
DM
703#ifdef PERL_MAD
704 parser->curforce = -1;
705#else
706 parser->nexttoke = 0;
707#endif
ca4cfd28 708 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 709 parser->copline = NOLINE;
5afb0a62 710 parser->lex_state = LEX_NORMAL;
c2598295 711 parser->expect = XSTATE;
2f9285f8 712 parser->rsfp = rsfp;
56b27c9a 713 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 714 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 715
199e78b7
DM
716 Newx(parser->lex_brackstack, 120, char);
717 Newx(parser->lex_casestack, 12, char);
718 *parser->lex_casestack = '\0';
02b34bbe 719
10efb74f
NC
720 if (line) {
721 s = SvPV_const(line, len);
722 } else {
723 len = 0;
724 }
bdc0bf6f 725
10efb74f 726 if (!len) {
bdc0bf6f 727 parser->linestr = newSVpvs("\n;");
10efb74f 728 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 729 parser->linestr = newSVsv(line);
10efb74f 730 if (s[len-1] != ';')
bdc0bf6f 731 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
732 } else {
733 SvTEMP_off(line);
734 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 735 parser->linestr = line;
8990e307 736 }
f06b5848
DM
737 parser->oldoldbufptr =
738 parser->oldbufptr =
739 parser->bufptr =
740 parser->linestart = SvPVX(parser->linestr);
741 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
742 parser->last_lop = parser->last_uni = NULL;
79072805 743}
a687059c 744
e3abe207
DM
745
746/* delete a parser object */
747
748void
749Perl_parser_free(pTHX_ const yy_parser *parser)
750{
7918f24d
NC
751 PERL_ARGS_ASSERT_PARSER_FREE;
752
7c4baf47 753 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
754 SvREFCNT_dec(parser->linestr);
755
2f9285f8
DM
756 if (parser->rsfp == PerlIO_stdin())
757 PerlIO_clearerr(parser->rsfp);
799361c3
SH
758 else if (parser->rsfp && (!parser->old_parser ||
759 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 760 PerlIO_close(parser->rsfp);
5486870f 761 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 762
e3abe207
DM
763 Safefree(parser->stack);
764 Safefree(parser->lex_brackstack);
765 Safefree(parser->lex_casestack);
766 PL_parser = parser->old_parser;
767 Safefree(parser);
768}
769
770
ffb4593c
NT
771/*
772 * Perl_lex_end
9cbb5ea2
GS
773 * Finalizer for lexing operations. Must be called when the parser is
774 * done with the lexer.
ffb4593c
NT
775 */
776
463ee0b2 777void
864dbfa3 778Perl_lex_end(pTHX)
463ee0b2 779{
97aff369 780 dVAR;
3280af22 781 PL_doextract = FALSE;
463ee0b2
LW
782}
783
ffb4593c
NT
784/*
785 * S_incline
786 * This subroutine has nothing to do with tilting, whether at windmills
787 * or pinball tables. Its name is short for "increment line". It
57843af0 788 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 789 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
790 * # line 500 "foo.pm"
791 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
792 */
793
76e3520e 794STATIC void
d9095cec 795S_incline(pTHX_ const char *s)
463ee0b2 796{
97aff369 797 dVAR;
d9095cec
NC
798 const char *t;
799 const char *n;
800 const char *e;
463ee0b2 801
7918f24d
NC
802 PERL_ARGS_ASSERT_INCLINE;
803
57843af0 804 CopLINE_inc(PL_curcop);
463ee0b2
LW
805 if (*s++ != '#')
806 return;
d4c19fe8
AL
807 while (SPACE_OR_TAB(*s))
808 s++;
73659bf1
GS
809 if (strnEQ(s, "line", 4))
810 s += 4;
811 else
812 return;
084592ab 813 if (SPACE_OR_TAB(*s))
73659bf1 814 s++;
4e553d73 815 else
73659bf1 816 return;
d4c19fe8
AL
817 while (SPACE_OR_TAB(*s))
818 s++;
463ee0b2
LW
819 if (!isDIGIT(*s))
820 return;
d4c19fe8 821
463ee0b2
LW
822 n = s;
823 while (isDIGIT(*s))
824 s++;
07714eb4 825 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 826 return;
bf4acbe4 827 while (SPACE_OR_TAB(*s))
463ee0b2 828 s++;
73659bf1 829 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 830 s++;
73659bf1
GS
831 e = t + 1;
832 }
463ee0b2 833 else {
c35e046a
AL
834 t = s;
835 while (!isSPACE(*t))
836 t++;
73659bf1 837 e = t;
463ee0b2 838 }
bf4acbe4 839 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
840 e++;
841 if (*e != '\n' && *e != '\0')
842 return; /* false alarm */
843
f4dd75d9 844 if (t - s > 0) {
d9095cec 845 const STRLEN len = t - s;
8a5ee598 846#ifndef USE_ITHREADS
19bad673
NC
847 SV *const temp_sv = CopFILESV(PL_curcop);
848 const char *cf;
849 STRLEN tmplen;
850
851 if (temp_sv) {
852 cf = SvPVX(temp_sv);
853 tmplen = SvCUR(temp_sv);
854 } else {
855 cf = NULL;
856 tmplen = 0;
857 }
858
42d9b98d 859 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
860 /* must copy *{"::_<(eval N)[oldfilename:L]"}
861 * to *{"::_<newfilename"} */
44867030
NC
862 /* However, the long form of evals is only turned on by the
863 debugger - usually they're "(eval %lu)" */
864 char smallbuf[128];
865 char *tmpbuf;
866 GV **gvp;
d9095cec 867 STRLEN tmplen2 = len;
798b63bc 868 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
869 tmpbuf = smallbuf;
870 else
2ae0db35 871 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
872 tmpbuf[0] = '_';
873 tmpbuf[1] = '<';
2ae0db35 874 memcpy(tmpbuf + 2, cf, tmplen);
44867030 875 tmplen += 2;
8a5ee598
RGS
876 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
877 if (gvp) {
44867030
NC
878 char *tmpbuf2;
879 GV *gv2;
880
881 if (tmplen2 + 2 <= sizeof smallbuf)
882 tmpbuf2 = smallbuf;
883 else
884 Newx(tmpbuf2, tmplen2 + 2, char);
885
886 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
887 /* Either they malloc'd it, or we malloc'd it,
888 so no prefix is present in ours. */
889 tmpbuf2[0] = '_';
890 tmpbuf2[1] = '<';
891 }
892
893 memcpy(tmpbuf2 + 2, s, tmplen2);
894 tmplen2 += 2;
895
8a5ee598 896 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 897 if (!isGV(gv2)) {
8a5ee598 898 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
899 /* adjust ${"::_<newfilename"} to store the new file name */
900 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
901 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
902 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 903 }
44867030
NC
904
905 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 906 }
e66cf94c 907 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 908 }
8a5ee598 909#endif
05ec9bb3 910 CopFILE_free(PL_curcop);
d9095cec 911 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 912 }
57843af0 913 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
914}
915
29595ff2 916#ifdef PERL_MAD
cd81e915 917/* skip space before PL_thistoken */
29595ff2
NC
918
919STATIC char *
920S_skipspace0(pTHX_ register char *s)
921{
7918f24d
NC
922 PERL_ARGS_ASSERT_SKIPSPACE0;
923
29595ff2
NC
924 s = skipspace(s);
925 if (!PL_madskills)
926 return s;
cd81e915
NC
927 if (PL_skipwhite) {
928 if (!PL_thiswhite)
6b29d1f5 929 PL_thiswhite = newSVpvs("");
cd81e915
NC
930 sv_catsv(PL_thiswhite, PL_skipwhite);
931 sv_free(PL_skipwhite);
932 PL_skipwhite = 0;
933 }
934 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
935 return s;
936}
937
cd81e915 938/* skip space after PL_thistoken */
29595ff2
NC
939
940STATIC char *
941S_skipspace1(pTHX_ register char *s)
942{
d4c19fe8 943 const char *start = s;
29595ff2
NC
944 I32 startoff = start - SvPVX(PL_linestr);
945
7918f24d
NC
946 PERL_ARGS_ASSERT_SKIPSPACE1;
947
29595ff2
NC
948 s = skipspace(s);
949 if (!PL_madskills)
950 return s;
951 start = SvPVX(PL_linestr) + startoff;
cd81e915 952 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 953 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
954 PL_thistoken = newSVpvn(tstart, start - tstart);
955 }
956 PL_realtokenstart = -1;
957 if (PL_skipwhite) {
958 if (!PL_nextwhite)
6b29d1f5 959 PL_nextwhite = newSVpvs("");
cd81e915
NC
960 sv_catsv(PL_nextwhite, PL_skipwhite);
961 sv_free(PL_skipwhite);
962 PL_skipwhite = 0;
29595ff2
NC
963 }
964 return s;
965}
966
967STATIC char *
968S_skipspace2(pTHX_ register char *s, SV **svp)
969{
c35e046a
AL
970 char *start;
971 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
972 const I32 startoff = s - SvPVX(PL_linestr);
973
7918f24d
NC
974 PERL_ARGS_ASSERT_SKIPSPACE2;
975
29595ff2
NC
976 s = skipspace(s);
977 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
978 if (!PL_madskills || !svp)
979 return s;
980 start = SvPVX(PL_linestr) + startoff;
cd81e915 981 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 982 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
983 PL_thistoken = newSVpvn(tstart, start - tstart);
984 PL_realtokenstart = -1;
29595ff2 985 }
cd81e915 986 if (PL_skipwhite) {
29595ff2 987 if (!*svp)
6b29d1f5 988 *svp = newSVpvs("");
cd81e915
NC
989 sv_setsv(*svp, PL_skipwhite);
990 sv_free(PL_skipwhite);
991 PL_skipwhite = 0;
29595ff2
NC
992 }
993
994 return s;
995}
996#endif
997
80a702cd 998STATIC void
15f169a1 999S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1000{
1001 AV *av = CopFILEAVx(PL_curcop);
1002 if (av) {
b9f83d2f 1003 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1004 if (orig_sv)
1005 sv_setsv(sv, orig_sv);
1006 else
1007 sv_setpvn(sv, buf, len);
80a702cd
RGS
1008 (void)SvIOK_on(sv);
1009 SvIV_set(sv, 0);
1010 av_store(av, (I32)CopLINE(PL_curcop), sv);
1011 }
1012}
1013
ffb4593c
NT
1014/*
1015 * S_skipspace
1016 * Called to gobble the appropriate amount and type of whitespace.
1017 * Skips comments as well.
1018 */
1019
76e3520e 1020STATIC char *
cea2e8a9 1021S_skipspace(pTHX_ register char *s)
a687059c 1022{
97aff369 1023 dVAR;
5db06880
NC
1024#ifdef PERL_MAD
1025 int curoff;
1026 int startoff = s - SvPVX(PL_linestr);
1027
7918f24d
NC
1028 PERL_ARGS_ASSERT_SKIPSPACE;
1029
cd81e915
NC
1030 if (PL_skipwhite) {
1031 sv_free(PL_skipwhite);
1032 PL_skipwhite = 0;
5db06880
NC
1033 }
1034#endif
7918f24d 1035 PERL_ARGS_ASSERT_SKIPSPACE;
5db06880 1036
3280af22 1037 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1038 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1039 s++;
5db06880
NC
1040#ifdef PERL_MAD
1041 goto done;
1042#else
463ee0b2 1043 return s;
5db06880 1044#endif
463ee0b2
LW
1045 }
1046 for (;;) {
fd049845 1047 STRLEN prevlen;
09bef843 1048 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 1049 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
1050 while (s < PL_bufend && isSPACE(*s)) {
1051 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1052 incline(s);
1053 }
ffb4593c
NT
1054
1055 /* comment */
3280af22
NIS
1056 if (s < PL_bufend && *s == '#') {
1057 while (s < PL_bufend && *s != '\n')
463ee0b2 1058 s++;
60e6418e 1059 if (s < PL_bufend) {
463ee0b2 1060 s++;
60e6418e
GS
1061 if (PL_in_eval && !PL_rsfp) {
1062 incline(s);
1063 continue;
1064 }
1065 }
463ee0b2 1066 }
ffb4593c
NT
1067
1068 /* only continue to recharge the buffer if we're at the end
1069 * of the buffer, we're not reading from a source filter, and
1070 * we're in normal lexing mode
1071 */
09bef843
SB
1072 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1073 PL_lex_state == LEX_FORMLINE)
5db06880
NC
1074#ifdef PERL_MAD
1075 goto done;
1076#else
463ee0b2 1077 return s;
5db06880 1078#endif
ffb4593c
NT
1079
1080 /* try to recharge the buffer */
5db06880
NC
1081#ifdef PERL_MAD
1082 curoff = s - SvPVX(PL_linestr);
1083#endif
1084
9cbb5ea2 1085 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1086 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1087 {
5db06880
NC
1088#ifdef PERL_MAD
1089 if (PL_madskills && curoff != startoff) {
cd81e915 1090 if (!PL_skipwhite)
6b29d1f5 1091 PL_skipwhite = newSVpvs("");
cd81e915 1092 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1093 curoff - startoff);
1094 }
1095
1096 /* mustn't throw out old stuff yet if madpropping */
1097 SvCUR(PL_linestr) = curoff;
1098 s = SvPVX(PL_linestr) + curoff;
1099 *s = 0;
1100 if (curoff && s[-1] == '\n')
1101 s[-1] = ' ';
1102#endif
1103
9cbb5ea2 1104 /* end of file. Add on the -p or -n magic */
cd81e915 1105 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1106 if (PL_minus_p) {
5db06880 1107#ifdef PERL_MAD
6502358f 1108 sv_catpvs(PL_linestr,
5db06880
NC
1109 ";}continue{print or die qq(-p destination: $!\\n);}");
1110#else
6502358f 1111 sv_setpvs(PL_linestr,
01a19ab0 1112 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1113#endif
3280af22 1114 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1115 }
01a19ab0 1116 else if (PL_minus_n) {
5db06880 1117#ifdef PERL_MAD
76f68e9b 1118 sv_catpvs(PL_linestr, ";}");
5db06880 1119#else
76f68e9b 1120 sv_setpvs(PL_linestr, ";}");
5db06880 1121#endif
01a19ab0
NC
1122 PL_minus_n = 0;
1123 }
a0d0e21e 1124 else
5db06880 1125#ifdef PERL_MAD
76f68e9b 1126 sv_catpvs(PL_linestr,";");
5db06880 1127#else
76f68e9b 1128 sv_setpvs(PL_linestr,";");
5db06880 1129#endif
ffb4593c
NT
1130
1131 /* reset variables for next time we lex */
9cbb5ea2 1132 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1133 = SvPVX(PL_linestr)
1134#ifdef PERL_MAD
1135 + curoff
1136#endif
1137 ;
3280af22 1138 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1139 PL_last_lop = PL_last_uni = NULL;
ffb4593c 1140
4c84d7f2 1141 /* Close the filehandle. Could be from
ffb4593c
NT
1142 * STDIN, or a regular file. If we were reading code from
1143 * STDIN (because the commandline held no -e or filename)
1144 * then we don't close it, we reset it so the code can
1145 * read from STDIN too.
1146 */
1147
4c84d7f2 1148 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
3280af22 1149 PerlIO_clearerr(PL_rsfp);
8990e307 1150 else
3280af22 1151 (void)PerlIO_close(PL_rsfp);
4608196e 1152 PL_rsfp = NULL;
463ee0b2
LW
1153 return s;
1154 }
ffb4593c
NT
1155
1156 /* not at end of file, so we only read another line */
09bef843
SB
1157 /* make corresponding updates to old pointers, for yyerror() */
1158 oldprevlen = PL_oldbufptr - PL_bufend;
1159 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1160 if (PL_last_uni)
1161 oldunilen = PL_last_uni - PL_bufend;
1162 if (PL_last_lop)
1163 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1164 PL_linestart = PL_bufptr = s + prevlen;
1165 PL_bufend = s + SvCUR(PL_linestr);
1166 s = PL_bufptr;
09bef843
SB
1167 PL_oldbufptr = s + oldprevlen;
1168 PL_oldoldbufptr = s + oldoldprevlen;
1169 if (PL_last_uni)
1170 PL_last_uni = s + oldunilen;
1171 if (PL_last_lop)
1172 PL_last_lop = s + oldloplen;
a0d0e21e 1173 incline(s);
ffb4593c
NT
1174
1175 /* debugger active and we're not compiling the debugger code,
1176 * so store the line into the debugger's array of lines
1177 */
65269a95 1178 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 1179 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1180 }
5db06880
NC
1181
1182#ifdef PERL_MAD
1183 done:
1184 if (PL_madskills) {
cd81e915 1185 if (!PL_skipwhite)
6b29d1f5 1186 PL_skipwhite = newSVpvs("");
5db06880
NC
1187 curoff = s - SvPVX(PL_linestr);
1188 if (curoff - startoff)
cd81e915 1189 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1190 curoff - startoff);
1191 }
1192 return s;
1193#endif
a687059c 1194}
378cc40b 1195
ffb4593c
NT
1196/*
1197 * S_check_uni
1198 * Check the unary operators to ensure there's no ambiguity in how they're
1199 * used. An ambiguous piece of code would be:
1200 * rand + 5
1201 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1202 * the +5 is its argument.
1203 */
1204
76e3520e 1205STATIC void
cea2e8a9 1206S_check_uni(pTHX)
ba106d47 1207{
97aff369 1208 dVAR;
d4c19fe8
AL
1209 const char *s;
1210 const char *t;
2f3197b3 1211
3280af22 1212 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1213 return;
3280af22
NIS
1214 while (isSPACE(*PL_last_uni))
1215 PL_last_uni++;
c35e046a
AL
1216 s = PL_last_uni;
1217 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1218 s++;
3280af22 1219 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1220 return;
6136c704 1221
0453d815 1222 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1223 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1224 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1225 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1226 }
2f3197b3
LW
1227}
1228
ffb4593c
NT
1229/*
1230 * LOP : macro to build a list operator. Its behaviour has been replaced
1231 * with a subroutine, S_lop() for which LOP is just another name.
1232 */
1233
a0d0e21e
LW
1234#define LOP(f,x) return lop(f,x,s)
1235
ffb4593c
NT
1236/*
1237 * S_lop
1238 * Build a list operator (or something that might be one). The rules:
1239 * - if we have a next token, then it's a list operator [why?]
1240 * - if the next thing is an opening paren, then it's a function
1241 * - else it's a list operator
1242 */
1243
76e3520e 1244STATIC I32
a0be28da 1245S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1246{
97aff369 1247 dVAR;
7918f24d
NC
1248
1249 PERL_ARGS_ASSERT_LOP;
1250
6154021b 1251 pl_yylval.ival = f;
35c8bce7 1252 CLINE;
3280af22
NIS
1253 PL_expect = x;
1254 PL_bufptr = s;
1255 PL_last_lop = PL_oldbufptr;
eb160463 1256 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1257#ifdef PERL_MAD
1258 if (PL_lasttoke)
1259 return REPORT(LSTOP);
1260#else
3280af22 1261 if (PL_nexttoke)
bbf60fe6 1262 return REPORT(LSTOP);
5db06880 1263#endif
79072805 1264 if (*s == '(')
bbf60fe6 1265 return REPORT(FUNC);
29595ff2 1266 s = PEEKSPACE(s);
79072805 1267 if (*s == '(')
bbf60fe6 1268 return REPORT(FUNC);
79072805 1269 else
bbf60fe6 1270 return REPORT(LSTOP);
79072805
LW
1271}
1272
5db06880
NC
1273#ifdef PERL_MAD
1274 /*
1275 * S_start_force
1276 * Sets up for an eventual force_next(). start_force(0) basically does
1277 * an unshift, while start_force(-1) does a push. yylex removes items
1278 * on the "pop" end.
1279 */
1280
1281STATIC void
1282S_start_force(pTHX_ int where)
1283{
1284 int i;
1285
cd81e915 1286 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1287 where = PL_lasttoke;
cd81e915
NC
1288 assert(PL_curforce < 0 || PL_curforce == where);
1289 if (PL_curforce != where) {
5db06880
NC
1290 for (i = PL_lasttoke; i > where; --i) {
1291 PL_nexttoke[i] = PL_nexttoke[i-1];
1292 }
1293 PL_lasttoke++;
1294 }
cd81e915 1295 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1296 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1297 PL_curforce = where;
1298 if (PL_nextwhite) {
5db06880 1299 if (PL_madskills)
6b29d1f5 1300 curmad('^', newSVpvs(""));
cd81e915 1301 CURMAD('_', PL_nextwhite);
5db06880
NC
1302 }
1303}
1304
1305STATIC void
1306S_curmad(pTHX_ char slot, SV *sv)
1307{
1308 MADPROP **where;
1309
1310 if (!sv)
1311 return;
cd81e915
NC
1312 if (PL_curforce < 0)
1313 where = &PL_thismad;
5db06880 1314 else
cd81e915 1315 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1316
cd81e915 1317 if (PL_faketokens)
76f68e9b 1318 sv_setpvs(sv, "");
5db06880
NC
1319 else {
1320 if (!IN_BYTES) {
1321 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1322 SvUTF8_on(sv);
1323 else if (PL_encoding) {
1324 sv_recode_to_utf8(sv, PL_encoding);
1325 }
1326 }
1327 }
1328
1329 /* keep a slot open for the head of the list? */
1330 if (slot != '_' && *where && (*where)->mad_key == '^') {
1331 (*where)->mad_key = slot;
daba3364 1332 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1333 (*where)->mad_val = (void*)sv;
1334 }
1335 else
1336 addmad(newMADsv(slot, sv), where, 0);
1337}
1338#else
b3f24c00
MHM
1339# define start_force(where) NOOP
1340# define curmad(slot, sv) NOOP
5db06880
NC
1341#endif
1342
ffb4593c
NT
1343/*
1344 * S_force_next
9cbb5ea2 1345 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1346 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1347 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1348 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1349 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1350 */
1351
4e553d73 1352STATIC void
cea2e8a9 1353S_force_next(pTHX_ I32 type)
79072805 1354{
97aff369 1355 dVAR;
704d4215
GG
1356#ifdef DEBUGGING
1357 if (DEBUG_T_TEST) {
1358 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1359 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1360 }
1361#endif
5db06880 1362#ifdef PERL_MAD
cd81e915 1363 if (PL_curforce < 0)
5db06880 1364 start_force(PL_lasttoke);
cd81e915 1365 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1366 if (PL_lex_state != LEX_KNOWNEXT)
1367 PL_lex_defer = PL_lex_state;
1368 PL_lex_state = LEX_KNOWNEXT;
1369 PL_lex_expect = PL_expect;
cd81e915 1370 PL_curforce = -1;
5db06880 1371#else
3280af22
NIS
1372 PL_nexttype[PL_nexttoke] = type;
1373 PL_nexttoke++;
1374 if (PL_lex_state != LEX_KNOWNEXT) {
1375 PL_lex_defer = PL_lex_state;
1376 PL_lex_expect = PL_expect;
1377 PL_lex_state = LEX_KNOWNEXT;
79072805 1378 }
5db06880 1379#endif
79072805
LW
1380}
1381
d0a148a6 1382STATIC SV *
15f169a1 1383S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1384{
97aff369 1385 dVAR;
740cce10
NC
1386 SV * const sv = newSVpvn_utf8(start, len,
1387 UTF && !IN_BYTES
1388 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1389 return sv;
1390}
1391
ffb4593c
NT
1392/*
1393 * S_force_word
1394 * When the lexer knows the next thing is a word (for instance, it has
1395 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1396 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1397 * lookahead.
ffb4593c
NT
1398 *
1399 * Arguments:
b1b65b59 1400 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1401 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1402 * int check_keyword : if true, Perl checks to make sure the word isn't
1403 * a keyword (do this if the word is a label, e.g. goto FOO)
1404 * int allow_pack : if true, : characters will also be allowed (require,
1405 * use, etc. do this)
9cbb5ea2 1406 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1407 */
1408
76e3520e 1409STATIC char *
cea2e8a9 1410S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1411{
97aff369 1412 dVAR;
463ee0b2
LW
1413 register char *s;
1414 STRLEN len;
4e553d73 1415
7918f24d
NC
1416 PERL_ARGS_ASSERT_FORCE_WORD;
1417
29595ff2 1418 start = SKIPSPACE1(start);
463ee0b2 1419 s = start;
7e2040f0 1420 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1421 (allow_pack && *s == ':') ||
15f0808c 1422 (allow_initial_tick && *s == '\'') )
a0d0e21e 1423 {
3280af22 1424 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1425 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1426 return start;
cd81e915 1427 start_force(PL_curforce);
5db06880
NC
1428 if (PL_madskills)
1429 curmad('X', newSVpvn(start,s-start));
463ee0b2 1430 if (token == METHOD) {
29595ff2 1431 s = SKIPSPACE1(s);
463ee0b2 1432 if (*s == '(')
3280af22 1433 PL_expect = XTERM;
463ee0b2 1434 else {
3280af22 1435 PL_expect = XOPERATOR;
463ee0b2 1436 }
79072805 1437 }
e74e6b3d 1438 if (PL_madskills)
63575281 1439 curmad('g', newSVpvs( "forced" ));
9ded7720 1440 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1441 = (OP*)newSVOP(OP_CONST,0,
1442 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1443 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1444 force_next(token);
1445 }
1446 return s;
1447}
1448
ffb4593c
NT
1449/*
1450 * S_force_ident
9cbb5ea2 1451 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1452 * text only contains the "foo" portion. The first argument is a pointer
1453 * to the "foo", and the second argument is the type symbol to prefix.
1454 * Forces the next token to be a "WORD".
9cbb5ea2 1455 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1456 */
1457
76e3520e 1458STATIC void
bfed75c6 1459S_force_ident(pTHX_ register const char *s, int kind)
79072805 1460{
97aff369 1461 dVAR;
7918f24d
NC
1462
1463 PERL_ARGS_ASSERT_FORCE_IDENT;
1464
c35e046a 1465 if (*s) {
90e5519e
NC
1466 const STRLEN len = strlen(s);
1467 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1468 start_force(PL_curforce);
9ded7720 1469 NEXTVAL_NEXTTOKE.opval = o;
79072805 1470 force_next(WORD);
748a9306 1471 if (kind) {
11343788 1472 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1473 /* XXX see note in pp_entereval() for why we forgo typo
1474 warnings if the symbol must be introduced in an eval.
1475 GSAR 96-10-12 */
90e5519e
NC
1476 gv_fetchpvn_flags(s, len,
1477 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1478 : GV_ADD,
1479 kind == '$' ? SVt_PV :
1480 kind == '@' ? SVt_PVAV :
1481 kind == '%' ? SVt_PVHV :
a0d0e21e 1482 SVt_PVGV
90e5519e 1483 );
748a9306 1484 }
79072805
LW
1485 }
1486}
1487
1571675a
GS
1488NV
1489Perl_str_to_version(pTHX_ SV *sv)
1490{
1491 NV retval = 0.0;
1492 NV nshift = 1.0;
1493 STRLEN len;
cfd0369c 1494 const char *start = SvPV_const(sv,len);
9d4ba2ae 1495 const char * const end = start + len;
504618e9 1496 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
1497
1498 PERL_ARGS_ASSERT_STR_TO_VERSION;
1499
1571675a 1500 while (start < end) {
ba210ebe 1501 STRLEN skip;
1571675a
GS
1502 UV n;
1503 if (utf)
9041c2e3 1504 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1505 else {
1506 n = *(U8*)start;
1507 skip = 1;
1508 }
1509 retval += ((NV)n)/nshift;
1510 start += skip;
1511 nshift *= 1000;
1512 }
1513 return retval;
1514}
1515
4e553d73 1516/*
ffb4593c
NT
1517 * S_force_version
1518 * Forces the next token to be a version number.
e759cc13
RGS
1519 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1520 * and if "guessing" is TRUE, then no new token is created (and the caller
1521 * must use an alternative parsing method).
ffb4593c
NT
1522 */
1523
76e3520e 1524STATIC char *
e759cc13 1525S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1526{
97aff369 1527 dVAR;
5f66b61c 1528 OP *version = NULL;
44dcb63b 1529 char *d;
5db06880
NC
1530#ifdef PERL_MAD
1531 I32 startoff = s - SvPVX(PL_linestr);
1532#endif
89bfa8cd 1533
7918f24d
NC
1534 PERL_ARGS_ASSERT_FORCE_VERSION;
1535
29595ff2 1536 s = SKIPSPACE1(s);
89bfa8cd 1537
44dcb63b 1538 d = s;
dd629d5b 1539 if (*d == 'v')
44dcb63b 1540 d++;
44dcb63b 1541 if (isDIGIT(*d)) {
e759cc13
RGS
1542 while (isDIGIT(*d) || *d == '_' || *d == '.')
1543 d++;
5db06880
NC
1544#ifdef PERL_MAD
1545 if (PL_madskills) {
cd81e915 1546 start_force(PL_curforce);
5db06880
NC
1547 curmad('X', newSVpvn(s,d-s));
1548 }
1549#endif
9f3d182e 1550 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1551 SV *ver;
6154021b
RGS
1552 s = scan_num(s, &pl_yylval);
1553 version = pl_yylval.opval;
dd629d5b
GS
1554 ver = cSVOPx(version)->op_sv;
1555 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1556 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1557 SvNV_set(ver, str_to_version(ver));
1571675a 1558 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1559 }
89bfa8cd 1560 }
5db06880
NC
1561 else if (guessing) {
1562#ifdef PERL_MAD
1563 if (PL_madskills) {
cd81e915
NC
1564 sv_free(PL_nextwhite); /* let next token collect whitespace */
1565 PL_nextwhite = 0;
5db06880
NC
1566 s = SvPVX(PL_linestr) + startoff;
1567 }
1568#endif
e759cc13 1569 return s;
5db06880 1570 }
89bfa8cd
PP
1571 }
1572
5db06880
NC
1573#ifdef PERL_MAD
1574 if (PL_madskills && !version) {
cd81e915
NC
1575 sv_free(PL_nextwhite); /* let next token collect whitespace */
1576 PL_nextwhite = 0;
5db06880
NC
1577 s = SvPVX(PL_linestr) + startoff;
1578 }
1579#endif
89bfa8cd 1580 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1581 start_force(PL_curforce);
9ded7720 1582 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1583 force_next(WORD);
89bfa8cd 1584
e759cc13 1585 return s;
89bfa8cd
PP
1586}
1587
ffb4593c
NT
1588/*
1589 * S_tokeq
1590 * Tokenize a quoted string passed in as an SV. It finds the next
1591 * chunk, up to end of string or a backslash. It may make a new
1592 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1593 * turns \\ into \.
1594 */
1595
76e3520e 1596STATIC SV *
cea2e8a9 1597S_tokeq(pTHX_ SV *sv)
79072805 1598{
97aff369 1599 dVAR;
79072805
LW
1600 register char *s;
1601 register char *send;
1602 register char *d;
b3ac6de7
IZ
1603 STRLEN len = 0;
1604 SV *pv = sv;
79072805 1605
7918f24d
NC
1606 PERL_ARGS_ASSERT_TOKEQ;
1607
79072805 1608 if (!SvLEN(sv))
b3ac6de7 1609 goto finish;
79072805 1610
a0d0e21e 1611 s = SvPV_force(sv, len);
21a311ee 1612 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1613 goto finish;
463ee0b2 1614 send = s + len;
79072805
LW
1615 while (s < send && *s != '\\')
1616 s++;
1617 if (s == send)
b3ac6de7 1618 goto finish;
79072805 1619 d = s;
be4731d2 1620 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 1621 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 1622 }
79072805
LW
1623 while (s < send) {
1624 if (*s == '\\') {
a0d0e21e 1625 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1626 s++; /* all that, just for this */
1627 }
1628 *d++ = *s++;
1629 }
1630 *d = '\0';
95a20fc0 1631 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1632 finish:
3280af22 1633 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 1634 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
1635 return sv;
1636}
1637
ffb4593c
NT
1638/*
1639 * Now come three functions related to double-quote context,
1640 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1641 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1642 * interact with PL_lex_state, and create fake ( ... ) argument lists
1643 * to handle functions and concatenation.
1644 * They assume that whoever calls them will be setting up a fake
1645 * join call, because each subthing puts a ',' after it. This lets
1646 * "lower \luPpEr"
1647 * become
1648 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1649 *
1650 * (I'm not sure whether the spurious commas at the end of lcfirst's
1651 * arguments and join's arguments are created or not).
1652 */
1653
1654/*
1655 * S_sublex_start
6154021b 1656 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
1657 *
1658 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 1659 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
1660 *
1661 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1662 *
1663 * Everything else becomes a FUNC.
1664 *
1665 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1666 * had an OP_CONST or OP_READLINE). This just sets us up for a
1667 * call to S_sublex_push().
1668 */
1669
76e3520e 1670STATIC I32
cea2e8a9 1671S_sublex_start(pTHX)
79072805 1672{
97aff369 1673 dVAR;
6154021b 1674 register const I32 op_type = pl_yylval.ival;
79072805
LW
1675
1676 if (op_type == OP_NULL) {
6154021b 1677 pl_yylval.opval = PL_lex_op;
5f66b61c 1678 PL_lex_op = NULL;
79072805
LW
1679 return THING;
1680 }
1681 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1682 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1683
1684 if (SvTYPE(sv) == SVt_PVIV) {
1685 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1686 STRLEN len;
96a5add6 1687 const char * const p = SvPV_const(sv, len);
740cce10 1688 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
1689 SvREFCNT_dec(sv);
1690 sv = nsv;
4e553d73 1691 }
6154021b 1692 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1693 PL_lex_stuff = NULL;
6f33ba73
RGS
1694 /* Allow <FH> // "foo" */
1695 if (op_type == OP_READLINE)
1696 PL_expect = XTERMORDORDOR;
79072805
LW
1697 return THING;
1698 }
e3f73d4e
RGS
1699 else if (op_type == OP_BACKTICK && PL_lex_op) {
1700 /* readpipe() vas overriden */
1701 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 1702 pl_yylval.opval = PL_lex_op;
9b201d7d 1703 PL_lex_op = NULL;
e3f73d4e
RGS
1704 PL_lex_stuff = NULL;
1705 return THING;
1706 }
79072805 1707
3280af22 1708 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 1709 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
1710 PL_sublex_info.sub_op = PL_lex_op;
1711 PL_lex_state = LEX_INTERPPUSH;
55497cff 1712
3280af22
NIS
1713 PL_expect = XTERM;
1714 if (PL_lex_op) {
6154021b 1715 pl_yylval.opval = PL_lex_op;
5f66b61c 1716 PL_lex_op = NULL;
55497cff
PP
1717 return PMFUNC;
1718 }
1719 else
1720 return FUNC;
1721}
1722
ffb4593c
NT
1723/*
1724 * S_sublex_push
1725 * Create a new scope to save the lexing state. The scope will be
1726 * ended in S_sublex_done. Returns a '(', starting the function arguments
1727 * to the uc, lc, etc. found before.
1728 * Sets PL_lex_state to LEX_INTERPCONCAT.
1729 */
1730
76e3520e 1731STATIC I32
cea2e8a9 1732S_sublex_push(pTHX)
55497cff 1733{
27da23d5 1734 dVAR;
f46d017c 1735 ENTER;
55497cff 1736
3280af22 1737 PL_lex_state = PL_sublex_info.super_state;
651b5b28 1738 SAVEBOOL(PL_lex_dojoin);
3280af22 1739 SAVEI32(PL_lex_brackets);
3280af22
NIS
1740 SAVEI32(PL_lex_casemods);
1741 SAVEI32(PL_lex_starts);
651b5b28 1742 SAVEI8(PL_lex_state);
7766f137 1743 SAVEVPTR(PL_lex_inpat);
98246f1e 1744 SAVEI16(PL_lex_inwhat);
57843af0 1745 SAVECOPLINE(PL_curcop);
3280af22 1746 SAVEPPTR(PL_bufptr);
8452ff4b 1747 SAVEPPTR(PL_bufend);
3280af22
NIS
1748 SAVEPPTR(PL_oldbufptr);
1749 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1750 SAVEPPTR(PL_last_lop);
1751 SAVEPPTR(PL_last_uni);
3280af22
NIS
1752 SAVEPPTR(PL_linestart);
1753 SAVESPTR(PL_linestr);
8edd5f42
RGS
1754 SAVEGENERICPV(PL_lex_brackstack);
1755 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1756
1757 PL_linestr = PL_lex_stuff;
a0714e2c 1758 PL_lex_stuff = NULL;
3280af22 1759
9cbb5ea2
GS
1760 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1761 = SvPVX(PL_linestr);
3280af22 1762 PL_bufend += SvCUR(PL_linestr);
bd61b366 1763 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1764 SAVEFREESV(PL_linestr);
1765
1766 PL_lex_dojoin = FALSE;
1767 PL_lex_brackets = 0;
a02a5408
JC
1768 Newx(PL_lex_brackstack, 120, char);
1769 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1770 PL_lex_casemods = 0;
1771 *PL_lex_casestack = '\0';
1772 PL_lex_starts = 0;
1773 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1774 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1775
1776 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1777 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1778 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1779 else
5f66b61c 1780 PL_lex_inpat = NULL;
79072805 1781
55497cff 1782 return '(';
79072805
LW
1783}
1784
ffb4593c
NT
1785/*
1786 * S_sublex_done
1787 * Restores lexer state after a S_sublex_push.
1788 */
1789
76e3520e 1790STATIC I32
cea2e8a9 1791S_sublex_done(pTHX)
79072805 1792{
27da23d5 1793 dVAR;
3280af22 1794 if (!PL_lex_starts++) {
396482e1 1795 SV * const sv = newSVpvs("");
9aa983d2
JH
1796 if (SvUTF8(PL_linestr))
1797 SvUTF8_on(sv);
3280af22 1798 PL_expect = XOPERATOR;
6154021b 1799 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1800 return THING;
1801 }
1802
3280af22
NIS
1803 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1804 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1805 return yylex();
79072805
LW
1806 }
1807
ffb4593c 1808 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1809 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1810 PL_linestr = PL_lex_repl;
1811 PL_lex_inpat = 0;
1812 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1813 PL_bufend += SvCUR(PL_linestr);
bd61b366 1814 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1815 SAVEFREESV(PL_linestr);
1816 PL_lex_dojoin = FALSE;
1817 PL_lex_brackets = 0;
3280af22
NIS
1818 PL_lex_casemods = 0;
1819 *PL_lex_casestack = '\0';
1820 PL_lex_starts = 0;
25da4f38 1821 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1822 PL_lex_state = LEX_INTERPNORMAL;
1823 PL_lex_starts++;
e9fa98b2
HS
1824 /* we don't clear PL_lex_repl here, so that we can check later
1825 whether this is an evalled subst; that means we rely on the
1826 logic to ensure sublex_done() is called again only via the
1827 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1828 }
e9fa98b2 1829 else {
3280af22 1830 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1831 PL_lex_repl = NULL;
e9fa98b2 1832 }
79072805 1833 return ',';
ffed7fef
LW
1834 }
1835 else {
5db06880
NC
1836#ifdef PERL_MAD
1837 if (PL_madskills) {
cd81e915
NC
1838 if (PL_thiswhite) {
1839 if (!PL_endwhite)
6b29d1f5 1840 PL_endwhite = newSVpvs("");
cd81e915
NC
1841 sv_catsv(PL_endwhite, PL_thiswhite);
1842 PL_thiswhite = 0;
1843 }
1844 if (PL_thistoken)
76f68e9b 1845 sv_setpvs(PL_thistoken,"");
5db06880 1846 else
cd81e915 1847 PL_realtokenstart = -1;
5db06880
NC
1848 }
1849#endif
f46d017c 1850 LEAVE;
3280af22
NIS
1851 PL_bufend = SvPVX(PL_linestr);
1852 PL_bufend += SvCUR(PL_linestr);
1853 PL_expect = XOPERATOR;
09bef843 1854 PL_sublex_info.sub_inwhat = 0;
79072805 1855 return ')';
ffed7fef
LW
1856 }
1857}
1858
02aa26ce
NT
1859/*
1860 scan_const
1861
1862 Extracts a pattern, double-quoted string, or transliteration. This
1863 is terrifying code.
1864
94def140 1865 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1866 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1867 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1868
94def140
ST
1869 Returns a pointer to the character scanned up to. If this is
1870 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 1871 successfully parsed), will leave an OP for the substring scanned
6154021b 1872 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
1873 by looking at the next characters herself.
1874
02aa26ce
NT
1875 In patterns:
1876 backslashes:
1877 double-quoted style: \r and \n
1878 regexp special ones: \D \s
94def140
ST
1879 constants: \x31
1880 backrefs: \1
02aa26ce
NT
1881 case and quoting: \U \Q \E
1882 stops on @ and $, but not for $ as tail anchor
1883
1884 In transliterations:
1885 characters are VERY literal, except for - not at the start or end
94def140
ST
1886 of the string, which indicates a range. If the range is in bytes,
1887 scan_const expands the range to the full set of intermediate
1888 characters. If the range is in utf8, the hyphen is replaced with
1889 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1890
1891 In double-quoted strings:
1892 backslashes:
1893 double-quoted style: \r and \n
94def140
ST
1894 constants: \x31
1895 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1896 case and quoting: \U \Q \E
1897 stops on @ and $
1898
1899 scan_const does *not* construct ops to handle interpolated strings.
1900 It stops processing as soon as it finds an embedded $ or @ variable
1901 and leaves it to the caller to work out what's going on.
1902
94def140
ST
1903 embedded arrays (whether in pattern or not) could be:
1904 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1905
1906 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1907
1908 $ in pattern could be $foo or could be tail anchor. Assumption:
1909 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1910 followed by one of "()| \r\n\t"
02aa26ce
NT
1911
1912 \1 (backreferences) are turned into $1
1913
1914 The structure of the code is
1915 while (there's a character to process) {
94def140
ST
1916 handle transliteration ranges
1917 skip regexp comments /(?#comment)/ and codes /(?{code})/
1918 skip #-initiated comments in //x patterns
1919 check for embedded arrays
02aa26ce
NT
1920 check for embedded scalars
1921 if (backslash) {
94def140
ST
1922 leave intact backslashes from leaveit (below)
1923 deprecate \1 in substitution replacements
02aa26ce
NT
1924 handle string-changing backslashes \l \U \Q \E, etc.
1925 switch (what was escaped) {
94def140
ST
1926 handle \- in a transliteration (becomes a literal -)
1927 handle \132 (octal characters)
1928 handle \x15 and \x{1234} (hex characters)
1929 handle \N{name} (named characters)
1930 handle \cV (control characters)
1931 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 1932 } (end switch)
77a135fe 1933 continue
02aa26ce 1934 } (end if backslash)
77a135fe 1935 handle regular character
02aa26ce 1936 } (end while character to read)
4e553d73 1937
02aa26ce
NT
1938*/
1939
76e3520e 1940STATIC char *
cea2e8a9 1941S_scan_const(pTHX_ char *start)
79072805 1942{
97aff369 1943 dVAR;
3280af22 1944 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
1945 SV *sv = newSV(send - start); /* sv for the constant. See
1946 note below on sizing. */
02aa26ce
NT
1947 register char *s = start; /* start of the constant */
1948 register char *d = SvPVX(sv); /* destination for copies */
1949 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1950 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 1951 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
1952 I32 this_utf8 = UTF; /* Is the source string assumed
1953 to be UTF8? But, this can
1954 show as true when the source
1955 isn't utf8, as for example
1956 when it is entirely composed
1957 of hex constants */
1958
1959 /* Note on sizing: The scanned constant is placed into sv, which is
1960 * initialized by newSV() assuming one byte of output for every byte of
1961 * input. This routine expects newSV() to allocate an extra byte for a
1962 * trailing NUL, which this routine will append if it gets to the end of
1963 * the input. There may be more bytes of input than output (eg., \N{LATIN
1964 * CAPITAL LETTER A}), or more output than input if the constant ends up
1965 * recoded to utf8, but each time a construct is found that might increase
1966 * the needed size, SvGROW() is called. Its size parameter each time is
1967 * based on the best guess estimate at the time, namely the length used so
1968 * far, plus the length the current construct will occupy, plus room for
1969 * the trailing NUL, plus one byte for every input byte still unscanned */
1970
012bcf8d 1971 UV uv;
4c3a8340
ST
1972#ifdef EBCDIC
1973 UV literal_endpoint = 0;
e294cc5d 1974 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1975#endif
012bcf8d 1976
7918f24d
NC
1977 PERL_ARGS_ASSERT_SCAN_CONST;
1978
2b9d42f0
NIS
1979 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1980 /* If we are doing a trans and we know we want UTF8 set expectation */
1981 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1982 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1983 }
1984
1985
79072805 1986 while (s < send || dorange) {
02aa26ce 1987 /* get transliterations out of the way (they're most literal) */
3280af22 1988 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1989 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1990 if (dorange) {
1ba5c669
JH
1991 I32 i; /* current expanded character */
1992 I32 min; /* first character in range */
1993 I32 max; /* last character in range */
02aa26ce 1994
e294cc5d
JH
1995#ifdef EBCDIC
1996 UV uvmax = 0;
1997#endif
1998
1999 if (has_utf8
2000#ifdef EBCDIC
2001 && !native_range
2002#endif
2003 ) {
9d4ba2ae 2004 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2005 char *e = d++;
2006 while (e-- > c)
2007 *(e + 1) = *e;
25716404 2008 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2009 /* mark the range as done, and continue */
2010 dorange = FALSE;
2011 didrange = TRUE;
2012 continue;
2013 }
2b9d42f0 2014
95a20fc0 2015 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2016#ifdef EBCDIC
2017 SvGROW(sv,
2018 SvLEN(sv) + (has_utf8 ?
2019 (512 - UTF_CONTINUATION_MARK +
2020 UNISKIP(0x100))
2021 : 256));
2022 /* How many two-byte within 0..255: 128 in UTF-8,
2023 * 96 in UTF-8-mod. */
2024#else
9cbb5ea2 2025 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2026#endif
9cbb5ea2 2027 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2028#ifdef EBCDIC
2029 if (has_utf8) {
2030 int j;
2031 for (j = 0; j <= 1; j++) {
2032 char * const c = (char*)utf8_hop((U8*)d, -1);
2033 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2034 if (j)
2035 min = (U8)uv;
2036 else if (uv < 256)
2037 max = (U8)uv;
2038 else {
2039 max = (U8)0xff; /* only to \xff */
2040 uvmax = uv; /* \x{100} to uvmax */
2041 }
2042 d = c; /* eat endpoint chars */
2043 }
2044 }
2045 else {
2046#endif
2047 d -= 2; /* eat the first char and the - */
2048 min = (U8)*d; /* first char in range */
2049 max = (U8)d[1]; /* last char in range */
2050#ifdef EBCDIC
2051 }
2052#endif
8ada0baa 2053
c2e66d9e 2054 if (min > max) {
01ec43d0 2055 Perl_croak(aTHX_
d1573ac7 2056 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2057 (char)min, (char)max);
c2e66d9e
GS
2058 }
2059
c7f1f016 2060#ifdef EBCDIC
4c3a8340
ST
2061 if (literal_endpoint == 2 &&
2062 ((isLOWER(min) && isLOWER(max)) ||
2063 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2064 if (isLOWER(min)) {
2065 for (i = min; i <= max; i++)
2066 if (isLOWER(i))
db42d148 2067 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2068 } else {
2069 for (i = min; i <= max; i++)
2070 if (isUPPER(i))
db42d148 2071 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2072 }
2073 }
2074 else
2075#endif
2076 for (i = min; i <= max; i++)
e294cc5d
JH
2077#ifdef EBCDIC
2078 if (has_utf8) {
2079 const U8 ch = (U8)NATIVE_TO_UTF(i);
2080 if (UNI_IS_INVARIANT(ch))
2081 *d++ = (U8)i;
2082 else {
2083 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2084 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2085 }
2086 }
2087 else
2088#endif
2089 *d++ = (char)i;
2090
2091#ifdef EBCDIC
2092 if (uvmax) {
2093 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2094 if (uvmax > 0x101)
2095 *d++ = (char)UTF_TO_NATIVE(0xff);
2096 if (uvmax > 0x100)
2097 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2098 }
2099#endif
02aa26ce
NT
2100
2101 /* mark the range as done, and continue */
79072805 2102 dorange = FALSE;
01ec43d0 2103 didrange = TRUE;
4c3a8340
ST
2104#ifdef EBCDIC
2105 literal_endpoint = 0;
2106#endif
79072805 2107 continue;
4e553d73 2108 }
02aa26ce
NT
2109
2110 /* range begins (ignore - as first or last char) */
79072805 2111 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2112 if (didrange) {
1fafa243 2113 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2114 }
e294cc5d
JH
2115 if (has_utf8
2116#ifdef EBCDIC
2117 && !native_range
2118#endif
2119 ) {
25716404 2120 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2121 s++;
2122 continue;
2123 }
79072805
LW
2124 dorange = TRUE;
2125 s++;
01ec43d0
GS
2126 }
2127 else {
2128 didrange = FALSE;
4c3a8340
ST
2129#ifdef EBCDIC
2130 literal_endpoint = 0;
e294cc5d 2131 native_range = TRUE;
4c3a8340 2132#endif
01ec43d0 2133 }
79072805 2134 }
02aa26ce
NT
2135
2136 /* if we get here, we're not doing a transliteration */
2137
0f5d15d6
IZ
2138 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2139 except for the last char, which will be done separately. */
3280af22 2140 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2141 if (s[2] == '#') {
e994fd66 2142 while (s+1 < send && *s != ')')
db42d148 2143 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2144 }
2145 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2146 || (s[2] == '?' && s[3] == '{'))
155aba94 2147 {
cc6b7395 2148 I32 count = 1;
0f5d15d6 2149 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2150 char c;
2151
d9f97599
GS
2152 while (count && (c = *regparse)) {
2153 if (c == '\\' && regparse[1])
2154 regparse++;
4e553d73 2155 else if (c == '{')
cc6b7395 2156 count++;
4e553d73 2157 else if (c == '}')
cc6b7395 2158 count--;
d9f97599 2159 regparse++;
cc6b7395 2160 }
e994fd66 2161 if (*regparse != ')')
5bdf89e7 2162 regparse--; /* Leave one char for continuation. */
0f5d15d6 2163 while (s < regparse)
db42d148 2164 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2165 }
748a9306 2166 }
02aa26ce
NT
2167
2168 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2169 else if (*s == '#' && PL_lex_inpat &&
2170 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2171 while (s+1 < send && *s != '\n')
db42d148 2172 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2173 }
02aa26ce 2174
5d1d4326 2175 /* check for embedded arrays
da6eedaa 2176 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2177 */
1749ea0d
ST
2178 else if (*s == '@' && s[1]) {
2179 if (isALNUM_lazy_if(s+1,UTF))
2180 break;
2181 if (strchr(":'{$", s[1]))
2182 break;
2183 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2184 break; /* in regexp, neither @+ nor @- are interpolated */
2185 }
02aa26ce
NT
2186
2187 /* check for embedded scalars. only stop if we're sure it's a
2188 variable.
2189 */
79072805 2190 else if (*s == '$') {
3280af22 2191 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2192 break;
77772344
B
2193 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2194 if (s[1] == '\\' && ckWARN(WARN_AMBIGUOUS)) {
2195 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2196 "Possible unintended interpolation of $\\ in regex");
2197 }
79072805 2198 break; /* in regexp, $ might be tail anchor */
77772344 2199 }
79072805 2200 }
02aa26ce 2201
2b9d42f0
NIS
2202 /* End of else if chain - OP_TRANS rejoin rest */
2203
02aa26ce 2204 /* backslashes */
79072805
LW
2205 if (*s == '\\' && s+1 < send) {
2206 s++;
02aa26ce 2207
02aa26ce 2208 /* deprecate \1 in strings and substitution replacements */
3280af22 2209 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2210 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2211 {
599cee73 2212 if (ckWARN(WARN_SYNTAX))
9014280d 2213 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2214 *--s = '$';
2215 break;
2216 }
02aa26ce
NT
2217
2218 /* string-change backslash escapes */
3280af22 2219 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2220 --s;
2221 break;
2222 }
cc74c5bd
ST
2223 /* skip any other backslash escapes in a pattern */
2224 else if (PL_lex_inpat) {
2225 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2226 goto default_action;
2227 }
02aa26ce
NT
2228
2229 /* if we get here, it's either a quoted -, or a digit */
79072805 2230 switch (*s) {
02aa26ce
NT
2231
2232 /* quoted - in transliterations */
79072805 2233 case '-':
3280af22 2234 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2235 *d++ = *s++;
2236 continue;
2237 }
2238 /* FALL THROUGH */
2239 default:
11b8faa4 2240 {
86f97054 2241 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2242 ckWARN(WARN_MISC))
9014280d 2243 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2244 "Unrecognized escape \\%c passed through",
2245 *s);
11b8faa4 2246 /* default action is to copy the quoted character */
f9a63242 2247 goto default_action;
11b8faa4 2248 }
02aa26ce 2249
77a135fe 2250 /* eg. \132 indicates the octal constant 0x132 */
79072805
LW
2251 case '0': case '1': case '2': case '3':
2252 case '4': case '5': case '6': case '7':
ba210ebe 2253 {
53305cf1
NC
2254 I32 flags = 0;
2255 STRLEN len = 3;
77a135fe 2256 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2257 s += len;
2258 }
012bcf8d 2259 goto NUM_ESCAPE_INSERT;
02aa26ce 2260
77a135fe 2261 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2262 case 'x':
a0ed51b3
LW
2263 ++s;
2264 if (*s == '{') {
9d4ba2ae 2265 char* const e = strchr(s, '}');
a4c04bdc
NC
2266 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2267 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2268 STRLEN len;
355860ce 2269
53305cf1 2270 ++s;
adaeee49 2271 if (!e) {
a0ed51b3 2272 yyerror("Missing right brace on \\x{}");
355860ce 2273 continue;
ba210ebe 2274 }
53305cf1 2275 len = e - s;
77a135fe 2276 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2277 s = e + 1;
a0ed51b3
LW
2278 }
2279 else {
ba210ebe 2280 {
53305cf1 2281 STRLEN len = 2;
a4c04bdc 2282 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2283 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2284 s += len;
2285 }
012bcf8d
GS
2286 }
2287
2288 NUM_ESCAPE_INSERT:
77a135fe
KW
2289 /* Insert oct, hex, or \N{U+...} escaped character. There will
2290 * always be enough room in sv since such escapes will be
2291 * longer than any UTF-8 sequence they can end up as, except if
2292 * they force us to recode the rest of the string into utf8 */
ba7cea30 2293
77a135fe
KW
2294 /* Here uv is the ordinal of the next character being added in
2295 * unicode (converted from native). (It has to be done before
2296 * here because \N is interpreted as unicode, and oct and hex
2297 * as native.) */
2298 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2299 if (!has_utf8 && uv > 255) {
77a135fe
KW
2300 /* Might need to recode whatever we have accumulated so
2301 * far if it contains any chars variant in utf8 or
2302 * utf-ebcdic. */
2303
2304 SvCUR_set(sv, d - SvPVX_const(sv));
2305 SvPOK_on(sv);
2306 *d = '\0';
77a135fe 2307 /* See Note on sizing above. */
7bf79863
KW
2308 sv_utf8_upgrade_flags_grow(sv,
2309 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2310 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2311 d = SvPVX(sv) + SvCUR(sv);
2312 has_utf8 = TRUE;
012bcf8d
GS
2313 }
2314
77a135fe
KW
2315 if (has_utf8) {
2316 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2317 if (PL_lex_inwhat == OP_TRANS &&
2318 PL_sublex_info.sub_op) {
2319 PL_sublex_info.sub_op->op_private |=
2320 (PL_lex_repl ? OPpTRANS_FROM_UTF
2321 : OPpTRANS_TO_UTF);
f9a63242 2322 }
e294cc5d
JH
2323#ifdef EBCDIC
2324 if (uv > 255 && !dorange)
2325 native_range = FALSE;
2326#endif
012bcf8d 2327 }
a0ed51b3 2328 else {
012bcf8d 2329 *d++ = (char)uv;
a0ed51b3 2330 }
012bcf8d
GS
2331 }
2332 else {
c4d5f83a 2333 *d++ = (char) uv;
a0ed51b3 2334 }
79072805 2335 continue;
02aa26ce 2336
77a135fe
KW
2337 /* \N{LATIN SMALL LETTER A} is a named character, and so is
2338 * \N{U+0041} */
4a2d328f 2339 case 'N':
55eda711 2340 ++s;
423cee85
JH
2341 if (*s == '{') {
2342 char* e = strchr(s, '}');
155aba94 2343 SV *res;
423cee85 2344 STRLEN len;
cfd0369c 2345 const char *str;
4e553d73 2346
423cee85 2347 if (!e) {
5777a3f7 2348 yyerror("Missing right brace on \\N{}");
423cee85
JH
2349 e = s - 1;
2350 goto cont_scan;
2351 }
dbc0d4f2 2352 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
77a135fe
KW
2353 /* \N{U+...} The ... is a unicode value even on EBCDIC
2354 * machines */
dbc0d4f2
JH
2355 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2356 PERL_SCAN_DISALLOW_PREFIX;
2357 s += 3;
2358 len = e - s;
2359 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2360 if ( e > s && len != (STRLEN)(e - s) ) {
2361 uv = 0xFFFD;
fc8cd66c 2362 }
dbc0d4f2
JH
2363 s = e + 1;
2364 goto NUM_ESCAPE_INSERT;
2365 }
55eda711 2366 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2367 res = new_constant( NULL, 0, "charnames",
eb0d8d16 2368 res, NULL, s - 2, e - s + 3 );
f9a63242
JH
2369 if (has_utf8)
2370 sv_utf8_upgrade(res);
cfd0369c 2371 str = SvPV_const(res,len);
1c47067b
JH
2372#ifdef EBCDIC_NEVER_MIND
2373 /* charnames uses pack U and that has been
2374 * recently changed to do the below uni->native
2375 * mapping, so this would be redundant (and wrong,
2376 * the code point would be doubly converted).
2377 * But leave this in just in case the pack U change
2378 * gets revoked, but the semantics is still
2379 * desireable for charnames. --jhi */
cddc7ef4 2380 {
cfd0369c 2381 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2382
2383 if (uv < 0x100) {
89ebb4a3 2384 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2385
2386 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2387 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2388 str = SvPV_const(res, len);
cddc7ef4
JH
2389 }
2390 }
2391#endif
77a135fe
KW
2392 /* If destination is not in utf8 but this new character is,
2393 * recode the dest to utf8 */
89491803 2394 if (!has_utf8 && SvUTF8(res)) {
77a135fe 2395 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 2396 SvPOK_on(sv);
e4f3eed8 2397 *d = '\0';
77a135fe 2398 /* See Note on sizing above. */
7bf79863
KW
2399 sv_utf8_upgrade_flags_grow(sv,
2400 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2401 len + (STRLEN)(send - s) + 1);
f08d6ad9 2402 d = SvPVX(sv) + SvCUR(sv);
89491803 2403 has_utf8 = TRUE;
77a135fe 2404 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85 2405
77a135fe
KW
2406 /* See Note on sizing above. (NOTE: SvCUR() is not set
2407 * correctly here). */
2408 const STRLEN off = d - SvPVX_const(sv);
2409 d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
423cee85 2410 }
e294cc5d
JH
2411#ifdef EBCDIC
2412 if (!dorange)
2413 native_range = FALSE; /* \N{} is guessed to be Unicode */
2414#endif
423cee85
JH
2415 Copy(str, d, len, char);
2416 d += len;
2417 SvREFCNT_dec(res);
2418 cont_scan:
2419 s = e + 1;
2420 }
2421 else
5777a3f7 2422 yyerror("Missing braces on \\N{}");
423cee85
JH
2423 continue;
2424
02aa26ce 2425 /* \c is a control character */
79072805
LW
2426 case 'c':
2427 s++;
961ce445 2428 if (s < send) {
ba210ebe 2429 U8 c = *s++;
c7f1f016
NIS
2430#ifdef EBCDIC
2431 if (isLOWER(c))
2432 c = toUPPER(c);
2433#endif
db42d148 2434 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2435 }
961ce445
RGS
2436 else {
2437 yyerror("Missing control char name in \\c");
2438 }
79072805 2439 continue;
02aa26ce
NT
2440
2441 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2442 case 'b':
db42d148 2443 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2444 break;
2445 case 'n':
db42d148 2446 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2447 break;
2448 case 'r':
db42d148 2449 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2450 break;
2451 case 'f':
db42d148 2452 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2453 break;
2454 case 't':
db42d148 2455 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2456 break;
34a3fe2a 2457 case 'e':
db42d148 2458 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2459 break;
2460 case 'a':
db42d148 2461 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2462 break;
02aa26ce
NT
2463 } /* end switch */
2464
79072805
LW
2465 s++;
2466 continue;
02aa26ce 2467 } /* end if (backslash) */
4c3a8340
ST
2468#ifdef EBCDIC
2469 else
2470 literal_endpoint++;
2471#endif
02aa26ce 2472
f9a63242 2473 default_action:
77a135fe
KW
2474 /* If we started with encoded form, or already know we want it,
2475 then encode the next character */
2476 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 2477 STRLEN len = 1;
77a135fe
KW
2478
2479
2480 /* One might think that it is wasted effort in the case of the
2481 * source being utf8 (this_utf8 == TRUE) to take the next character
2482 * in the source, convert it to an unsigned value, and then convert
2483 * it back again. But the source has not been validated here. The
2484 * routine that does the conversion checks for errors like
2485 * malformed utf8 */
2486
5f66b61c
AL
2487 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2488 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
2489 if (!has_utf8) {
2490 SvCUR_set(sv, d - SvPVX_const(sv));
2491 SvPOK_on(sv);
2492 *d = '\0';
77a135fe 2493 /* See Note on sizing above. */
7bf79863
KW
2494 sv_utf8_upgrade_flags_grow(sv,
2495 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2496 need + (STRLEN)(send - s) + 1);
77a135fe
KW
2497 d = SvPVX(sv) + SvCUR(sv);
2498 has_utf8 = TRUE;
2499 } else if (need > len) {
2500 /* encoded value larger than old, may need extra space (NOTE:
2501 * SvCUR() is not set correctly here). See Note on sizing
2502 * above. */
9d4ba2ae 2503 const STRLEN off = d - SvPVX_const(sv);
77a135fe 2504 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 2505 }
77a135fe
KW
2506 s += len;
2507
5f66b61c 2508 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
2509#ifdef EBCDIC
2510 if (uv > 255 && !dorange)
2511 native_range = FALSE;
2512#endif
2b9d42f0
NIS
2513 }
2514 else {
2515 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2516 }
02aa26ce
NT
2517 } /* while loop to process each character */
2518
2519 /* terminate the string and set up the sv */
79072805 2520 *d = '\0';
95a20fc0 2521 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2522 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2523 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2524
79072805 2525 SvPOK_on(sv);
9f4817db 2526 if (PL_encoding && !has_utf8) {
d0063567
DK
2527 sv_recode_to_utf8(sv, PL_encoding);
2528 if (SvUTF8(sv))
2529 has_utf8 = TRUE;
9f4817db 2530 }
2b9d42f0 2531 if (has_utf8) {
7e2040f0 2532 SvUTF8_on(sv);
2b9d42f0 2533 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2534 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2535 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2536 }
2537 }
79072805 2538
02aa26ce 2539 /* shrink the sv if we allocated more than we used */
79072805 2540 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2541 SvPV_shrink_to_cur(sv);
79072805 2542 }
02aa26ce 2543
6154021b 2544 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 2545 if (s > PL_bufptr) {
eb0d8d16
NC
2546 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2547 const char *const key = PL_lex_inpat ? "qr" : "q";
2548 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2549 const char *type;
2550 STRLEN typelen;
2551
2552 if (PL_lex_inwhat == OP_TRANS) {
2553 type = "tr";
2554 typelen = 2;
2555 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2556 type = "s";
2557 typelen = 1;
2558 } else {
2559 type = "qq";
2560 typelen = 2;
2561 }
2562
2563 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2564 type, typelen);
2565 }
6154021b 2566 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2567 } else
8990e307 2568 SvREFCNT_dec(sv);
79072805
LW
2569 return s;
2570}
2571
ffb4593c
NT
2572/* S_intuit_more
2573 * Returns TRUE if there's more to the expression (e.g., a subscript),
2574 * FALSE otherwise.
ffb4593c
NT
2575 *
2576 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2577 *
2578 * ->[ and ->{ return TRUE
2579 * { and [ outside a pattern are always subscripts, so return TRUE
2580 * if we're outside a pattern and it's not { or [, then return FALSE
2581 * if we're in a pattern and the first char is a {
2582 * {4,5} (any digits around the comma) returns FALSE
2583 * if we're in a pattern and the first char is a [
2584 * [] returns FALSE
2585 * [SOMETHING] has a funky algorithm to decide whether it's a
2586 * character class or not. It has to deal with things like
2587 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2588 * anything else returns TRUE
2589 */
2590
9cbb5ea2
GS
2591/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2592
76e3520e 2593STATIC int
cea2e8a9 2594S_intuit_more(pTHX_ register char *s)
79072805 2595{
97aff369 2596 dVAR;
7918f24d
NC
2597
2598 PERL_ARGS_ASSERT_INTUIT_MORE;
2599
3280af22 2600 if (PL_lex_brackets)
79072805
LW
2601 return TRUE;
2602 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2603 return TRUE;
2604 if (*s != '{' && *s != '[')
2605 return FALSE;
3280af22 2606 if (!PL_lex_inpat)
79072805
LW
2607 return TRUE;
2608
2609 /* In a pattern, so maybe we have {n,m}. */
2610 if (*s == '{') {
2611 s++;
2612 if (!isDIGIT(*s))
2613 return TRUE;
2614 while (isDIGIT(*s))
2615 s++;
2616 if (*s == ',')
2617 s++;
2618 while (isDIGIT(*s))
2619 s++;
2620 if (*s == '}')
2621 return FALSE;
2622 return TRUE;
2623
2624 }
2625
2626 /* On the other hand, maybe we have a character class */
2627
2628 s++;
2629 if (*s == ']' || *s == '^')
2630 return FALSE;
2631 else {
ffb4593c 2632 /* this is terrifying, and it works */
79072805
LW
2633 int weight = 2; /* let's weigh the evidence */
2634 char seen[256];
f27ffc4a 2635 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2636 const char * const send = strchr(s,']');
3280af22 2637 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2638
2639 if (!send) /* has to be an expression */
2640 return TRUE;
2641
2642 Zero(seen,256,char);
2643 if (*s == '$')
2644 weight -= 3;
2645 else if (isDIGIT(*s)) {
2646 if (s[1] != ']') {
2647 if (isDIGIT(s[1]) && s[2] == ']')
2648 weight -= 10;
2649 }
2650 else
2651 weight -= 100;
2652 }
2653 for (; s < send; s++) {
2654 last_un_char = un_char;
2655 un_char = (unsigned char)*s;
2656 switch (*s) {
2657 case '@':
2658 case '&':
2659 case '$':
2660 weight -= seen[un_char] * 10;
7e2040f0 2661 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2662 int len;
8903cb82 2663 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2664 len = (int)strlen(tmpbuf);
2665 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2666 weight -= 100;
2667 else
2668 weight -= 10;
2669 }
2670 else if (*s == '$' && s[1] &&
93a17b20
LW
2671 strchr("[#!%*<>()-=",s[1])) {
2672 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2673 weight -= 10;
2674 else
2675 weight -= 1;
2676 }
2677 break;
2678 case '\\':
2679 un_char = 254;
2680 if (s[1]) {
93a17b20 2681 if (strchr("wds]",s[1]))
79072805 2682 weight += 100;
10edeb5d 2683 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2684 weight += 1;
93a17b20 2685 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2686 weight += 40;
2687 else if (isDIGIT(s[1])) {
2688 weight += 40;
2689 while (s[1] && isDIGIT(s[1]))
2690 s++;
2691 }
2692 }
2693 else
2694 weight += 100;
2695 break;
2696 case '-':
2697 if (s[1] == '\\')
2698 weight += 50;
93a17b20 2699 if (strchr("aA01! ",last_un_char))
79072805 2700 weight += 30;
93a17b20 2701 if (strchr("zZ79~",s[1]))
79072805 2702 weight += 30;
f27ffc4a
GS
2703 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2704 weight -= 5; /* cope with negative subscript */
79072805
LW
2705 break;
2706 default:
3792a11b
NC
2707 if (!isALNUM(last_un_char)
2708 && !(last_un_char == '$' || last_un_char == '@'
2709 || last_un_char == '&')
2710 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2711 char *d = tmpbuf;
2712 while (isALPHA(*s))
2713 *d++ = *s++;
2714 *d = '\0';
5458a98a 2715 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2716 weight -= 150;
2717 }
2718 if (un_char == last_un_char + 1)
2719 weight += 5;
2720 weight -= seen[un_char];
2721 break;
2722 }
2723 seen[un_char]++;
2724 }
2725 if (weight >= 0) /* probably a character class */
2726 return FALSE;
2727 }
2728
2729 return TRUE;
2730}
ffed7fef 2731
ffb4593c
NT
2732/*
2733 * S_intuit_method
2734 *
2735 * Does all the checking to disambiguate
2736 * foo bar
2737 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2738 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2739 *
2740 * First argument is the stuff after the first token, e.g. "bar".
2741 *
2742 * Not a method if bar is a filehandle.
2743 * Not a method if foo is a subroutine prototyped to take a filehandle.
2744 * Not a method if it's really "Foo $bar"
2745 * Method if it's "foo $bar"
2746 * Not a method if it's really "print foo $bar"
2747 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2748 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2749 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2750 * =>
2751 */
2752
76e3520e 2753STATIC int
62d55b22 2754S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2755{
97aff369 2756 dVAR;
a0d0e21e 2757 char *s = start + (*start == '$');
3280af22 2758 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2759 STRLEN len;
2760 GV* indirgv;
5db06880
NC
2761#ifdef PERL_MAD
2762 int soff;
2763#endif
a0d0e21e 2764
7918f24d
NC
2765 PERL_ARGS_ASSERT_INTUIT_METHOD;
2766
a0d0e21e 2767 if (gv) {
62d55b22 2768 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2769 return 0;
62d55b22
NC
2770 if (cv) {
2771 if (SvPOK(cv)) {
2772 const char *proto = SvPVX_const(cv);
2773 if (proto) {
2774 if (*proto == ';')
2775 proto++;
2776 if (*proto == '*')
2777 return 0;
2778 }
b6c543e3
IZ
2779 }
2780 } else
c35e046a 2781 gv = NULL;
a0d0e21e 2782 }
8903cb82 2783 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2784 /* start is the beginning of the possible filehandle/object,
2785 * and s is the end of it
2786 * tmpbuf is a copy of it
2787 */
2788
a0d0e21e 2789 if (*start == '$') {
3ef1310e
RGS
2790 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2791 isUPPER(*PL_tokenbuf))
a0d0e21e 2792 return 0;
5db06880
NC
2793#ifdef PERL_MAD
2794 len = start - SvPVX(PL_linestr);
2795#endif
29595ff2 2796 s = PEEKSPACE(s);
f0092767 2797#ifdef PERL_MAD
5db06880
NC
2798 start = SvPVX(PL_linestr) + len;
2799#endif
3280af22
NIS
2800 PL_bufptr = start;
2801 PL_expect = XREF;
a0d0e21e
LW
2802 return *s == '(' ? FUNCMETH : METHOD;
2803 }
5458a98a 2804 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2805 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2806 len -= 2;
2807 tmpbuf[len] = '\0';
5db06880
NC
2808#ifdef PERL_MAD
2809 soff = s - SvPVX(PL_linestr);
2810#endif
c3e0f903
GS
2811 goto bare_package;
2812 }
90e5519e 2813 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2814 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2815 return 0;
2816 /* filehandle or package name makes it a method */
da51bb9b 2817 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2818#ifdef PERL_MAD
2819 soff = s - SvPVX(PL_linestr);
2820#endif
29595ff2 2821 s = PEEKSPACE(s);
3280af22 2822 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2823 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2824 bare_package:
cd81e915 2825 start_force(PL_curforce);
9ded7720 2826 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2827 newSVpvn(tmpbuf,len));
9ded7720 2828 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2829 if (PL_madskills)
2830 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2831 PL_expect = XTERM;
a0d0e21e 2832 force_next(WORD);
3280af22 2833 PL_bufptr = s;
5db06880
NC
2834#ifdef PERL_MAD
2835 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2836#endif
a0d0e21e
LW
2837 return *s == '(' ? FUNCMETH : METHOD;
2838 }
2839 }
2840 return 0;
2841}
2842
16d20bd9 2843/* Encoded script support. filter_add() effectively inserts a
4e553d73 2844 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2845 * Note that the filter function only applies to the current source file
2846 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2847 *
2848 * The datasv parameter (which may be NULL) can be used to pass
2849 * private data to this instance of the filter. The filter function
2850 * can recover the SV using the FILTER_DATA macro and use it to
2851 * store private buffers and state information.
2852 *
2853 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2854 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2855 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2856 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2857 * private use must be set using malloc'd pointers.
2858 */
16d20bd9
AD
2859
2860SV *
864dbfa3 2861Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2862{
97aff369 2863 dVAR;
f4c556ac 2864 if (!funcp)
a0714e2c 2865 return NULL;
f4c556ac 2866
5486870f
DM
2867 if (!PL_parser)
2868 return NULL;
2869
3280af22
NIS
2870 if (!PL_rsfp_filters)
2871 PL_rsfp_filters = newAV();
16d20bd9 2872 if (!datasv)
561b68a9 2873 datasv = newSV(0);
862a34c6 2874 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2875 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2876 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2877 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2878 FPTR2DPTR(void *, IoANY(datasv)),
2879 SvPV_nolen(datasv)));
3280af22
NIS
2880 av_unshift(PL_rsfp_filters, 1);
2881 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2882 return(datasv);
2883}
4e553d73 2884
16d20bd9
AD
2885
2886/* Delete most recently added instance of this filter function. */
a0d0e21e 2887void
864dbfa3 2888Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2889{
97aff369 2890 dVAR;
e0c19803 2891 SV *datasv;
24801a4b 2892
7918f24d
NC
2893 PERL_ARGS_ASSERT_FILTER_DEL;
2894
33073adb 2895#ifdef DEBUGGING
55662e27
JH
2896 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2897 FPTR2DPTR(void*, funcp)));
33073adb 2898#endif
5486870f 2899 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2900 return;
2901 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2902 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2903 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2904 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2905 IoANY(datasv) = (void *)NULL;
3280af22 2906 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2907
16d20bd9
AD
2908 return;
2909 }
2910 /* we need to search for the correct entry and clear it */
cea2e8a9 2911 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2912}
2913
2914
1de9afcd
RGS
2915/* Invoke the idxth filter function for the current rsfp. */
2916/* maxlen 0 = read one text line */
16d20bd9 2917I32
864dbfa3 2918Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2919{
97aff369 2920 dVAR;
16d20bd9
AD
2921 filter_t funcp;
2922 SV *datasv = NULL;
f482118e
NC
2923 /* This API is bad. It should have been using unsigned int for maxlen.
2924 Not sure if we want to change the API, but if not we should sanity
2925 check the value here. */
39cd7a59
NC
2926 const unsigned int correct_length
2927 = maxlen < 0 ?
2928#ifdef PERL_MICRO
2929 0x7FFFFFFF
2930#else
2931 INT_MAX
2932#endif
2933 : maxlen;
e50aee73 2934
7918f24d
NC
2935 PERL_ARGS_ASSERT_FILTER_READ;
2936
5486870f 2937 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 2938 return -1;
1de9afcd 2939 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2940 /* Provide a default input filter to make life easy. */
2941 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2942 DEBUG_P(PerlIO_printf(Perl_debug_log,
2943 "filter_read %d: from rsfp\n", idx));
f482118e 2944 if (correct_length) {
16d20bd9
AD
2945 /* Want a block */
2946 int len ;
f54cb97a 2947 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2948
2949 /* ensure buf_sv is large enough */
f482118e
NC
2950 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2951 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2952 correct_length)) <= 0) {
3280af22 2953 if (PerlIO_error(PL_rsfp))
37120919
AD
2954 return -1; /* error */
2955 else
2956 return 0 ; /* end of file */
2957 }
16d20bd9
AD
2958 SvCUR_set(buf_sv, old_len + len) ;
2959 } else {
2960 /* Want a line */
3280af22
NIS
2961 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2962 if (PerlIO_error(PL_rsfp))
37120919
AD
2963 return -1; /* error */
2964 else
2965 return 0 ; /* end of file */
2966 }
16d20bd9
AD
2967 }
2968 return SvCUR(buf_sv);
2969 }
2970 /* Skip this filter slot if filter has been deleted */
1de9afcd 2971 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2972 DEBUG_P(PerlIO_printf(Perl_debug_log,
2973 "filter_read %d: skipped (filter deleted)\n",
2974 idx));
f482118e 2975 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2976 }
2977 /* Get function pointer hidden within datasv */
8141890a 2978 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2979 DEBUG_P(PerlIO_printf(Perl_debug_log,
2980 "filter_read %d: via function %p (%s)\n",
ca0270c4 2981 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2982 /* Call function. The function is expected to */
2983 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2984 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2985 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2986}
2987
76e3520e 2988STATIC char *
cea2e8a9 2989S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2990{
97aff369 2991 dVAR;
7918f24d
NC
2992
2993 PERL_ARGS_ASSERT_FILTER_GETS;
2994
c39cd008 2995#ifdef PERL_CR_FILTER
3280af22 2996 if (!PL_rsfp_filters) {
c39cd008 2997 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2998 }
2999#endif
3280af22 3000 if (PL_rsfp_filters) {
55497cff
PP
3001 if (!append)
3002 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3003 if (FILTER_READ(0, sv, 0) > 0)
3004 return ( SvPVX(sv) ) ;
3005 else
bd61b366 3006 return NULL ;
16d20bd9 3007 }
9d116dd7 3008 else
fd049845 3009 return (sv_gets(sv, fp, append));
a0d0e21e
LW
3010}
3011
01ec43d0 3012STATIC HV *
9bde8eb0 3013S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3014{
97aff369 3015 dVAR;
def3634b
GS
3016 GV *gv;
3017
7918f24d
NC
3018 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3019
01ec43d0 3020 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3021 return PL_curstash;
3022
3023 if (len > 2 &&
3024 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3025 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3026 {
3027 return GvHV(gv); /* Foo:: */
def3634b
GS
3028 }
3029
3030 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3031 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3032 if (gv && GvCV(gv)) {
3033 SV * const sv = cv_const_sv(GvCV(gv));
3034 if (sv)
9bde8eb0 3035 pkgname = SvPV_const(sv, len);
def3634b
GS
3036 }
3037
9bde8eb0 3038 return gv_stashpvn(pkgname, len, 0);
def3634b 3039}
a0d0e21e 3040
e3f73d4e
RGS
3041/*
3042 * S_readpipe_override
3043 * Check whether readpipe() is overriden, and generates the appropriate
3044 * optree, provided sublex_start() is called afterwards.
3045 */
3046STATIC void
1d51329b 3047S_readpipe_override(pTHX)
e3f73d4e
RGS
3048{
3049 GV **gvp;
3050 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3051 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3052 if ((gv_readpipe
3053 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3054 ||
3055 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3056 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3057 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3058 {
3059 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3060 append_elem(OP_LIST,
3061 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3062 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3063 }
e3f73d4e
RGS
3064}
3065
5db06880
NC
3066#ifdef PERL_MAD
3067 /*
3068 * Perl_madlex
3069 * The intent of this yylex wrapper is to minimize the changes to the
3070 * tokener when we aren't interested in collecting madprops. It remains
3071 * to be seen how successful this strategy will be...
3072 */
3073
3074int
3075Perl_madlex(pTHX)
3076{
3077 int optype;
3078 char *s = PL_bufptr;
3079
cd81e915
NC
3080 /* make sure PL_thiswhite is initialized */
3081 PL_thiswhite = 0;
3082 PL_thismad = 0;
5db06880 3083
cd81e915 3084 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3085 if (PL_pending_ident)
3086 return S_pending_ident(aTHX);
3087
3088 /* previous token ate up our whitespace? */
cd81e915
NC
3089 if (!PL_lasttoke && PL_nextwhite) {
3090 PL_thiswhite = PL_nextwhite;
3091 PL_nextwhite = 0;
5db06880
NC
3092 }
3093
3094 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3095 PL_realtokenstart = -1;
3096 PL_thistoken = 0;
5db06880
NC
3097 optype = yylex();
3098 s = PL_bufptr;
cd81e915 3099 assert(PL_curforce < 0);
5db06880 3100
cd81e915
NC
3101 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3102 if (!PL_thistoken) {
3103 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3104 PL_thistoken = newSVpvs("");
5db06880 3105 else {
c35e046a 3106 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3107 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3108 }
3109 }
cd81e915
NC
3110 if (PL_thismad) /* install head */
3111 CURMAD('X', PL_thistoken);
5db06880
NC
3112 }
3113
3114 /* last whitespace of a sublex? */
cd81e915
NC
3115 if (optype == ')' && PL_endwhite) {
3116 CURMAD('X', PL_endwhite);
5db06880
NC
3117 }
3118
cd81e915 3119 if (!PL_thismad) {
5db06880
NC
3120
3121 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3122 if (!PL_thiswhite && !PL_endwhite && !optype) {
3123 sv_free(PL_thistoken);
3124 PL_thistoken = 0;
5db06880
NC
3125 return 0;
3126 }
3127
3128 /* put off final whitespace till peg */
3129 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3130 PL_nextwhite = PL_thiswhite;
3131 PL_thiswhite = 0;
5db06880 3132 }
cd81e915
NC
3133 else if (PL_thisopen) {
3134 CURMAD('q', PL_thisopen);
3135 if (PL_thistoken)
3136 sv_free(PL_thistoken);
3137 PL_thistoken = 0;
5db06880
NC
3138 }
3139 else {
3140 /* Store actual token text as madprop X */
cd81e915 3141 CURMAD('X', PL_thistoken);
5db06880
NC
3142 }
3143
cd81e915 3144 if (PL_thiswhite) {
5db06880 3145 /* add preceding whitespace as madprop _ */
cd81e915 3146 CURMAD('_', PL_thiswhite);
5db06880
NC
3147 }
3148
cd81e915 3149 if (PL_thisstuff) {
5db06880 3150 /* add quoted material as madprop = */
cd81e915 3151 CURMAD('=', PL_thisstuff);
5db06880
NC
3152 }
3153
cd81e915 3154 if (PL_thisclose) {
5db06880 3155 /* add terminating quote as madprop Q */
cd81e915 3156 CURMAD('Q', PL_thisclose);
5db06880
NC
3157 }
3158 }
3159
3160 /* special processing based on optype */
3161
3162 switch (optype) {
3163
3164 /* opval doesn't need a TOKEN since it can already store mp */
3165 case WORD:
3166 case METHOD:
3167 case FUNCMETH:
3168 case THING:
3169 case PMFUNC:
3170 case PRIVATEREF:
3171 case FUNC0SUB:
3172 case UNIOPSUB:
3173 case LSTOPSUB:
6154021b
RGS
3174 if (pl_yylval.opval)
3175 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 3176 PL_thismad = 0;
5db06880
NC
3177 return optype;
3178
3179 /* fake EOF */
3180 case 0:
3181 optype = PEG;
cd81e915
NC
3182 if (PL_endwhite) {
3183 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3184 PL_endwhite = 0;
5db06880
NC
3185 }
3186 break;
3187
3188 case ']':
3189 case '}':
cd81e915 3190 if (PL_faketokens)
5db06880
NC
3191 break;
3192 /* remember any fake bracket that lexer is about to discard */
3193 if (PL_lex_brackets == 1 &&
3194 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3195 {
3196 s = PL_bufptr;
3197 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3198 s++;
3199 if (*s == '}') {
cd81e915
NC
3200 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3201 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3202 PL_thiswhite = 0;
5db06880
NC
3203 PL_bufptr = s - 1;
3204 break; /* don't bother looking for trailing comment */
3205 }
3206 else
3207 s = PL_bufptr;
3208 }
3209 if (optype == ']')
3210 break;
3211 /* FALLTHROUGH */
3212
3213 /* attach a trailing comment to its statement instead of next token */
3214 case ';':
cd81e915 3215 if (PL_faketokens)
5db06880
NC
3216 break;
3217 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3218 s = PL_bufptr;
3219 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3220 s++;
3221 if (*s == '\n' || *s == '#') {
3222 while (s < PL_bufend && *s != '\n')
3223 s++;
3224 if (s < PL_bufend)
3225 s++;
cd81e915
NC
3226 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3227 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3228 PL_thiswhite = 0;
5db06880
NC
3229 PL_bufptr = s;
3230 }
3231 }
3232 break;
3233
3234 /* pval */
3235 case LABEL:
3236 break;
3237
3238 /* ival */
3239 default:
3240 break;
3241
3242 }
3243
3244 /* Create new token struct. Note: opvals return early above. */
6154021b 3245 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 3246 PL_thismad = 0;
5db06880
NC
3247 return optype;
3248}
3249#endif
3250
468aa647 3251STATIC char *
cc6ed77d 3252S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3253 dVAR;
7918f24d
NC
3254
3255 PERL_ARGS_ASSERT_TOKENIZE_USE;
3256
468aa647
RGS
3257 if (PL_expect != XSTATE)
3258 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3259 is_use ? "use" : "no"));
29595ff2 3260 s = SKIPSPACE1(s);
468aa647
RGS
3261 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3262 s = force_version(s, TRUE);
29595ff2 3263 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3264 start_force(PL_curforce);
9ded7720 3265 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3266 force_next(WORD);
3267 }
3268 else if (*s == 'v') {
3269 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3270 s = force_version(s, FALSE);
3271 }
3272 }
3273 else {
3274 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3275 s = force_version(s, FALSE);
3276 }
6154021b 3277 pl_yylval.ival = is_use;
468aa647
RGS
3278 return s;
3279}
748a9306 3280#ifdef DEBUGGING
27da23d5 3281 static const char* const exp_name[] =
09bef843 3282 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3283 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3284 };
748a9306 3285#endif
463ee0b2 3286
02aa26ce
NT
3287/*
3288 yylex
3289
3290 Works out what to call the token just pulled out of the input
3291 stream. The yacc parser takes care of taking the ops we return and
3292 stitching them into a tree.
3293
3294 Returns:
3295 PRIVATEREF
3296
3297 Structure:
3298 if read an identifier
3299 if we're in a my declaration
3300 croak if they tried to say my($foo::bar)
3301 build the ops for a my() declaration
3302 if it's an access to a my() variable
3303 are we in a sort block?
3304 croak if my($a); $a <=> $b
3305 build ops for access to a my() variable
3306 if in a dq string, and they've said @foo and we can't find @foo
3307 croak
3308 build ops for a bareword
3309 if we already built the token before, use it.
3310*/
3311
20141f0e 3312
dba4d153
JH
3313#ifdef __SC__
3314#pragma segment Perl_yylex
3315#endif
dba4d153 3316int
dba4d153 3317Perl_yylex(pTHX)
20141f0e 3318{
97aff369 3319 dVAR;
3afc138a 3320 register char *s = PL_bufptr;
378cc40b 3321 register char *d;
463ee0b2 3322 STRLEN len;
aa7440fb 3323 bool bof = FALSE;
a687059c 3324
10edeb5d
JH
3325 /* orig_keyword, gvp, and gv are initialized here because
3326 * jump to the label just_a_word_zero can bypass their
3327 * initialization later. */
3328 I32 orig_keyword = 0;
3329 GV *gv = NULL;
3330 GV **gvp = NULL;
3331
bbf60fe6 3332 DEBUG_T( {
396482e1 3333 SV* tmp = newSVpvs("");
b6007c36
DM
3334 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3335 (IV)CopLINE(PL_curcop),
3336 lex_state_names[PL_lex_state],
3337 exp_name[PL_expect],
3338 pv_display(tmp, s, strlen(s), 0, 60));
3339 SvREFCNT_dec(tmp);
bbf60fe6 3340 } );
02aa26ce 3341 /* check if there's an identifier for us to look at */
ba979b31 3342 if (PL_pending_ident)
bbf60fe6 3343 return REPORT(S_pending_ident(aTHX));
bbce6d69 3344
02aa26ce
NT
3345 /* no identifier pending identification */
3346
3280af22 3347 switch (PL_lex_state) {
79072805
LW
3348#ifdef COMMENTARY
3349 case LEX_NORMAL: /* Some compilers will produce faster */
3350 case LEX_INTERPNORMAL: /* code if we comment these out. */
3351 break;
3352#endif
3353
09bef843 3354 /* when we've already built the next token, just pull it out of the queue */
79072805 3355 case LEX_KNOWNEXT:
5db06880
NC
3356#ifdef PERL_MAD
3357 PL_lasttoke--;
6154021b 3358 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 3359 if (PL_madskills) {
cd81e915 3360 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3361 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 3362 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 3363 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
3364 PL_thismad->mad_val = 0;
3365 mad_free(PL_thismad);
3366 PL_thismad = 0;
5db06880
NC
3367 }
3368 }
3369 if (!PL_lasttoke) {
3370 PL_lex_state = PL_lex_defer;
3371 PL_expect = PL_lex_expect;
3372 PL_lex_defer = LEX_NORMAL;
3373 if (!PL_nexttoke[PL_lasttoke].next_type)
3374 return yylex();
3375 }
3376#else
3280af22 3377 PL_nexttoke--;
6154021b 3378 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3379 if (!PL_nexttoke) {
3380 PL_lex_state = PL_lex_defer;
3381 PL_expect = PL_lex_expect;
3382 PL_lex_defer = LEX_NORMAL;
463ee0b2 3383 }
5db06880
NC
3384#endif
3385#ifdef PERL_MAD
3386 /* FIXME - can these be merged? */
3387 return(PL_nexttoke[PL_lasttoke].next_type);
3388#else
bbf60fe6 3389 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3390#endif
79072805 3391
02aa26ce 3392 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3393 when we get here, PL_bufptr is at the \
02aa26ce 3394 */
79072805
LW
3395 case LEX_INTERPCASEMOD:
3396#ifdef DEBUGGING
3280af22 3397 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3398 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3399#endif
02aa26ce 3400 /* handle \E or end of string */
3280af22 3401 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3402 /* if at a \E */
3280af22 3403 if (PL_lex_casemods) {
f54cb97a 3404 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3405 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3406
3792a11b
NC
3407 if (PL_bufptr != PL_bufend
3408 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3409 PL_bufptr += 2;
3410 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3411#ifdef PERL_MAD
3412 if (PL_madskills)
6b29d1f5 3413 PL_thistoken = newSVpvs("\\E");
5db06880 3414#endif
a0d0e21e 3415 }
bbf60fe6 3416 return REPORT(')');
79072805 3417 }
5db06880
NC
3418#ifdef PERL_MAD
3419 while (PL_bufptr != PL_bufend &&
3420 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3421 if (!PL_thiswhite)
6b29d1f5 3422 PL_thiswhite = newSVpvs("");
cd81e915 3423 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3424 PL_bufptr += 2;
3425 }
3426#else
3280af22
NIS
3427 if (PL_bufptr != PL_bufend)
3428 PL_bufptr += 2;
5db06880 3429#endif
3280af22 3430 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3431 return yylex();
79072805
LW
3432 }
3433 else {
607df283 3434 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3435 "### Saw case modifier\n"); });
3280af22 3436 s = PL_bufptr + 1;
6e909404 3437 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3438#ifdef PERL_MAD
cd81e915 3439 if (!PL_thiswhite)
6b29d1f5 3440 PL_thiswhite = newSVpvs("");
cd81e915 3441 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3442#endif
89122651 3443 PL_bufptr = s + 3;
6e909404
JH
3444 PL_lex_state = LEX_INTERPCONCAT;
3445 return yylex();
a0d0e21e 3446 }
6e909404 3447 else {
90771dc0 3448 I32 tmp;
5db06880
NC
3449 if (!PL_madskills) /* when just compiling don't need correct */
3450 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3451 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3452 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3453 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3454 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3455 return REPORT(')');
6e909404
JH
3456 }
3457 if (PL_lex_casemods > 10)
3458 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3459 PL_lex_casestack[PL_lex_casemods++] = *s;
3460 PL_lex_casestack[PL_lex_casemods] = '\0';
3461 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3462 start_force(PL_curforce);
9ded7720 3463 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3464 force_next('(');
cd81e915 3465 start_force(PL_curforce);
6e909404 3466 if (*s == 'l')
9ded7720 3467 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3468 else if (*s == 'u')
9ded7720 3469 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3470 else if (*s == 'L')
9ded7720 3471 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3472 else if (*s == 'U')
9ded7720 3473 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3474 else if (*s == 'Q')
9ded7720 3475 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3476 else
3477 Perl_croak(aTHX_ "panic: yylex");
5db06880 3478 if (PL_madskills) {
a5849ce5
NC
3479 SV* const tmpsv = newSVpvs("\\ ");
3480 /* replace the space with the character we want to escape
3481 */
3482 SvPVX(tmpsv)[1] = *s;
5db06880
NC
3483 curmad('_', tmpsv);
3484 }
6e909404 3485 PL_bufptr = s + 1;
a0d0e21e 3486 }
79072805 3487 force_next(FUNC);
3280af22
NIS
3488 if (PL_lex_starts) {
3489 s = PL_bufptr;
3490 PL_lex_starts = 0;
5db06880
NC
3491#ifdef PERL_MAD
3492 if (PL_madskills) {
cd81e915
NC
3493 if (PL_thistoken)
3494 sv_free(PL_thistoken);
6b29d1f5 3495 PL_thistoken = newSVpvs("");
5db06880
NC
3496 }
3497#endif
131b3ad0
DM
3498 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3499 if (PL_lex_casemods == 1 && PL_lex_inpat)
3500 OPERATOR(',');
3501 else
3502 Aop(OP_CONCAT);
79072805
LW
3503 }
3504 else
cea2e8a9 3505 return yylex();
79072805
LW
3506 }
3507
55497cff 3508 case LEX_INTERPPUSH:
bbf60fe6 3509 return REPORT(sublex_push());
55497cff 3510
79072805 3511 case LEX_INTERPSTART:
3280af22 3512 if (PL_bufptr == PL_bufend)
bbf60fe6 3513 return REPORT(sublex_done());
607df283 3514 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3515 "### Interpolated variable\n"); });
3280af22
NIS
3516 PL_expect = XTERM;
3517 PL_lex_dojoin = (*PL_bufptr == '@');
3518 PL_lex_state = LEX_INTERPNORMAL;
3519 if (PL_lex_dojoin) {
cd81e915 3520 start_force(PL_curforce);
9ded7720 3521 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3522 force_next(',');
cd81e915 3523 start_force(PL_curforce);
a0d0e21e 3524 force_ident("\"", '$');
cd81e915 3525 start_force(PL_curforce);
9ded7720 3526 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3527 force_next('$');
cd81e915 3528 start_force(PL_curforce);
9ded7720 3529 NEXTVAL_NEXTTOKE.ival = 0;
79072805