This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
make add-package git friendly and fix bugs (take 2)
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
378cc40b 24#include "EXTERN.h"
864dbfa3 25#define PERL_IN_TOKE_C
378cc40b 26#include "perl.h"
378cc40b 27
eb0d8d16
NC
28#define new_constant(a,b,c,d,e,f,g) \
29 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
30
6154021b 31#define pl_yylval (PL_parser->yylval)
d3b6f988 32
acdf0a21
DM
33/* YYINITDEPTH -- initial size of the parser's stacks. */
34#define YYINITDEPTH 200
35
199e78b7
DM
36/* XXX temporary backwards compatibility */
37#define PL_lex_brackets (PL_parser->lex_brackets)
38#define PL_lex_brackstack (PL_parser->lex_brackstack)
39#define PL_lex_casemods (PL_parser->lex_casemods)
40#define PL_lex_casestack (PL_parser->lex_casestack)
41#define PL_lex_defer (PL_parser->lex_defer)
42#define PL_lex_dojoin (PL_parser->lex_dojoin)
43#define PL_lex_expect (PL_parser->lex_expect)
44#define PL_lex_formbrack (PL_parser->lex_formbrack)
45#define PL_lex_inpat (PL_parser->lex_inpat)
46#define PL_lex_inwhat (PL_parser->lex_inwhat)
47#define PL_lex_op (PL_parser->lex_op)
48#define PL_lex_repl (PL_parser->lex_repl)
49#define PL_lex_starts (PL_parser->lex_starts)
50#define PL_lex_stuff (PL_parser->lex_stuff)
51#define PL_multi_start (PL_parser->multi_start)
52#define PL_multi_open (PL_parser->multi_open)
53#define PL_multi_close (PL_parser->multi_close)
54#define PL_pending_ident (PL_parser->pending_ident)
55#define PL_preambled (PL_parser->preambled)
56#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 57#define PL_linestr (PL_parser->linestr)
c2598295
DM
58#define PL_expect (PL_parser->expect)
59#define PL_copline (PL_parser->copline)
f06b5848
DM
60#define PL_bufptr (PL_parser->bufptr)
61#define PL_oldbufptr (PL_parser->oldbufptr)
62#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
63#define PL_linestart (PL_parser->linestart)
64#define PL_bufend (PL_parser->bufend)
65#define PL_last_uni (PL_parser->last_uni)
66#define PL_last_lop (PL_parser->last_lop)
67#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 68#define PL_lex_state (PL_parser->lex_state)
2f9285f8 69#define PL_rsfp (PL_parser->rsfp)
5486870f 70#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
71#define PL_in_my (PL_parser->in_my)
72#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 73#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 74#define PL_multi_end (PL_parser->multi_end)
13765c85 75#define PL_error_count (PL_parser->error_count)
199e78b7
DM
76
77#ifdef PERL_MAD
78# define PL_endwhite (PL_parser->endwhite)
79# define PL_faketokens (PL_parser->faketokens)
80# define PL_lasttoke (PL_parser->lasttoke)
81# define PL_nextwhite (PL_parser->nextwhite)
82# define PL_realtokenstart (PL_parser->realtokenstart)
83# define PL_skipwhite (PL_parser->skipwhite)
84# define PL_thisclose (PL_parser->thisclose)
85# define PL_thismad (PL_parser->thismad)
86# define PL_thisopen (PL_parser->thisopen)
87# define PL_thisstuff (PL_parser->thisstuff)
88# define PL_thistoken (PL_parser->thistoken)
89# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
90# define PL_thiswhite (PL_parser->thiswhite)
91# define PL_nexttoke (PL_parser->nexttoke)
92# define PL_curforce (PL_parser->curforce)
93#else
94# define PL_nexttoke (PL_parser->nexttoke)
95# define PL_nexttype (PL_parser->nexttype)
96# define PL_nextval (PL_parser->nextval)
199e78b7
DM
97#endif
98
3cbf51f5
DM
99static int
100S_pending_ident(pTHX);
199e78b7 101
0bd48802 102static const char ident_too_long[] = "Identifier too long";
c445ea15 103static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 104
6e3aabd6 105#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
106static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
107static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 108#endif
51371543 109
29595ff2 110#ifdef PERL_MAD
29595ff2 111# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 112# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 113#else
5db06880 114# define CURMAD(slot,sv)
9ded7720 115# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
116#endif
117
9059aa12
LW
118#define XFAKEBRACK 128
119#define XENUMMASK 127
120
39e02b42
JH
121#ifdef USE_UTF8_SCRIPTS
122# define UTF (!IN_BYTES)
2b9d42f0 123#else
746b446a 124# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 125#endif
a0ed51b3 126
61f0cdd9 127/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
128 * 1999-02-27 mjd-perl-patch@plover.com */
129#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
130
bf4acbe4
GS
131/* On MacOS, respect nonbreaking spaces */
132#ifdef MACOS_TRADITIONAL
133#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
134#else
135#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
136#endif
137
ffb4593c
NT
138/* LEX_* are values for PL_lex_state, the state of the lexer.
139 * They are arranged oddly so that the guard on the switch statement
79072805
LW
140 * can get by with a single comparison (if the compiler is smart enough).
141 */
142
fb73857a
PP
143/* #define LEX_NOTPARSING 11 is done in perl.h. */
144
b6007c36
DM
145#define LEX_NORMAL 10 /* normal code (ie not within "...") */
146#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
147#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
148#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
149#define LEX_INTERPSTART 6 /* expecting the start of a $var */
150
151 /* at end of code, eg "$x" followed by: */
152#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
153#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
154
155#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
156 string or after \E, $foo, etc */
157#define LEX_INTERPCONST 2 /* NOT USED */
158#define LEX_FORMLINE 1 /* expecting a format line */
159#define LEX_KNOWNEXT 0 /* next token known; just return it */
160
79072805 161
bbf60fe6 162#ifdef DEBUGGING
27da23d5 163static const char* const lex_state_names[] = {
bbf60fe6
DM
164 "KNOWNEXT",
165 "FORMLINE",
166 "INTERPCONST",
167 "INTERPCONCAT",
168 "INTERPENDMAYBE",
169 "INTERPEND",
170 "INTERPSTART",
171 "INTERPPUSH",
172 "INTERPCASEMOD",
173 "INTERPNORMAL",
174 "NORMAL"
175};
176#endif
177
79072805
LW
178#ifdef ff_next
179#undef ff_next
d48672a2
LW
180#endif
181
79072805 182#include "keywords.h"
fe14fcc3 183
ffb4593c
NT
184/* CLINE is a macro that ensures PL_copline has a sane value */
185
ae986130
LW
186#ifdef CLINE
187#undef CLINE
188#endif
57843af0 189#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 190
5db06880 191#ifdef PERL_MAD
29595ff2
NC
192# define SKIPSPACE0(s) skipspace0(s)
193# define SKIPSPACE1(s) skipspace1(s)
194# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
195# define PEEKSPACE(s) skipspace2(s,0)
196#else
197# define SKIPSPACE0(s) skipspace(s)
198# define SKIPSPACE1(s) skipspace(s)
199# define SKIPSPACE2(s,tsv) skipspace(s)
200# define PEEKSPACE(s) skipspace(s)
201#endif
202
ffb4593c
NT
203/*
204 * Convenience functions to return different tokens and prime the
9cbb5ea2 205 * lexer for the next token. They all take an argument.
ffb4593c
NT
206 *
207 * TOKEN : generic token (used for '(', DOLSHARP, etc)
208 * OPERATOR : generic operator
209 * AOPERATOR : assignment operator
210 * PREBLOCK : beginning the block after an if, while, foreach, ...
211 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
212 * PREREF : *EXPR where EXPR is not a simple identifier
213 * TERM : expression term
214 * LOOPX : loop exiting command (goto, last, dump, etc)
215 * FTST : file test operator
216 * FUN0 : zero-argument function
2d2e263d 217 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
218 * BOop : bitwise or or xor
219 * BAop : bitwise and
220 * SHop : shift operator
221 * PWop : power operator
9cbb5ea2 222 * PMop : pattern-matching operator
ffb4593c
NT
223 * Aop : addition-level operator
224 * Mop : multiplication-level operator
225 * Eop : equality-testing operator
e5edeb50 226 * Rop : relational operator <= != gt
ffb4593c
NT
227 *
228 * Also see LOP and lop() below.
229 */
230
998054bd 231#ifdef DEBUGGING /* Serve -DT. */
704d4215 232# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 233#else
bbf60fe6 234# define REPORT(retval) (retval)
998054bd
SC
235#endif
236
bbf60fe6
DM
237#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
238#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
239#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
240#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
241#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
242#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
243#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
244#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
245#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
246#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
247#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
248#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
249#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
250#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
251#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
252#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
253#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
254#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
255#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
256#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 257
a687059c
LW
258/* This bit of chicanery makes a unary function followed by
259 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
260 * The UNIDOR macro is for unary functions that can be followed by the //
261 * operator (such as C<shift // 0>).
a687059c 262 */
376fcdbf 263#define UNI2(f,x) { \
6154021b 264 pl_yylval.ival = f; \
376fcdbf
AL
265 PL_expect = x; \
266 PL_bufptr = s; \
267 PL_last_uni = PL_oldbufptr; \
268 PL_last_lop_op = f; \
269 if (*s == '(') \
270 return REPORT( (int)FUNC1 ); \
29595ff2 271 s = PEEKSPACE(s); \
376fcdbf
AL
272 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
273 }
6f33ba73
RGS
274#define UNI(f) UNI2(f,XTERM)
275#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 276
376fcdbf 277#define UNIBRACK(f) { \
6154021b 278 pl_yylval.ival = f; \
376fcdbf
AL
279 PL_bufptr = s; \
280 PL_last_uni = PL_oldbufptr; \
281 if (*s == '(') \
282 return REPORT( (int)FUNC1 ); \
29595ff2 283 s = PEEKSPACE(s); \
376fcdbf
AL
284 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
285 }
79072805 286
9f68db38 287/* grandfather return to old style */
6154021b 288#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 289
8fa7f367
JH
290#ifdef DEBUGGING
291
6154021b 292/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
293enum token_type {
294 TOKENTYPE_NONE,
295 TOKENTYPE_IVAL,
6154021b 296 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
297 TOKENTYPE_PVAL,
298 TOKENTYPE_OPVAL,
299 TOKENTYPE_GVVAL
300};
301
6d4a66ac
NC
302static struct debug_tokens {
303 const int token;
304 enum token_type type;
305 const char *name;
306} const debug_tokens[] =
9041c2e3 307{
bbf60fe6
DM
308 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
309 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
310 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
311 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
312 { ARROW, TOKENTYPE_NONE, "ARROW" },
313 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
314 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
315 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
316 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
317 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 318 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
319 { DO, TOKENTYPE_NONE, "DO" },
320 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
321 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
322 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
323 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
324 { ELSE, TOKENTYPE_NONE, "ELSE" },
325 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
326 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
327 { FOR, TOKENTYPE_IVAL, "FOR" },
328 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
329 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
330 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
331 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
332 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
333 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 334 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
335 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
336 { IF, TOKENTYPE_IVAL, "IF" },
337 { LABEL, TOKENTYPE_PVAL, "LABEL" },
338 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
339 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
340 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
341 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
342 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
343 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
344 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
345 { MY, TOKENTYPE_IVAL, "MY" },
346 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
347 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
348 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
349 { OROP, TOKENTYPE_IVAL, "OROP" },
350 { OROR, TOKENTYPE_NONE, "OROR" },
351 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
352 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
353 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
354 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
355 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
356 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
357 { PREINC, TOKENTYPE_NONE, "PREINC" },
358 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
359 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
360 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
361 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
362 { SUB, TOKENTYPE_NONE, "SUB" },
363 { THING, TOKENTYPE_OPVAL, "THING" },
364 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
365 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
366 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
367 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
368 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
369 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 370 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
371 { WHILE, TOKENTYPE_IVAL, "WHILE" },
372 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 373 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 374 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
375};
376
6154021b 377/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 378
bbf60fe6 379STATIC int
704d4215 380S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 381{
97aff369 382 dVAR;
7918f24d
NC
383
384 PERL_ARGS_ASSERT_TOKEREPORT;
385
bbf60fe6 386 if (DEBUG_T_TEST) {
bd61b366 387 const char *name = NULL;
bbf60fe6 388 enum token_type type = TOKENTYPE_NONE;
f54cb97a 389 const struct debug_tokens *p;
396482e1 390 SV* const report = newSVpvs("<== ");
bbf60fe6 391
f54cb97a 392 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
393 if (p->token == (int)rv) {
394 name = p->name;
395 type = p->type;
396 break;
397 }
398 }
399 if (name)
54667de8 400 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
401 else if ((char)rv > ' ' && (char)rv < '~')
402 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
403 else if (!rv)
396482e1 404 sv_catpvs(report, "EOF");
bbf60fe6
DM
405 else
406 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
407 switch (type) {
408 case TOKENTYPE_NONE:
409 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
410 break;
411 case TOKENTYPE_IVAL:
704d4215 412 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
413 break;
414 case TOKENTYPE_OPNUM:
415 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 416 PL_op_name[lvalp->ival]);
bbf60fe6
DM
417 break;
418 case TOKENTYPE_PVAL:
704d4215 419 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
420 break;
421 case TOKENTYPE_OPVAL:
704d4215 422 if (lvalp->opval) {
401441c0 423 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
424 PL_op_name[lvalp->opval->op_type]);
425 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 426 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 427 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
428 }
429
430 }
401441c0 431 else
396482e1 432 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
433 break;
434 }
b6007c36 435 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
436 };
437 return (int)rv;
998054bd
SC
438}
439
b6007c36
DM
440
441/* print the buffer with suitable escapes */
442
443STATIC void
15f169a1 444S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 445{
396482e1 446 SV* const tmp = newSVpvs("");
7918f24d
NC
447
448 PERL_ARGS_ASSERT_PRINTBUF;
449
b6007c36
DM
450 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
451 SvREFCNT_dec(tmp);
452}
453
8fa7f367
JH
454#endif
455
ffb4593c
NT
456/*
457 * S_ao
458 *
c963b151
BD
459 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
460 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
461 */
462
76e3520e 463STATIC int
cea2e8a9 464S_ao(pTHX_ int toketype)
a0d0e21e 465{
97aff369 466 dVAR;
3280af22
NIS
467 if (*PL_bufptr == '=') {
468 PL_bufptr++;
a0d0e21e 469 if (toketype == ANDAND)
6154021b 470 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 471 else if (toketype == OROR)
6154021b 472 pl_yylval.ival = OP_ORASSIGN;
c963b151 473 else if (toketype == DORDOR)
6154021b 474 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
475 toketype = ASSIGNOP;
476 }
477 return toketype;
478}
479
ffb4593c
NT
480/*
481 * S_no_op
482 * When Perl expects an operator and finds something else, no_op
483 * prints the warning. It always prints "<something> found where
484 * operator expected. It prints "Missing semicolon on previous line?"
485 * if the surprise occurs at the start of the line. "do you need to
486 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
487 * where the compiler doesn't know if foo is a method call or a function.
488 * It prints "Missing operator before end of line" if there's nothing
489 * after the missing operator, or "... before <...>" if there is something
490 * after the missing operator.
491 */
492
76e3520e 493STATIC void
15f169a1 494S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 495{
97aff369 496 dVAR;
9d4ba2ae
AL
497 char * const oldbp = PL_bufptr;
498 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 499
7918f24d
NC
500 PERL_ARGS_ASSERT_NO_OP;
501
1189a94a
GS
502 if (!s)
503 s = oldbp;
07c798fb 504 else
1189a94a 505 PL_bufptr = s;
cea2e8a9 506 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
507 if (ckWARN_d(WARN_SYNTAX)) {
508 if (is_first)
509 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
510 "\t(Missing semicolon on previous line?)\n");
511 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 512 const char *t;
c35e046a
AL
513 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
514 NOOP;
56da5a46
RGS
515 if (t < PL_bufptr && isSPACE(*t))
516 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
517 "\t(Do you need to predeclare %.*s?)\n",
551405c4 518 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
519 }
520 else {
521 assert(s >= oldbp);
522 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 523 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 524 }
07c798fb 525 }
3280af22 526 PL_bufptr = oldbp;
8990e307
LW
527}
528
ffb4593c
NT
529/*
530 * S_missingterm
531 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 532 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
533 * If we're in a delimited string and the delimiter is a control
534 * character, it's reformatted into a two-char sequence like ^C.
535 * This is fatal.
536 */
537
76e3520e 538STATIC void
cea2e8a9 539S_missingterm(pTHX_ char *s)
8990e307 540{
97aff369 541 dVAR;
8990e307
LW
542 char tmpbuf[3];
543 char q;
544 if (s) {
9d4ba2ae 545 char * const nl = strrchr(s,'\n');
d2719217 546 if (nl)
8990e307
LW
547 *nl = '\0';
548 }
463559e7 549 else if (isCNTRL(PL_multi_close)) {
8990e307 550 *tmpbuf = '^';
585ec06d 551 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
552 tmpbuf[2] = '\0';
553 s = tmpbuf;
554 }
555 else {
eb160463 556 *tmpbuf = (char)PL_multi_close;
8990e307
LW
557 tmpbuf[1] = '\0';
558 s = tmpbuf;
559 }
560 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 561 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 562}
79072805 563
ef89dcc3 564#define FEATURE_IS_ENABLED(name) \
0d863452 565 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 566 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b
NC
567/* The longest string we pass in. */
568#define MAX_FEATURE_LEN (sizeof("switch")-1)
569
0d863452
RH
570/*
571 * S_feature_is_enabled
572 * Check whether the named feature is enabled.
573 */
574STATIC bool
15f169a1 575S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 576{
97aff369 577 dVAR;
0d863452 578 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 579 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
580
581 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
582
4a731d7b
NC
583 assert(namelen <= MAX_FEATURE_LEN);
584 memcpy(&he_name[8], name, namelen);
d4c19fe8 585
7b9ef140 586 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
587}
588
ffb4593c
NT
589/*
590 * Perl_deprecate
ffb4593c
NT
591 */
592
79072805 593void
15f169a1 594Perl_deprecate(pTHX_ const char *const s)
a0d0e21e 595{
7918f24d
NC
596 PERL_ARGS_ASSERT_DEPRECATE;
597
599cee73 598 if (ckWARN(WARN_DEPRECATED))
9014280d 599 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
600}
601
12bcd1a6 602void
15f169a1 603Perl_deprecate_old(pTHX_ const char *const s)
12bcd1a6
PM
604{
605 /* This function should NOT be called for any new deprecated warnings */
606 /* Use Perl_deprecate instead */
607 /* */
608 /* It is here to maintain backward compatibility with the pre-5.8 */
609 /* warnings category hierarchy. The "deprecated" category used to */
610 /* live under the "syntax" category. It is now a top-level category */
611 /* in its own right. */
612
7918f24d
NC
613 PERL_ARGS_ASSERT_DEPRECATE_OLD;
614
12bcd1a6 615 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 616 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
617 "Use of %s is deprecated", s);
618}
619
ffb4593c 620/*
9cbb5ea2
GS
621 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
622 * utf16-to-utf8-reversed.
ffb4593c
NT
623 */
624
c39cd008
GS
625#ifdef PERL_CR_FILTER
626static void
627strip_return(SV *sv)
628{
95a20fc0 629 register const char *s = SvPVX_const(sv);
9d4ba2ae 630 register const char * const e = s + SvCUR(sv);
7918f24d
NC
631
632 PERL_ARGS_ASSERT_STRIP_RETURN;
633
c39cd008
GS
634 /* outer loop optimized to do nothing if there are no CR-LFs */
635 while (s < e) {
636 if (*s++ == '\r' && *s == '\n') {
637 /* hit a CR-LF, need to copy the rest */
638 register char *d = s - 1;
639 *d++ = *s++;
640 while (s < e) {
641 if (*s == '\r' && s[1] == '\n')
642 s++;
643 *d++ = *s++;
644 }
645 SvCUR(sv) -= s - d;
646 return;
647 }
648 }
649}
a868473f 650
76e3520e 651STATIC I32
c39cd008 652S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 653{
f54cb97a 654 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
655 if (count > 0 && !maxlen)
656 strip_return(sv);
657 return count;
a868473f
NIS
658}
659#endif
660
199e78b7
DM
661
662
ffb4593c
NT
663/*
664 * Perl_lex_start
5486870f 665 *
e3abe207 666 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
667 *
668 * rsfp is the opened file handle to read from (if any),
669 *
670 * line holds any initial content already read from the file (or in
671 * the case of no file, such as an eval, the whole contents);
672 *
673 * new_filter indicates that this is a new file and it shouldn't inherit
674 * the filters from the current parser (ie require).
ffb4593c
NT
675 */
676
a0d0e21e 677void
5486870f 678Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 679{
97aff369 680 dVAR;
6ef55633 681 const char *s = NULL;
8990e307 682 STRLEN len;
5486870f 683 yy_parser *parser, *oparser;
acdf0a21
DM
684
685 /* create and initialise a parser */
686
199e78b7 687 Newxz(parser, 1, yy_parser);
5486870f 688 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
689 PL_parser = parser;
690
691 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
692 parser->ps = parser->stack;
693 parser->stack_size = YYINITDEPTH;
694
695 parser->stack->state = 0;
696 parser->yyerrstatus = 0;
697 parser->yychar = YYEMPTY; /* Cause a token to be read. */
698
e3abe207
DM
699 /* on scope exit, free this parser and restore any outer one */
700 SAVEPARSER(parser);
7c4baf47 701 parser->saved_curcop = PL_curcop;
e3abe207 702
acdf0a21 703 /* initialise lexer state */
8990e307 704
fb205e7a
DM
705#ifdef PERL_MAD
706 parser->curforce = -1;
707#else
708 parser->nexttoke = 0;
709#endif
ca4cfd28 710 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 711 parser->copline = NOLINE;
5afb0a62 712 parser->lex_state = LEX_NORMAL;
c2598295 713 parser->expect = XSTATE;
2f9285f8 714 parser->rsfp = rsfp;
56b27c9a 715 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 716 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 717
199e78b7
DM
718 Newx(parser->lex_brackstack, 120, char);
719 Newx(parser->lex_casestack, 12, char);
720 *parser->lex_casestack = '\0';
02b34bbe 721
10efb74f
NC
722 if (line) {
723 s = SvPV_const(line, len);
724 } else {
725 len = 0;
726 }
bdc0bf6f 727
10efb74f 728 if (!len) {
bdc0bf6f 729 parser->linestr = newSVpvs("\n;");
10efb74f 730 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 731 parser->linestr = newSVsv(line);
10efb74f 732 if (s[len-1] != ';')
bdc0bf6f 733 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
734 } else {
735 SvTEMP_off(line);
736 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 737 parser->linestr = line;
8990e307 738 }
f06b5848
DM
739 parser->oldoldbufptr =
740 parser->oldbufptr =
741 parser->bufptr =
742 parser->linestart = SvPVX(parser->linestr);
743 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
744 parser->last_lop = parser->last_uni = NULL;
79072805 745}
a687059c 746
e3abe207
DM
747
748/* delete a parser object */
749
750void
751Perl_parser_free(pTHX_ const yy_parser *parser)
752{
7918f24d
NC
753 PERL_ARGS_ASSERT_PARSER_FREE;
754
7c4baf47 755 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
756 SvREFCNT_dec(parser->linestr);
757
2f9285f8
DM
758 if (parser->rsfp == PerlIO_stdin())
759 PerlIO_clearerr(parser->rsfp);
799361c3
SH
760 else if (parser->rsfp && (!parser->old_parser ||
761 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 762 PerlIO_close(parser->rsfp);
5486870f 763 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 764
e3abe207
DM
765 Safefree(parser->stack);
766 Safefree(parser->lex_brackstack);
767 Safefree(parser->lex_casestack);
768 PL_parser = parser->old_parser;
769 Safefree(parser);
770}
771
772
ffb4593c
NT
773/*
774 * Perl_lex_end
9cbb5ea2
GS
775 * Finalizer for lexing operations. Must be called when the parser is
776 * done with the lexer.
ffb4593c
NT
777 */
778
463ee0b2 779void
864dbfa3 780Perl_lex_end(pTHX)
463ee0b2 781{
97aff369 782 dVAR;
3280af22 783 PL_doextract = FALSE;
463ee0b2
LW
784}
785
ffb4593c
NT
786/*
787 * S_incline
788 * This subroutine has nothing to do with tilting, whether at windmills
789 * or pinball tables. Its name is short for "increment line". It
57843af0 790 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 791 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
792 * # line 500 "foo.pm"
793 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
794 */
795
76e3520e 796STATIC void
d9095cec 797S_incline(pTHX_ const char *s)
463ee0b2 798{
97aff369 799 dVAR;
d9095cec
NC
800 const char *t;
801 const char *n;
802 const char *e;
463ee0b2 803
7918f24d
NC
804 PERL_ARGS_ASSERT_INCLINE;
805
57843af0 806 CopLINE_inc(PL_curcop);
463ee0b2
LW
807 if (*s++ != '#')
808 return;
d4c19fe8
AL
809 while (SPACE_OR_TAB(*s))
810 s++;
73659bf1
GS
811 if (strnEQ(s, "line", 4))
812 s += 4;
813 else
814 return;
084592ab 815 if (SPACE_OR_TAB(*s))
73659bf1 816 s++;
4e553d73 817 else
73659bf1 818 return;
d4c19fe8
AL
819 while (SPACE_OR_TAB(*s))
820 s++;
463ee0b2
LW
821 if (!isDIGIT(*s))
822 return;
d4c19fe8 823
463ee0b2
LW
824 n = s;
825 while (isDIGIT(*s))
826 s++;
07714eb4 827 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 828 return;
bf4acbe4 829 while (SPACE_OR_TAB(*s))
463ee0b2 830 s++;
73659bf1 831 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 832 s++;
73659bf1
GS
833 e = t + 1;
834 }
463ee0b2 835 else {
c35e046a
AL
836 t = s;
837 while (!isSPACE(*t))
838 t++;
73659bf1 839 e = t;
463ee0b2 840 }
bf4acbe4 841 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
842 e++;
843 if (*e != '\n' && *e != '\0')
844 return; /* false alarm */
845
f4dd75d9 846 if (t - s > 0) {
d9095cec 847 const STRLEN len = t - s;
8a5ee598 848#ifndef USE_ITHREADS
19bad673
NC
849 SV *const temp_sv = CopFILESV(PL_curcop);
850 const char *cf;
851 STRLEN tmplen;
852
853 if (temp_sv) {
854 cf = SvPVX(temp_sv);
855 tmplen = SvCUR(temp_sv);
856 } else {
857 cf = NULL;
858 tmplen = 0;
859 }
860
42d9b98d 861 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
862 /* must copy *{"::_<(eval N)[oldfilename:L]"}
863 * to *{"::_<newfilename"} */
44867030
NC
864 /* However, the long form of evals is only turned on by the
865 debugger - usually they're "(eval %lu)" */
866 char smallbuf[128];
867 char *tmpbuf;
868 GV **gvp;
d9095cec 869 STRLEN tmplen2 = len;
798b63bc 870 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
871 tmpbuf = smallbuf;
872 else
2ae0db35 873 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
874 tmpbuf[0] = '_';
875 tmpbuf[1] = '<';
2ae0db35 876 memcpy(tmpbuf + 2, cf, tmplen);
44867030 877 tmplen += 2;
8a5ee598
RGS
878 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
879 if (gvp) {
44867030
NC
880 char *tmpbuf2;
881 GV *gv2;
882
883 if (tmplen2 + 2 <= sizeof smallbuf)
884 tmpbuf2 = smallbuf;
885 else
886 Newx(tmpbuf2, tmplen2 + 2, char);
887
888 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
889 /* Either they malloc'd it, or we malloc'd it,
890 so no prefix is present in ours. */
891 tmpbuf2[0] = '_';
892 tmpbuf2[1] = '<';
893 }
894
895 memcpy(tmpbuf2 + 2, s, tmplen2);
896 tmplen2 += 2;
897
8a5ee598 898 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 899 if (!isGV(gv2)) {
8a5ee598 900 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
901 /* adjust ${"::_<newfilename"} to store the new file name */
902 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
903 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
904 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 905 }
44867030
NC
906
907 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 908 }
e66cf94c 909 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 910 }
8a5ee598 911#endif
05ec9bb3 912 CopFILE_free(PL_curcop);
d9095cec 913 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 914 }
57843af0 915 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
916}
917
29595ff2 918#ifdef PERL_MAD
cd81e915 919/* skip space before PL_thistoken */
29595ff2
NC
920
921STATIC char *
922S_skipspace0(pTHX_ register char *s)
923{
7918f24d
NC
924 PERL_ARGS_ASSERT_SKIPSPACE0;
925
29595ff2
NC
926 s = skipspace(s);
927 if (!PL_madskills)
928 return s;
cd81e915
NC
929 if (PL_skipwhite) {
930 if (!PL_thiswhite)
6b29d1f5 931 PL_thiswhite = newSVpvs("");
cd81e915
NC
932 sv_catsv(PL_thiswhite, PL_skipwhite);
933 sv_free(PL_skipwhite);
934 PL_skipwhite = 0;
935 }
936 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
937 return s;
938}
939
cd81e915 940/* skip space after PL_thistoken */
29595ff2
NC
941
942STATIC char *
943S_skipspace1(pTHX_ register char *s)
944{
d4c19fe8 945 const char *start = s;
29595ff2
NC
946 I32 startoff = start - SvPVX(PL_linestr);
947
7918f24d
NC
948 PERL_ARGS_ASSERT_SKIPSPACE1;
949
29595ff2
NC
950 s = skipspace(s);
951 if (!PL_madskills)
952 return s;
953 start = SvPVX(PL_linestr) + startoff;
cd81e915 954 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 955 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
956 PL_thistoken = newSVpvn(tstart, start - tstart);
957 }
958 PL_realtokenstart = -1;
959 if (PL_skipwhite) {
960 if (!PL_nextwhite)
6b29d1f5 961 PL_nextwhite = newSVpvs("");
cd81e915
NC
962 sv_catsv(PL_nextwhite, PL_skipwhite);
963 sv_free(PL_skipwhite);
964 PL_skipwhite = 0;
29595ff2
NC
965 }
966 return s;
967}
968
969STATIC char *
970S_skipspace2(pTHX_ register char *s, SV **svp)
971{
c35e046a
AL
972 char *start;
973 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
974 const I32 startoff = s - SvPVX(PL_linestr);
975
7918f24d
NC
976 PERL_ARGS_ASSERT_SKIPSPACE2;
977
29595ff2
NC
978 s = skipspace(s);
979 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
980 if (!PL_madskills || !svp)
981 return s;
982 start = SvPVX(PL_linestr) + startoff;
cd81e915 983 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 984 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
985 PL_thistoken = newSVpvn(tstart, start - tstart);
986 PL_realtokenstart = -1;
29595ff2 987 }
cd81e915 988 if (PL_skipwhite) {
29595ff2 989 if (!*svp)
6b29d1f5 990 *svp = newSVpvs("");
cd81e915
NC
991 sv_setsv(*svp, PL_skipwhite);
992 sv_free(PL_skipwhite);
993 PL_skipwhite = 0;
29595ff2
NC
994 }
995
996 return s;
997}
998#endif
999
80a702cd 1000STATIC void
15f169a1 1001S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1002{
1003 AV *av = CopFILEAVx(PL_curcop);
1004 if (av) {
b9f83d2f 1005 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1006 if (orig_sv)
1007 sv_setsv(sv, orig_sv);
1008 else
1009 sv_setpvn(sv, buf, len);
80a702cd
RGS
1010 (void)SvIOK_on(sv);
1011 SvIV_set(sv, 0);
1012 av_store(av, (I32)CopLINE(PL_curcop), sv);
1013 }
1014}
1015
ffb4593c
NT
1016/*
1017 * S_skipspace
1018 * Called to gobble the appropriate amount and type of whitespace.
1019 * Skips comments as well.
1020 */
1021
76e3520e 1022STATIC char *
cea2e8a9 1023S_skipspace(pTHX_ register char *s)
a687059c 1024{
97aff369 1025 dVAR;
5db06880
NC
1026#ifdef PERL_MAD
1027 int curoff;
1028 int startoff = s - SvPVX(PL_linestr);
1029
7918f24d
NC
1030 PERL_ARGS_ASSERT_SKIPSPACE;
1031
cd81e915
NC
1032 if (PL_skipwhite) {
1033 sv_free(PL_skipwhite);
1034 PL_skipwhite = 0;
5db06880
NC
1035 }
1036#endif
7918f24d 1037 PERL_ARGS_ASSERT_SKIPSPACE;
5db06880 1038
3280af22 1039 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1040 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1041 s++;
5db06880
NC
1042#ifdef PERL_MAD
1043 goto done;
1044#else
463ee0b2 1045 return s;
5db06880 1046#endif
463ee0b2
LW
1047 }
1048 for (;;) {
fd049845 1049 STRLEN prevlen;
09bef843 1050 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 1051 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
1052 while (s < PL_bufend && isSPACE(*s)) {
1053 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1054 incline(s);
1055 }
ffb4593c
NT
1056
1057 /* comment */
3280af22
NIS
1058 if (s < PL_bufend && *s == '#') {
1059 while (s < PL_bufend && *s != '\n')
463ee0b2 1060 s++;
60e6418e 1061 if (s < PL_bufend) {
463ee0b2 1062 s++;
60e6418e
GS
1063 if (PL_in_eval && !PL_rsfp) {
1064 incline(s);
1065 continue;
1066 }
1067 }
463ee0b2 1068 }
ffb4593c
NT
1069
1070 /* only continue to recharge the buffer if we're at the end
1071 * of the buffer, we're not reading from a source filter, and
1072 * we're in normal lexing mode
1073 */
09bef843
SB
1074 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1075 PL_lex_state == LEX_FORMLINE)
5db06880
NC
1076#ifdef PERL_MAD
1077 goto done;
1078#else
463ee0b2 1079 return s;
5db06880 1080#endif
ffb4593c
NT
1081
1082 /* try to recharge the buffer */
5db06880
NC
1083#ifdef PERL_MAD
1084 curoff = s - SvPVX(PL_linestr);
1085#endif
1086
9cbb5ea2 1087 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1088 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1089 {
5db06880
NC
1090#ifdef PERL_MAD
1091 if (PL_madskills && curoff != startoff) {
cd81e915 1092 if (!PL_skipwhite)
6b29d1f5 1093 PL_skipwhite = newSVpvs("");
cd81e915 1094 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1095 curoff - startoff);
1096 }
1097
1098 /* mustn't throw out old stuff yet if madpropping */
1099 SvCUR(PL_linestr) = curoff;
1100 s = SvPVX(PL_linestr) + curoff;
1101 *s = 0;
1102 if (curoff && s[-1] == '\n')
1103 s[-1] = ' ';
1104#endif
1105
9cbb5ea2 1106 /* end of file. Add on the -p or -n magic */
cd81e915 1107 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1108 if (PL_minus_p) {
5db06880 1109#ifdef PERL_MAD
6502358f 1110 sv_catpvs(PL_linestr,
5db06880
NC
1111 ";}continue{print or die qq(-p destination: $!\\n);}");
1112#else
6502358f 1113 sv_setpvs(PL_linestr,
01a19ab0 1114 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1115#endif
3280af22 1116 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1117 }
01a19ab0 1118 else if (PL_minus_n) {
5db06880 1119#ifdef PERL_MAD
76f68e9b 1120 sv_catpvs(PL_linestr, ";}");
5db06880 1121#else
76f68e9b 1122 sv_setpvs(PL_linestr, ";}");
5db06880 1123#endif
01a19ab0
NC
1124 PL_minus_n = 0;
1125 }
a0d0e21e 1126 else
5db06880 1127#ifdef PERL_MAD
76f68e9b 1128 sv_catpvs(PL_linestr,";");
5db06880 1129#else
76f68e9b 1130 sv_setpvs(PL_linestr,";");
5db06880 1131#endif
ffb4593c
NT
1132
1133 /* reset variables for next time we lex */
9cbb5ea2 1134 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1135 = SvPVX(PL_linestr)
1136#ifdef PERL_MAD
1137 + curoff
1138#endif
1139 ;
3280af22 1140 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1141 PL_last_lop = PL_last_uni = NULL;
ffb4593c 1142
4c84d7f2 1143 /* Close the filehandle. Could be from
ffb4593c
NT
1144 * STDIN, or a regular file. If we were reading code from
1145 * STDIN (because the commandline held no -e or filename)
1146 * then we don't close it, we reset it so the code can
1147 * read from STDIN too.
1148 */
1149
4c84d7f2 1150 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
3280af22 1151 PerlIO_clearerr(PL_rsfp);
8990e307 1152 else
3280af22 1153 (void)PerlIO_close(PL_rsfp);
4608196e 1154 PL_rsfp = NULL;
463ee0b2
LW
1155 return s;
1156 }
ffb4593c
NT
1157
1158 /* not at end of file, so we only read another line */
09bef843
SB
1159 /* make corresponding updates to old pointers, for yyerror() */
1160 oldprevlen = PL_oldbufptr - PL_bufend;
1161 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1162 if (PL_last_uni)
1163 oldunilen = PL_last_uni - PL_bufend;
1164 if (PL_last_lop)
1165 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1166 PL_linestart = PL_bufptr = s + prevlen;
1167 PL_bufend = s + SvCUR(PL_linestr);
1168 s = PL_bufptr;
09bef843
SB
1169 PL_oldbufptr = s + oldprevlen;
1170 PL_oldoldbufptr = s + oldoldprevlen;
1171 if (PL_last_uni)
1172 PL_last_uni = s + oldunilen;
1173 if (PL_last_lop)
1174 PL_last_lop = s + oldloplen;
a0d0e21e 1175 incline(s);
ffb4593c
NT
1176
1177 /* debugger active and we're not compiling the debugger code,
1178 * so store the line into the debugger's array of lines
1179 */
65269a95 1180 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 1181 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1182 }
5db06880
NC
1183
1184#ifdef PERL_MAD
1185 done:
1186 if (PL_madskills) {
cd81e915 1187 if (!PL_skipwhite)
6b29d1f5 1188 PL_skipwhite = newSVpvs("");
5db06880
NC
1189 curoff = s - SvPVX(PL_linestr);
1190 if (curoff - startoff)
cd81e915 1191 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1192 curoff - startoff);
1193 }
1194 return s;
1195#endif
a687059c 1196}
378cc40b 1197
ffb4593c
NT
1198/*
1199 * S_check_uni
1200 * Check the unary operators to ensure there's no ambiguity in how they're
1201 * used. An ambiguous piece of code would be:
1202 * rand + 5
1203 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1204 * the +5 is its argument.
1205 */
1206
76e3520e 1207STATIC void
cea2e8a9 1208S_check_uni(pTHX)
ba106d47 1209{
97aff369 1210 dVAR;
d4c19fe8
AL
1211 const char *s;
1212 const char *t;
2f3197b3 1213
3280af22 1214 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1215 return;
3280af22
NIS
1216 while (isSPACE(*PL_last_uni))
1217 PL_last_uni++;
c35e046a
AL
1218 s = PL_last_uni;
1219 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1220 s++;
3280af22 1221 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1222 return;
6136c704 1223
0453d815 1224 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1225 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1226 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1227 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1228 }
2f3197b3
LW
1229}
1230
ffb4593c
NT
1231/*
1232 * LOP : macro to build a list operator. Its behaviour has been replaced
1233 * with a subroutine, S_lop() for which LOP is just another name.
1234 */
1235
a0d0e21e
LW
1236#define LOP(f,x) return lop(f,x,s)
1237
ffb4593c
NT
1238/*
1239 * S_lop
1240 * Build a list operator (or something that might be one). The rules:
1241 * - if we have a next token, then it's a list operator [why?]
1242 * - if the next thing is an opening paren, then it's a function
1243 * - else it's a list operator
1244 */
1245
76e3520e 1246STATIC I32
a0be28da 1247S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1248{
97aff369 1249 dVAR;
7918f24d
NC
1250
1251 PERL_ARGS_ASSERT_LOP;
1252
6154021b 1253 pl_yylval.ival = f;
35c8bce7 1254 CLINE;
3280af22
NIS
1255 PL_expect = x;
1256 PL_bufptr = s;
1257 PL_last_lop = PL_oldbufptr;
eb160463 1258 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1259#ifdef PERL_MAD
1260 if (PL_lasttoke)
1261 return REPORT(LSTOP);
1262#else
3280af22 1263 if (PL_nexttoke)
bbf60fe6 1264 return REPORT(LSTOP);
5db06880 1265#endif
79072805 1266 if (*s == '(')
bbf60fe6 1267 return REPORT(FUNC);
29595ff2 1268 s = PEEKSPACE(s);
79072805 1269 if (*s == '(')
bbf60fe6 1270 return REPORT(FUNC);
79072805 1271 else
bbf60fe6 1272 return REPORT(LSTOP);
79072805
LW
1273}
1274
5db06880
NC
1275#ifdef PERL_MAD
1276 /*
1277 * S_start_force
1278 * Sets up for an eventual force_next(). start_force(0) basically does
1279 * an unshift, while start_force(-1) does a push. yylex removes items
1280 * on the "pop" end.
1281 */
1282
1283STATIC void
1284S_start_force(pTHX_ int where)
1285{
1286 int i;
1287
cd81e915 1288 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1289 where = PL_lasttoke;
cd81e915
NC
1290 assert(PL_curforce < 0 || PL_curforce == where);
1291 if (PL_curforce != where) {
5db06880
NC
1292 for (i = PL_lasttoke; i > where; --i) {
1293 PL_nexttoke[i] = PL_nexttoke[i-1];
1294 }
1295 PL_lasttoke++;
1296 }
cd81e915 1297 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1298 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1299 PL_curforce = where;
1300 if (PL_nextwhite) {
5db06880 1301 if (PL_madskills)
6b29d1f5 1302 curmad('^', newSVpvs(""));
cd81e915 1303 CURMAD('_', PL_nextwhite);
5db06880
NC
1304 }
1305}
1306
1307STATIC void
1308S_curmad(pTHX_ char slot, SV *sv)
1309{
1310 MADPROP **where;
1311
1312 if (!sv)
1313 return;
cd81e915
NC
1314 if (PL_curforce < 0)
1315 where = &PL_thismad;
5db06880 1316 else
cd81e915 1317 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1318
cd81e915 1319 if (PL_faketokens)
76f68e9b 1320 sv_setpvs(sv, "");
5db06880
NC
1321 else {
1322 if (!IN_BYTES) {
1323 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1324 SvUTF8_on(sv);
1325 else if (PL_encoding) {
1326 sv_recode_to_utf8(sv, PL_encoding);
1327 }
1328 }
1329 }
1330
1331 /* keep a slot open for the head of the list? */
1332 if (slot != '_' && *where && (*where)->mad_key == '^') {
1333 (*where)->mad_key = slot;
daba3364 1334 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1335 (*where)->mad_val = (void*)sv;
1336 }
1337 else
1338 addmad(newMADsv(slot, sv), where, 0);
1339}
1340#else
b3f24c00
MHM
1341# define start_force(where) NOOP
1342# define curmad(slot, sv) NOOP
5db06880
NC
1343#endif
1344
ffb4593c
NT
1345/*
1346 * S_force_next
9cbb5ea2 1347 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1348 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1349 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1350 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1351 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1352 */
1353
4e553d73 1354STATIC void
cea2e8a9 1355S_force_next(pTHX_ I32 type)
79072805 1356{
97aff369 1357 dVAR;
704d4215
GG
1358#ifdef DEBUGGING
1359 if (DEBUG_T_TEST) {
1360 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1361 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1362 }
1363#endif
5db06880 1364#ifdef PERL_MAD
cd81e915 1365 if (PL_curforce < 0)
5db06880 1366 start_force(PL_lasttoke);
cd81e915 1367 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1368 if (PL_lex_state != LEX_KNOWNEXT)
1369 PL_lex_defer = PL_lex_state;
1370 PL_lex_state = LEX_KNOWNEXT;
1371 PL_lex_expect = PL_expect;
cd81e915 1372 PL_curforce = -1;
5db06880 1373#else
3280af22
NIS
1374 PL_nexttype[PL_nexttoke] = type;
1375 PL_nexttoke++;
1376 if (PL_lex_state != LEX_KNOWNEXT) {
1377 PL_lex_defer = PL_lex_state;
1378 PL_lex_expect = PL_expect;
1379 PL_lex_state = LEX_KNOWNEXT;
79072805 1380 }
5db06880 1381#endif
79072805
LW
1382}
1383
d0a148a6 1384STATIC SV *
15f169a1 1385S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1386{
97aff369 1387 dVAR;
740cce10
NC
1388 SV * const sv = newSVpvn_utf8(start, len,
1389 UTF && !IN_BYTES
1390 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1391 return sv;
1392}
1393
ffb4593c
NT
1394/*
1395 * S_force_word
1396 * When the lexer knows the next thing is a word (for instance, it has
1397 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1398 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1399 * lookahead.
ffb4593c
NT
1400 *
1401 * Arguments:
b1b65b59 1402 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1403 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1404 * int check_keyword : if true, Perl checks to make sure the word isn't
1405 * a keyword (do this if the word is a label, e.g. goto FOO)
1406 * int allow_pack : if true, : characters will also be allowed (require,
1407 * use, etc. do this)
9cbb5ea2 1408 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1409 */
1410
76e3520e 1411STATIC char *
cea2e8a9 1412S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1413{
97aff369 1414 dVAR;
463ee0b2
LW
1415 register char *s;
1416 STRLEN len;
4e553d73 1417
7918f24d
NC
1418 PERL_ARGS_ASSERT_FORCE_WORD;
1419
29595ff2 1420 start = SKIPSPACE1(start);
463ee0b2 1421 s = start;
7e2040f0 1422 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1423 (allow_pack && *s == ':') ||
15f0808c 1424 (allow_initial_tick && *s == '\'') )
a0d0e21e 1425 {
3280af22 1426 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1427 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1428 return start;
cd81e915 1429 start_force(PL_curforce);
5db06880
NC
1430 if (PL_madskills)
1431 curmad('X', newSVpvn(start,s-start));
463ee0b2 1432 if (token == METHOD) {
29595ff2 1433 s = SKIPSPACE1(s);
463ee0b2 1434 if (*s == '(')
3280af22 1435 PL_expect = XTERM;
463ee0b2 1436 else {
3280af22 1437 PL_expect = XOPERATOR;
463ee0b2 1438 }
79072805 1439 }
e74e6b3d 1440 if (PL_madskills)
63575281 1441 curmad('g', newSVpvs( "forced" ));
9ded7720 1442 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1443 = (OP*)newSVOP(OP_CONST,0,
1444 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1445 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1446 force_next(token);
1447 }
1448 return s;
1449}
1450
ffb4593c
NT
1451/*
1452 * S_force_ident
9cbb5ea2 1453 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1454 * text only contains the "foo" portion. The first argument is a pointer
1455 * to the "foo", and the second argument is the type symbol to prefix.
1456 * Forces the next token to be a "WORD".
9cbb5ea2 1457 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1458 */
1459
76e3520e 1460STATIC void
bfed75c6 1461S_force_ident(pTHX_ register const char *s, int kind)
79072805 1462{
97aff369 1463 dVAR;
7918f24d
NC
1464
1465 PERL_ARGS_ASSERT_FORCE_IDENT;
1466
c35e046a 1467 if (*s) {
90e5519e
NC
1468 const STRLEN len = strlen(s);
1469 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1470 start_force(PL_curforce);
9ded7720 1471 NEXTVAL_NEXTTOKE.opval = o;
79072805 1472 force_next(WORD);
748a9306 1473 if (kind) {
11343788 1474 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1475 /* XXX see note in pp_entereval() for why we forgo typo
1476 warnings if the symbol must be introduced in an eval.
1477 GSAR 96-10-12 */
90e5519e
NC
1478 gv_fetchpvn_flags(s, len,
1479 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1480 : GV_ADD,
1481 kind == '$' ? SVt_PV :
1482 kind == '@' ? SVt_PVAV :
1483 kind == '%' ? SVt_PVHV :
a0d0e21e 1484 SVt_PVGV
90e5519e 1485 );
748a9306 1486 }
79072805
LW
1487 }
1488}
1489
1571675a
GS
1490NV
1491Perl_str_to_version(pTHX_ SV *sv)
1492{
1493 NV retval = 0.0;
1494 NV nshift = 1.0;
1495 STRLEN len;
cfd0369c 1496 const char *start = SvPV_const(sv,len);
9d4ba2ae 1497 const char * const end = start + len;
504618e9 1498 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
1499
1500 PERL_ARGS_ASSERT_STR_TO_VERSION;
1501
1571675a 1502 while (start < end) {
ba210ebe 1503 STRLEN skip;
1571675a
GS
1504 UV n;
1505 if (utf)
9041c2e3 1506 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1507 else {
1508 n = *(U8*)start;
1509 skip = 1;
1510 }
1511 retval += ((NV)n)/nshift;
1512 start += skip;
1513 nshift *= 1000;
1514 }
1515 return retval;
1516}
1517
4e553d73 1518/*
ffb4593c
NT
1519 * S_force_version
1520 * Forces the next token to be a version number.
e759cc13
RGS
1521 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1522 * and if "guessing" is TRUE, then no new token is created (and the caller
1523 * must use an alternative parsing method).
ffb4593c
NT
1524 */
1525
76e3520e 1526STATIC char *
e759cc13 1527S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1528{
97aff369 1529 dVAR;
5f66b61c 1530 OP *version = NULL;
44dcb63b 1531 char *d;
5db06880
NC
1532#ifdef PERL_MAD
1533 I32 startoff = s - SvPVX(PL_linestr);
1534#endif
89bfa8cd 1535
7918f24d
NC
1536 PERL_ARGS_ASSERT_FORCE_VERSION;
1537
29595ff2 1538 s = SKIPSPACE1(s);
89bfa8cd 1539
44dcb63b 1540 d = s;
dd629d5b 1541 if (*d == 'v')
44dcb63b 1542 d++;
44dcb63b 1543 if (isDIGIT(*d)) {
e759cc13
RGS
1544 while (isDIGIT(*d) || *d == '_' || *d == '.')
1545 d++;
5db06880
NC
1546#ifdef PERL_MAD
1547 if (PL_madskills) {
cd81e915 1548 start_force(PL_curforce);
5db06880
NC
1549 curmad('X', newSVpvn(s,d-s));
1550 }
1551#endif
9f3d182e 1552 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1553 SV *ver;
6154021b
RGS
1554 s = scan_num(s, &pl_yylval);
1555 version = pl_yylval.opval;
dd629d5b
GS
1556 ver = cSVOPx(version)->op_sv;
1557 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1558 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1559 SvNV_set(ver, str_to_version(ver));
1571675a 1560 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1561 }
89bfa8cd 1562 }
5db06880
NC
1563 else if (guessing) {
1564#ifdef PERL_MAD
1565 if (PL_madskills) {
cd81e915
NC
1566 sv_free(PL_nextwhite); /* let next token collect whitespace */
1567 PL_nextwhite = 0;
5db06880
NC
1568 s = SvPVX(PL_linestr) + startoff;
1569 }
1570#endif
e759cc13 1571 return s;
5db06880 1572 }
89bfa8cd
PP
1573 }
1574
5db06880
NC
1575#ifdef PERL_MAD
1576 if (PL_madskills && !version) {
cd81e915
NC
1577 sv_free(PL_nextwhite); /* let next token collect whitespace */
1578 PL_nextwhite = 0;
5db06880
NC
1579 s = SvPVX(PL_linestr) + startoff;
1580 }
1581#endif
89bfa8cd 1582 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1583 start_force(PL_curforce);
9ded7720 1584 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1585 force_next(WORD);
89bfa8cd 1586
e759cc13 1587 return s;
89bfa8cd
PP
1588}
1589
ffb4593c
NT
1590/*
1591 * S_tokeq
1592 * Tokenize a quoted string passed in as an SV. It finds the next
1593 * chunk, up to end of string or a backslash. It may make a new
1594 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1595 * turns \\ into \.
1596 */
1597
76e3520e 1598STATIC SV *
cea2e8a9 1599S_tokeq(pTHX_ SV *sv)
79072805 1600{
97aff369 1601 dVAR;
79072805
LW
1602 register char *s;
1603 register char *send;
1604 register char *d;
b3ac6de7
IZ
1605 STRLEN len = 0;
1606 SV *pv = sv;
79072805 1607
7918f24d
NC
1608 PERL_ARGS_ASSERT_TOKEQ;
1609
79072805 1610 if (!SvLEN(sv))
b3ac6de7 1611 goto finish;
79072805 1612
a0d0e21e 1613 s = SvPV_force(sv, len);
21a311ee 1614 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1615 goto finish;
463ee0b2 1616 send = s + len;
79072805
LW
1617 while (s < send && *s != '\\')
1618 s++;
1619 if (s == send)
b3ac6de7 1620 goto finish;
79072805 1621 d = s;
be4731d2 1622 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 1623 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 1624 }
79072805
LW
1625 while (s < send) {
1626 if (*s == '\\') {
a0d0e21e 1627 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1628 s++; /* all that, just for this */
1629 }
1630 *d++ = *s++;
1631 }
1632 *d = '\0';
95a20fc0 1633 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1634 finish:
3280af22 1635 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 1636 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
1637 return sv;
1638}
1639
ffb4593c
NT
1640/*
1641 * Now come three functions related to double-quote context,
1642 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1643 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1644 * interact with PL_lex_state, and create fake ( ... ) argument lists
1645 * to handle functions and concatenation.
1646 * They assume that whoever calls them will be setting up a fake
1647 * join call, because each subthing puts a ',' after it. This lets
1648 * "lower \luPpEr"
1649 * become
1650 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1651 *
1652 * (I'm not sure whether the spurious commas at the end of lcfirst's
1653 * arguments and join's arguments are created or not).
1654 */
1655
1656/*
1657 * S_sublex_start
6154021b 1658 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
1659 *
1660 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 1661 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
1662 *
1663 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1664 *
1665 * Everything else becomes a FUNC.
1666 *
1667 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1668 * had an OP_CONST or OP_READLINE). This just sets us up for a
1669 * call to S_sublex_push().
1670 */
1671
76e3520e 1672STATIC I32
cea2e8a9 1673S_sublex_start(pTHX)
79072805 1674{
97aff369 1675 dVAR;
6154021b 1676 register const I32 op_type = pl_yylval.ival;
79072805
LW
1677
1678 if (op_type == OP_NULL) {
6154021b 1679 pl_yylval.opval = PL_lex_op;
5f66b61c 1680 PL_lex_op = NULL;
79072805
LW
1681 return THING;
1682 }
1683 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1684 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1685
1686 if (SvTYPE(sv) == SVt_PVIV) {
1687 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1688 STRLEN len;
96a5add6 1689 const char * const p = SvPV_const(sv, len);
740cce10 1690 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
1691 SvREFCNT_dec(sv);
1692 sv = nsv;
4e553d73 1693 }
6154021b 1694 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1695 PL_lex_stuff = NULL;
6f33ba73
RGS
1696 /* Allow <FH> // "foo" */
1697 if (op_type == OP_READLINE)
1698 PL_expect = XTERMORDORDOR;
79072805
LW
1699 return THING;
1700 }
e3f73d4e
RGS
1701 else if (op_type == OP_BACKTICK && PL_lex_op) {
1702 /* readpipe() vas overriden */
1703 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 1704 pl_yylval.opval = PL_lex_op;
9b201d7d 1705 PL_lex_op = NULL;
e3f73d4e
RGS
1706 PL_lex_stuff = NULL;
1707 return THING;
1708 }
79072805 1709
3280af22 1710 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 1711 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
1712 PL_sublex_info.sub_op = PL_lex_op;
1713 PL_lex_state = LEX_INTERPPUSH;
55497cff 1714
3280af22
NIS
1715 PL_expect = XTERM;
1716 if (PL_lex_op) {
6154021b 1717 pl_yylval.opval = PL_lex_op;
5f66b61c 1718 PL_lex_op = NULL;
55497cff
PP
1719 return PMFUNC;
1720 }
1721 else
1722 return FUNC;
1723}
1724
ffb4593c
NT
1725/*
1726 * S_sublex_push
1727 * Create a new scope to save the lexing state. The scope will be
1728 * ended in S_sublex_done. Returns a '(', starting the function arguments
1729 * to the uc, lc, etc. found before.
1730 * Sets PL_lex_state to LEX_INTERPCONCAT.
1731 */
1732
76e3520e 1733STATIC I32
cea2e8a9 1734S_sublex_push(pTHX)
55497cff 1735{
27da23d5 1736 dVAR;
f46d017c 1737 ENTER;
55497cff 1738
3280af22 1739 PL_lex_state = PL_sublex_info.super_state;
651b5b28 1740 SAVEBOOL(PL_lex_dojoin);
3280af22 1741 SAVEI32(PL_lex_brackets);
3280af22
NIS
1742 SAVEI32(PL_lex_casemods);
1743 SAVEI32(PL_lex_starts);
651b5b28 1744 SAVEI8(PL_lex_state);
7766f137 1745 SAVEVPTR(PL_lex_inpat);
98246f1e 1746 SAVEI16(PL_lex_inwhat);
57843af0 1747 SAVECOPLINE(PL_curcop);
3280af22 1748 SAVEPPTR(PL_bufptr);
8452ff4b 1749 SAVEPPTR(PL_bufend);
3280af22
NIS
1750 SAVEPPTR(PL_oldbufptr);
1751 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1752 SAVEPPTR(PL_last_lop);
1753 SAVEPPTR(PL_last_uni);
3280af22
NIS
1754 SAVEPPTR(PL_linestart);
1755 SAVESPTR(PL_linestr);
8edd5f42
RGS
1756 SAVEGENERICPV(PL_lex_brackstack);
1757 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1758
1759 PL_linestr = PL_lex_stuff;
a0714e2c 1760 PL_lex_stuff = NULL;
3280af22 1761
9cbb5ea2
GS
1762 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1763 = SvPVX(PL_linestr);
3280af22 1764 PL_bufend += SvCUR(PL_linestr);
bd61b366 1765 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1766 SAVEFREESV(PL_linestr);
1767
1768 PL_lex_dojoin = FALSE;
1769 PL_lex_brackets = 0;
a02a5408
JC
1770 Newx(PL_lex_brackstack, 120, char);
1771 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1772 PL_lex_casemods = 0;
1773 *PL_lex_casestack = '\0';
1774 PL_lex_starts = 0;
1775 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1776 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1777
1778 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1779 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1780 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1781 else
5f66b61c 1782 PL_lex_inpat = NULL;
79072805 1783
55497cff 1784 return '(';
79072805
LW
1785}
1786
ffb4593c
NT
1787/*
1788 * S_sublex_done
1789 * Restores lexer state after a S_sublex_push.
1790 */
1791
76e3520e 1792STATIC I32
cea2e8a9 1793S_sublex_done(pTHX)
79072805 1794{
27da23d5 1795 dVAR;
3280af22 1796 if (!PL_lex_starts++) {
396482e1 1797 SV * const sv = newSVpvs("");
9aa983d2
JH
1798 if (SvUTF8(PL_linestr))
1799 SvUTF8_on(sv);
3280af22 1800 PL_expect = XOPERATOR;
6154021b 1801 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1802 return THING;
1803 }
1804
3280af22
NIS
1805 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1806 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1807 return yylex();
79072805
LW
1808 }
1809
ffb4593c 1810 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1811 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1812 PL_linestr = PL_lex_repl;
1813 PL_lex_inpat = 0;
1814 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1815 PL_bufend += SvCUR(PL_linestr);
bd61b366 1816 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1817 SAVEFREESV(PL_linestr);
1818 PL_lex_dojoin = FALSE;
1819 PL_lex_brackets = 0;
3280af22
NIS
1820 PL_lex_casemods = 0;
1821 *PL_lex_casestack = '\0';
1822 PL_lex_starts = 0;
25da4f38 1823 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1824 PL_lex_state = LEX_INTERPNORMAL;
1825 PL_lex_starts++;
e9fa98b2
HS
1826 /* we don't clear PL_lex_repl here, so that we can check later
1827 whether this is an evalled subst; that means we rely on the
1828 logic to ensure sublex_done() is called again only via the
1829 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1830 }
e9fa98b2 1831 else {
3280af22 1832 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1833 PL_lex_repl = NULL;
e9fa98b2 1834 }
79072805 1835 return ',';
ffed7fef
LW
1836 }
1837 else {
5db06880
NC
1838#ifdef PERL_MAD
1839 if (PL_madskills) {
cd81e915
NC
1840 if (PL_thiswhite) {
1841 if (!PL_endwhite)
6b29d1f5 1842 PL_endwhite = newSVpvs("");
cd81e915
NC
1843 sv_catsv(PL_endwhite, PL_thiswhite);
1844 PL_thiswhite = 0;
1845 }
1846 if (PL_thistoken)
76f68e9b 1847 sv_setpvs(PL_thistoken,"");
5db06880 1848 else
cd81e915 1849 PL_realtokenstart = -1;
5db06880
NC
1850 }
1851#endif
f46d017c 1852 LEAVE;
3280af22
NIS
1853 PL_bufend = SvPVX(PL_linestr);
1854 PL_bufend += SvCUR(PL_linestr);
1855 PL_expect = XOPERATOR;
09bef843 1856 PL_sublex_info.sub_inwhat = 0;
79072805 1857 return ')';
ffed7fef
LW
1858 }
1859}
1860
02aa26ce
NT
1861/*
1862 scan_const
1863
1864 Extracts a pattern, double-quoted string, or transliteration. This
1865 is terrifying code.
1866
94def140 1867 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1868 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1869 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1870
94def140
ST
1871 Returns a pointer to the character scanned up to. If this is
1872 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 1873 successfully parsed), will leave an OP for the substring scanned
6154021b 1874 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
1875 by looking at the next characters herself.
1876
02aa26ce
NT
1877 In patterns:
1878 backslashes:
1879 double-quoted style: \r and \n
1880 regexp special ones: \D \s
94def140
ST
1881 constants: \x31
1882 backrefs: \1
02aa26ce
NT
1883 case and quoting: \U \Q \E
1884 stops on @ and $, but not for $ as tail anchor
1885
1886 In transliterations:
1887 characters are VERY literal, except for - not at the start or end
94def140
ST
1888 of the string, which indicates a range. If the range is in bytes,
1889 scan_const expands the range to the full set of intermediate
1890 characters. If the range is in utf8, the hyphen is replaced with
1891 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1892
1893 In double-quoted strings:
1894 backslashes:
1895 double-quoted style: \r and \n
94def140
ST
1896 constants: \x31
1897 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1898 case and quoting: \U \Q \E
1899 stops on @ and $
1900
1901 scan_const does *not* construct ops to handle interpolated strings.
1902 It stops processing as soon as it finds an embedded $ or @ variable
1903 and leaves it to the caller to work out what's going on.
1904
94def140
ST
1905 embedded arrays (whether in pattern or not) could be:
1906 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1907
1908 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1909
1910 $ in pattern could be $foo or could be tail anchor. Assumption:
1911 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1912 followed by one of "()| \r\n\t"
02aa26ce
NT
1913
1914 \1 (backreferences) are turned into $1
1915
1916 The structure of the code is
1917 while (there's a character to process) {
94def140
ST
1918 handle transliteration ranges
1919 skip regexp comments /(?#comment)/ and codes /(?{code})/
1920 skip #-initiated comments in //x patterns
1921 check for embedded arrays
02aa26ce
NT
1922 check for embedded scalars
1923 if (backslash) {
94def140
ST
1924 leave intact backslashes from leaveit (below)
1925 deprecate \1 in substitution replacements
02aa26ce
NT
1926 handle string-changing backslashes \l \U \Q \E, etc.
1927 switch (what was escaped) {
94def140
ST
1928 handle \- in a transliteration (becomes a literal -)
1929 handle \132 (octal characters)
1930 handle \x15 and \x{1234} (hex characters)
1931 handle \N{name} (named characters)
1932 handle \cV (control characters)
1933 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1934 } (end switch)
1935 } (end if backslash)
1936 } (end while character to read)
4e553d73 1937
02aa26ce
NT
1938*/
1939
76e3520e 1940STATIC char *
cea2e8a9 1941S_scan_const(pTHX_ char *start)
79072805 1942{
97aff369 1943 dVAR;
3280af22 1944 register char *send = PL_bufend; /* end of the constant */
561b68a9 1945 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1946 register char *s = start; /* start of the constant */
1947 register char *d = SvPVX(sv); /* destination for copies */
1948 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1949 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1950 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1951 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1952 UV uv;
4c3a8340
ST
1953#ifdef EBCDIC
1954 UV literal_endpoint = 0;
e294cc5d 1955 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1956#endif
012bcf8d 1957
7918f24d
NC
1958 PERL_ARGS_ASSERT_SCAN_CONST;
1959
2b9d42f0
NIS
1960 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1961 /* If we are doing a trans and we know we want UTF8 set expectation */
1962 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1963 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1964 }
1965
1966
79072805 1967 while (s < send || dorange) {
02aa26ce 1968 /* get transliterations out of the way (they're most literal) */
3280af22 1969 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1970 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1971 if (dorange) {
1ba5c669
JH
1972 I32 i; /* current expanded character */
1973 I32 min; /* first character in range */
1974 I32 max; /* last character in range */
02aa26ce 1975
e294cc5d
JH
1976#ifdef EBCDIC
1977 UV uvmax = 0;
1978#endif
1979
1980 if (has_utf8
1981#ifdef EBCDIC
1982 && !native_range
1983#endif
1984 ) {
9d4ba2ae 1985 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1986 char *e = d++;
1987 while (e-- > c)
1988 *(e + 1) = *e;
25716404 1989 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1990 /* mark the range as done, and continue */
1991 dorange = FALSE;
1992 didrange = TRUE;
1993 continue;
1994 }
2b9d42f0 1995
95a20fc0 1996 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1997#ifdef EBCDIC
1998 SvGROW(sv,
1999 SvLEN(sv) + (has_utf8 ?
2000 (512 - UTF_CONTINUATION_MARK +
2001 UNISKIP(0x100))
2002 : 256));
2003 /* How many two-byte within 0..255: 128 in UTF-8,
2004 * 96 in UTF-8-mod. */
2005#else
9cbb5ea2 2006 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2007#endif
9cbb5ea2 2008 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2009#ifdef EBCDIC
2010 if (has_utf8) {
2011 int j;
2012 for (j = 0; j <= 1; j++) {
2013 char * const c = (char*)utf8_hop((U8*)d, -1);
2014 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2015 if (j)
2016 min = (U8)uv;
2017 else if (uv < 256)
2018 max = (U8)uv;
2019 else {
2020 max = (U8)0xff; /* only to \xff */
2021 uvmax = uv; /* \x{100} to uvmax */
2022 }
2023 d = c; /* eat endpoint chars */
2024 }
2025 }
2026 else {
2027#endif
2028 d -= 2; /* eat the first char and the - */
2029 min = (U8)*d; /* first char in range */
2030 max = (U8)d[1]; /* last char in range */
2031#ifdef EBCDIC
2032 }
2033#endif
8ada0baa 2034
c2e66d9e 2035 if (min > max) {
01ec43d0 2036 Perl_croak(aTHX_
d1573ac7 2037 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2038 (char)min, (char)max);
c2e66d9e
GS
2039 }
2040
c7f1f016 2041#ifdef EBCDIC
4c3a8340
ST
2042 if (literal_endpoint == 2 &&
2043 ((isLOWER(min) && isLOWER(max)) ||
2044 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2045 if (isLOWER(min)) {
2046 for (i = min; i <= max; i++)
2047 if (isLOWER(i))
db42d148 2048 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2049 } else {
2050 for (i = min; i <= max; i++)
2051 if (isUPPER(i))
db42d148 2052 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2053 }
2054 }
2055 else
2056#endif
2057 for (i = min; i <= max; i++)
e294cc5d
JH
2058#ifdef EBCDIC
2059 if (has_utf8) {
2060 const U8 ch = (U8)NATIVE_TO_UTF(i);
2061 if (UNI_IS_INVARIANT(ch))
2062 *d++ = (U8)i;
2063 else {
2064 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2065 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2066 }
2067 }
2068 else
2069#endif
2070 *d++ = (char)i;
2071
2072#ifdef EBCDIC
2073 if (uvmax) {
2074 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2075 if (uvmax > 0x101)
2076 *d++ = (char)UTF_TO_NATIVE(0xff);
2077 if (uvmax > 0x100)
2078 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2079 }
2080#endif
02aa26ce
NT
2081
2082 /* mark the range as done, and continue */
79072805 2083 dorange = FALSE;
01ec43d0 2084 didrange = TRUE;
4c3a8340
ST
2085#ifdef EBCDIC
2086 literal_endpoint = 0;
2087#endif
79072805 2088 continue;
4e553d73 2089 }
02aa26ce
NT
2090
2091 /* range begins (ignore - as first or last char) */
79072805 2092 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2093 if (didrange) {
1fafa243 2094 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2095 }
e294cc5d
JH
2096 if (has_utf8
2097#ifdef EBCDIC
2098 && !native_range
2099#endif
2100 ) {
25716404 2101 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2102 s++;
2103 continue;
2104 }
79072805
LW
2105 dorange = TRUE;
2106 s++;
01ec43d0
GS
2107 }
2108 else {
2109 didrange = FALSE;
4c3a8340
ST
2110#ifdef EBCDIC
2111 literal_endpoint = 0;
e294cc5d 2112 native_range = TRUE;
4c3a8340 2113#endif
01ec43d0 2114 }
79072805 2115 }
02aa26ce
NT
2116
2117 /* if we get here, we're not doing a transliteration */
2118
0f5d15d6
IZ
2119 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2120 except for the last char, which will be done separately. */
3280af22 2121 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2122 if (s[2] == '#') {
e994fd66 2123 while (s+1 < send && *s != ')')
db42d148 2124 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2125 }
2126 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2127 || (s[2] == '?' && s[3] == '{'))
155aba94 2128 {
cc6b7395 2129 I32 count = 1;
0f5d15d6 2130 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2131 char c;
2132
d9f97599
GS
2133 while (count && (c = *regparse)) {
2134 if (c == '\\' && regparse[1])
2135 regparse++;
4e553d73 2136 else if (c == '{')
cc6b7395 2137 count++;
4e553d73 2138 else if (c == '}')
cc6b7395 2139 count--;
d9f97599 2140 regparse++;
cc6b7395 2141 }
e994fd66 2142 if (*regparse != ')')
5bdf89e7 2143 regparse--; /* Leave one char for continuation. */
0f5d15d6 2144 while (s < regparse)
db42d148 2145 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2146 }
748a9306 2147 }
02aa26ce
NT
2148
2149 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2150 else if (*s == '#' && PL_lex_inpat &&
2151 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2152 while (s+1 < send && *s != '\n')
db42d148 2153 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2154 }
02aa26ce 2155
5d1d4326 2156 /* check for embedded arrays
da6eedaa 2157 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2158 */
1749ea0d
ST
2159 else if (*s == '@' && s[1]) {
2160 if (isALNUM_lazy_if(s+1,UTF))
2161 break;
2162 if (strchr(":'{$", s[1]))
2163 break;
2164 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2165 break; /* in regexp, neither @+ nor @- are interpolated */
2166 }
02aa26ce
NT
2167
2168 /* check for embedded scalars. only stop if we're sure it's a
2169 variable.
2170 */
79072805 2171 else if (*s == '$') {
3280af22 2172 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2173 break;
77772344
B
2174 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2175 if (s[1] == '\\' && ckWARN(WARN_AMBIGUOUS)) {
2176 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2177 "Possible unintended interpolation of $\\ in regex");
2178 }
79072805 2179 break; /* in regexp, $ might be tail anchor */
77772344 2180 }
79072805 2181 }
02aa26ce 2182
2b9d42f0
NIS
2183 /* End of else if chain - OP_TRANS rejoin rest */
2184
02aa26ce 2185 /* backslashes */
79072805
LW
2186 if (*s == '\\' && s+1 < send) {
2187 s++;
02aa26ce 2188
02aa26ce 2189 /* deprecate \1 in strings and substitution replacements */
3280af22 2190 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2191 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2192 {
599cee73 2193 if (ckWARN(WARN_SYNTAX))
9014280d 2194 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2195 *--s = '$';
2196 break;
2197 }
02aa26ce
NT
2198
2199 /* string-change backslash escapes */
3280af22 2200 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2201 --s;
2202 break;
2203 }
cc74c5bd
ST
2204 /* skip any other backslash escapes in a pattern */
2205 else if (PL_lex_inpat) {
2206 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2207 goto default_action;
2208 }
02aa26ce
NT
2209
2210 /* if we get here, it's either a quoted -, or a digit */
79072805 2211 switch (*s) {
02aa26ce
NT
2212
2213 /* quoted - in transliterations */
79072805 2214 case '-':
3280af22 2215 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2216 *d++ = *s++;
2217 continue;
2218 }
2219 /* FALL THROUGH */
2220 default:
11b8faa4 2221 {
86f97054 2222 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2223 ckWARN(WARN_MISC))
9014280d 2224 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2225 "Unrecognized escape \\%c passed through",
2226 *s);
11b8faa4 2227 /* default action is to copy the quoted character */
f9a63242 2228 goto default_action;
11b8faa4 2229 }
02aa26ce
NT
2230
2231 /* \132 indicates an octal constant */
79072805
LW
2232 case '0': case '1': case '2': case '3':
2233 case '4': case '5': case '6': case '7':
ba210ebe 2234 {
53305cf1
NC
2235 I32 flags = 0;
2236 STRLEN len = 3;
2237 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2238 s += len;
2239 }
012bcf8d 2240 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2241
2242 /* \x24 indicates a hex constant */
79072805 2243 case 'x':
a0ed51b3
LW
2244 ++s;
2245 if (*s == '{') {
9d4ba2ae 2246 char* const e = strchr(s, '}');
a4c04bdc
NC
2247 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2248 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2249 STRLEN len;
355860ce 2250
53305cf1 2251 ++s;
adaeee49 2252 if (!e) {
a0ed51b3 2253 yyerror("Missing right brace on \\x{}");
355860ce 2254 continue;
ba210ebe 2255 }
53305cf1
NC
2256 len = e - s;
2257 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2258 s = e + 1;
a0ed51b3
LW
2259 }
2260 else {
ba210ebe 2261 {
53305cf1 2262 STRLEN len = 2;
a4c04bdc 2263 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2264 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2265 s += len;
2266 }
012bcf8d
GS
2267 }
2268
2269 NUM_ESCAPE_INSERT:
2270 /* Insert oct or hex escaped character.
301d3d20 2271 * There will always enough room in sv since such
db42d148 2272 * escapes will be longer than any UTF-8 sequence
301d3d20 2273 * they can end up as. */
ba7cea30 2274
c7f1f016
NIS
2275 /* We need to map to chars to ASCII before doing the tests
2276 to cover EBCDIC
2277 */
c4d5f83a 2278 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2279 if (!has_utf8 && uv > 255) {
301d3d20
JH
2280 /* Might need to recode whatever we have
2281 * accumulated so far if it contains any
2282 * hibit chars.
2283 *
2284 * (Can't we keep track of that and avoid
2285 * this rescan? --jhi)
012bcf8d 2286 */
c7f1f016 2287 int hicount = 0;
63cd0674
NIS
2288 U8 *c;
2289 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2290 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2291 hicount++;
db42d148 2292 }
012bcf8d 2293 }
63cd0674 2294 if (hicount) {
9d4ba2ae 2295 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2296 U8 *src, *dst;
2297 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2298 src = (U8 *)d - 1;
2299 dst = src+hicount;
2300 d += hicount;
cfd0369c 2301 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2302 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2303 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2304 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2305 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2306 }
2307 else {
63cd0674 2308 *dst-- = *src;
012bcf8d 2309 }
c7f1f016 2310 src--;
012bcf8d
GS
2311 }
2312 }
2313 }
2314
9aa983d2 2315 if (has_utf8 || uv > 255) {
9041c2e3 2316 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2317 has_utf8 = TRUE;
f9a63242
JH
2318 if (PL_lex_inwhat == OP_TRANS &&
2319 PL_sublex_info.sub_op) {
2320 PL_sublex_info.sub_op->op_private |=
2321 (PL_lex_repl ? OPpTRANS_FROM_UTF
2322 : OPpTRANS_TO_UTF);
f9a63242 2323 }
e294cc5d
JH
2324#ifdef EBCDIC
2325 if (uv > 255 && !dorange)
2326 native_range = FALSE;
2327#endif
012bcf8d 2328 }
a0ed51b3 2329 else {
012bcf8d 2330 *d++ = (char)uv;
a0ed51b3 2331 }
012bcf8d
GS
2332 }
2333 else {
c4d5f83a 2334 *d++ = (char) uv;
a0ed51b3 2335 }
79072805 2336 continue;
02aa26ce 2337
b239daa5 2338 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2339 case 'N':
55eda711 2340 ++s;
423cee85
JH
2341 if (*s == '{') {
2342 char* e = strchr(s, '}');
155aba94 2343 SV *res;
423cee85 2344 STRLEN len;
cfd0369c 2345 const char *str;
4e553d73 2346
423cee85 2347 if (!e) {
5777a3f7 2348 yyerror("Missing right brace on \\N{}");
423cee85
JH
2349 e = s - 1;
2350 goto cont_scan;
2351 }
dbc0d4f2
JH
2352 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2353 /* \N{U+...} */
2354 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2355 PERL_SCAN_DISALLOW_PREFIX;
2356 s += 3;
2357 len = e - s;
2358 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2359 if ( e > s && len != (STRLEN)(e - s) ) {
2360 uv = 0xFFFD;
fc8cd66c 2361 }
dbc0d4f2
JH
2362 s = e + 1;
2363 goto NUM_ESCAPE_INSERT;
2364 }
55eda711 2365 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2366 res = new_constant( NULL, 0, "charnames",
eb0d8d16 2367 res, NULL, s - 2, e - s + 3 );
f9a63242
JH
2368 if (has_utf8)
2369 sv_utf8_upgrade(res);
cfd0369c 2370 str = SvPV_const(res,len);
1c47067b
JH
2371#ifdef EBCDIC_NEVER_MIND
2372 /* charnames uses pack U and that has been
2373 * recently changed to do the below uni->native
2374 * mapping, so this would be redundant (and wrong,
2375 * the code point would be doubly converted).
2376 * But leave this in just in case the pack U change
2377 * gets revoked, but the semantics is still
2378 * desireable for charnames. --jhi */
cddc7ef4 2379 {
cfd0369c 2380 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2381
2382 if (uv < 0x100) {
89ebb4a3 2383 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2384
2385 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2386 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2387 str = SvPV_const(res, len);
cddc7ef4
JH
2388 }
2389 }
2390#endif
89491803 2391 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2392 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2393 SvCUR_set(sv, d - ostart);
2394 SvPOK_on(sv);
e4f3eed8 2395 *d = '\0';
f08d6ad9 2396 sv_utf8_upgrade(sv);
d2f449dd 2397 /* this just broke our allocation above... */
eb160463 2398 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2399 d = SvPVX(sv) + SvCUR(sv);
89491803 2400 has_utf8 = TRUE;
f08d6ad9 2401 }
eb160463 2402 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2403 const char * const odest = SvPVX_const(sv);
423cee85 2404
8973db79 2405 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2406 d = SvPVX(sv) + (d - odest);
2407 }
e294cc5d
JH
2408#ifdef EBCDIC
2409 if (!dorange)
2410 native_range = FALSE; /* \N{} is guessed to be Unicode */
2411#endif
423cee85
JH
2412 Copy(str, d, len, char);
2413 d += len;
2414 SvREFCNT_dec(res);
2415 cont_scan:
2416 s = e + 1;
2417 }
2418 else
5777a3f7 2419 yyerror("Missing braces on \\N{}");
423cee85
JH
2420 continue;
2421
02aa26ce 2422 /* \c is a control character */
79072805
LW
2423 case 'c':
2424 s++;
961ce445 2425 if (s < send) {
ba210ebe 2426 U8 c = *s++;
c7f1f016
NIS
2427#ifdef EBCDIC
2428 if (isLOWER(c))
2429 c = toUPPER(c);
2430#endif
db42d148 2431 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2432 }
961ce445
RGS
2433 else {
2434 yyerror("Missing control char name in \\c");
2435 }
79072805 2436 continue;
02aa26ce
NT
2437
2438 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2439 case 'b':
db42d148 2440 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2441 break;
2442 case 'n':
db42d148 2443 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2444 break;
2445 case 'r':
db42d148 2446 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2447 break;
2448 case 'f':
db42d148 2449 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2450 break;
2451 case 't':
db42d148 2452 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2453 break;
34a3fe2a 2454 case 'e':
db42d148 2455 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2456 break;
2457 case 'a':
db42d148 2458 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2459 break;
02aa26ce
NT
2460 } /* end switch */
2461
79072805
LW
2462 s++;
2463 continue;
02aa26ce 2464 } /* end if (backslash) */
4c3a8340
ST
2465#ifdef EBCDIC
2466 else
2467 literal_endpoint++;
2468#endif
02aa26ce 2469
f9a63242 2470 default_action:
2b9d42f0
NIS
2471 /* If we started with encoded form, or already know we want it
2472 and then encode the next character */
2473 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2474 STRLEN len = 1;
5f66b61c
AL
2475 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2476 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2477 s += len;
2478 if (need > len) {
2479 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2480 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2481 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2482 }
5f66b61c 2483 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2484 has_utf8 = TRUE;
e294cc5d
JH
2485#ifdef EBCDIC
2486 if (uv > 255 && !dorange)
2487 native_range = FALSE;
2488#endif
2b9d42f0
NIS
2489 }
2490 else {
2491 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2492 }
02aa26ce
NT
2493 } /* while loop to process each character */
2494
2495 /* terminate the string and set up the sv */
79072805 2496 *d = '\0';
95a20fc0 2497 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2498 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2499 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2500
79072805 2501 SvPOK_on(sv);
9f4817db 2502 if (PL_encoding && !has_utf8) {
d0063567
DK
2503 sv_recode_to_utf8(sv, PL_encoding);
2504 if (SvUTF8(sv))
2505 has_utf8 = TRUE;
9f4817db 2506 }
2b9d42f0 2507 if (has_utf8) {
7e2040f0 2508 SvUTF8_on(sv);
2b9d42f0 2509 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2510 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2511 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2512 }
2513 }
79072805 2514
02aa26ce 2515 /* shrink the sv if we allocated more than we used */
79072805 2516 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2517 SvPV_shrink_to_cur(sv);
79072805 2518 }
02aa26ce 2519
6154021b 2520 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 2521 if (s > PL_bufptr) {
eb0d8d16
NC
2522 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2523 const char *const key = PL_lex_inpat ? "qr" : "q";
2524 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2525 const char *type;
2526 STRLEN typelen;
2527
2528 if (PL_lex_inwhat == OP_TRANS) {
2529 type = "tr";
2530 typelen = 2;
2531 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2532 type = "s";
2533 typelen = 1;
2534 } else {
2535 type = "qq";
2536 typelen = 2;
2537 }
2538
2539 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2540 type, typelen);
2541 }
6154021b 2542 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2543 } else
8990e307 2544 SvREFCNT_dec(sv);
79072805
LW
2545 return s;
2546}
2547
ffb4593c
NT
2548/* S_intuit_more
2549 * Returns TRUE if there's more to the expression (e.g., a subscript),
2550 * FALSE otherwise.
ffb4593c
NT
2551 *
2552 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2553 *
2554 * ->[ and ->{ return TRUE
2555 * { and [ outside a pattern are always subscripts, so return TRUE
2556 * if we're outside a pattern and it's not { or [, then return FALSE
2557 * if we're in a pattern and the first char is a {
2558 * {4,5} (any digits around the comma) returns FALSE
2559 * if we're in a pattern and the first char is a [
2560 * [] returns FALSE
2561 * [SOMETHING] has a funky algorithm to decide whether it's a
2562 * character class or not. It has to deal with things like
2563 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2564 * anything else returns TRUE
2565 */
2566
9cbb5ea2
GS
2567/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2568
76e3520e 2569STATIC int
cea2e8a9 2570S_intuit_more(pTHX_ register char *s)
79072805 2571{
97aff369 2572 dVAR;
7918f24d
NC
2573
2574 PERL_ARGS_ASSERT_INTUIT_MORE;
2575
3280af22 2576 if (PL_lex_brackets)
79072805
LW
2577 return TRUE;
2578 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2579 return TRUE;
2580 if (*s != '{' && *s != '[')
2581 return FALSE;
3280af22 2582 if (!PL_lex_inpat)
79072805
LW
2583 return TRUE;
2584
2585 /* In a pattern, so maybe we have {n,m}. */
2586 if (*s == '{') {
2587 s++;
2588 if (!isDIGIT(*s))
2589 return TRUE;
2590 while (isDIGIT(*s))
2591 s++;
2592 if (*s == ',')
2593 s++;
2594 while (isDIGIT(*s))
2595 s++;
2596 if (*s == '}')
2597 return FALSE;
2598 return TRUE;
2599
2600 }
2601
2602 /* On the other hand, maybe we have a character class */
2603
2604 s++;
2605 if (*s == ']' || *s == '^')
2606 return FALSE;
2607 else {
ffb4593c 2608 /* this is terrifying, and it works */
79072805
LW
2609 int weight = 2; /* let's weigh the evidence */
2610 char seen[256];
f27ffc4a 2611 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2612 const char * const send = strchr(s,']');
3280af22 2613 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2614
2615 if (!send) /* has to be an expression */
2616 return TRUE;
2617
2618 Zero(seen,256,char);
2619 if (*s == '$')
2620 weight -= 3;
2621 else if (isDIGIT(*s)) {
2622 if (s[1] != ']') {
2623 if (isDIGIT(s[1]) && s[2] == ']')
2624 weight -= 10;
2625 }
2626 else
2627 weight -= 100;
2628 }
2629 for (; s < send; s++) {
2630 last_un_char = un_char;
2631 un_char = (unsigned char)*s;
2632 switch (*s) {
2633 case '@':
2634 case '&':
2635 case '$':
2636 weight -= seen[un_char] * 10;
7e2040f0 2637 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2638 int len;
8903cb82 2639 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2640 len = (int)strlen(tmpbuf);
2641 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2642 weight -= 100;
2643 else
2644 weight -= 10;
2645 }
2646 else if (*s == '$' && s[1] &&
93a17b20
LW
2647 strchr("[#!%*<>()-=",s[1])) {
2648 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2649 weight -= 10;
2650 else
2651 weight -= 1;
2652 }
2653 break;
2654 case '\\':
2655 un_char = 254;
2656 if (s[1]) {
93a17b20 2657 if (strchr("wds]",s[1]))
79072805 2658 weight += 100;
10edeb5d 2659 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2660 weight += 1;
93a17b20 2661 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2662 weight += 40;
2663 else if (isDIGIT(s[1])) {
2664 weight += 40;
2665 while (s[1] && isDIGIT(s[1]))
2666 s++;
2667 }
2668 }
2669 else
2670 weight += 100;
2671 break;
2672 case '-':
2673 if (s[1] == '\\')
2674 weight += 50;
93a17b20 2675 if (strchr("aA01! ",last_un_char))
79072805 2676 weight += 30;
93a17b20 2677 if (strchr("zZ79~",s[1]))
79072805 2678 weight += 30;
f27ffc4a
GS
2679 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2680 weight -= 5; /* cope with negative subscript */
79072805
LW
2681 break;
2682 default:
3792a11b
NC
2683 if (!isALNUM(last_un_char)
2684 && !(last_un_char == '$' || last_un_char == '@'
2685 || last_un_char == '&')
2686 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2687 char *d = tmpbuf;
2688 while (isALPHA(*s))
2689 *d++ = *s++;
2690 *d = '\0';
5458a98a 2691 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2692 weight -= 150;
2693 }
2694 if (un_char == last_un_char + 1)
2695 weight += 5;
2696 weight -= seen[un_char];
2697 break;
2698 }
2699 seen[un_char]++;
2700 }
2701 if (weight >= 0) /* probably a character class */
2702 return FALSE;
2703 }
2704
2705 return TRUE;
2706}
ffed7fef 2707
ffb4593c
NT
2708/*
2709 * S_intuit_method
2710 *
2711 * Does all the checking to disambiguate
2712 * foo bar
2713 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2714 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2715 *
2716 * First argument is the stuff after the first token, e.g. "bar".
2717 *
2718 * Not a method if bar is a filehandle.
2719 * Not a method if foo is a subroutine prototyped to take a filehandle.
2720 * Not a method if it's really "Foo $bar"
2721 * Method if it's "foo $bar"
2722 * Not a method if it's really "print foo $bar"
2723 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2724 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2725 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2726 * =>
2727 */
2728
76e3520e 2729STATIC int
62d55b22 2730S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2731{
97aff369 2732 dVAR;
a0d0e21e 2733 char *s = start + (*start == '$');
3280af22 2734 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2735 STRLEN len;
2736 GV* indirgv;
5db06880
NC
2737#ifdef PERL_MAD
2738 int soff;
2739#endif
a0d0e21e 2740
7918f24d
NC
2741 PERL_ARGS_ASSERT_INTUIT_METHOD;
2742
a0d0e21e 2743 if (gv) {
62d55b22 2744 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2745 return 0;
62d55b22
NC
2746 if (cv) {
2747 if (SvPOK(cv)) {
2748 const char *proto = SvPVX_const(cv);
2749 if (proto) {
2750 if (*proto == ';')
2751 proto++;
2752 if (*proto == '*')
2753 return 0;
2754 }
b6c543e3
IZ
2755 }
2756 } else
c35e046a 2757 gv = NULL;
a0d0e21e 2758 }
8903cb82 2759 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2760 /* start is the beginning of the possible filehandle/object,
2761 * and s is the end of it
2762 * tmpbuf is a copy of it
2763 */
2764
a0d0e21e 2765 if (*start == '$') {
3ef1310e
RGS
2766 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2767 isUPPER(*PL_tokenbuf))
a0d0e21e 2768 return 0;
5db06880
NC
2769#ifdef PERL_MAD
2770 len = start - SvPVX(PL_linestr);
2771#endif
29595ff2 2772 s = PEEKSPACE(s);
f0092767 2773#ifdef PERL_MAD
5db06880
NC
2774 start = SvPVX(PL_linestr) + len;
2775#endif
3280af22
NIS
2776 PL_bufptr = start;
2777 PL_expect = XREF;
a0d0e21e
LW
2778 return *s == '(' ? FUNCMETH : METHOD;
2779 }
5458a98a 2780 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2781 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2782 len -= 2;
2783 tmpbuf[len] = '\0';
5db06880
NC
2784#ifdef PERL_MAD
2785 soff = s - SvPVX(PL_linestr);
2786#endif
c3e0f903
GS
2787 goto bare_package;
2788 }
90e5519e 2789 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2790 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2791 return 0;
2792 /* filehandle or package name makes it a method */
da51bb9b 2793 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2794#ifdef PERL_MAD
2795 soff = s - SvPVX(PL_linestr);
2796#endif
29595ff2 2797 s = PEEKSPACE(s);
3280af22 2798 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2799 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2800 bare_package:
cd81e915 2801 start_force(PL_curforce);
9ded7720 2802 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2803 newSVpvn(tmpbuf,len));
9ded7720 2804 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2805 if (PL_madskills)
2806 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2807 PL_expect = XTERM;
a0d0e21e 2808 force_next(WORD);
3280af22 2809 PL_bufptr = s;
5db06880
NC
2810#ifdef PERL_MAD
2811 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2812#endif
a0d0e21e
LW
2813 return *s == '(' ? FUNCMETH : METHOD;
2814 }
2815 }
2816 return 0;
2817}
2818
16d20bd9 2819/* Encoded script support. filter_add() effectively inserts a
4e553d73 2820 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2821 * Note that the filter function only applies to the current source file
2822 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2823 *
2824 * The datasv parameter (which may be NULL) can be used to pass
2825 * private data to this instance of the filter. The filter function
2826 * can recover the SV using the FILTER_DATA macro and use it to
2827 * store private buffers and state information.
2828 *
2829 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2830 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2831 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2832 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2833 * private use must be set using malloc'd pointers.
2834 */
16d20bd9
AD
2835
2836SV *
864dbfa3 2837Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2838{
97aff369 2839 dVAR;
f4c556ac 2840 if (!funcp)
a0714e2c 2841 return NULL;
f4c556ac 2842
5486870f
DM
2843 if (!PL_parser)
2844 return NULL;
2845
3280af22
NIS
2846 if (!PL_rsfp_filters)
2847 PL_rsfp_filters = newAV();
16d20bd9 2848 if (!datasv)
561b68a9 2849 datasv = newSV(0);
862a34c6 2850 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2851 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2852 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2853 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2854 FPTR2DPTR(void *, IoANY(datasv)),
2855 SvPV_nolen(datasv)));
3280af22
NIS
2856 av_unshift(PL_rsfp_filters, 1);
2857 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2858 return(datasv);
2859}
4e553d73 2860
16d20bd9
AD
2861
2862/* Delete most recently added instance of this filter function. */
a0d0e21e 2863void
864dbfa3 2864Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2865{
97aff369 2866 dVAR;
e0c19803 2867 SV *datasv;
24801a4b 2868
7918f24d
NC
2869 PERL_ARGS_ASSERT_FILTER_DEL;
2870
33073adb 2871#ifdef DEBUGGING
55662e27
JH
2872 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2873 FPTR2DPTR(void*, funcp)));
33073adb 2874#endif
5486870f 2875 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2876 return;
2877 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2878 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2879 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2880 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2881 IoANY(datasv) = (void *)NULL;
3280af22 2882 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2883
16d20bd9
AD
2884 return;
2885 }
2886 /* we need to search for the correct entry and clear it */
cea2e8a9 2887 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2888}
2889
2890
1de9afcd
RGS
2891/* Invoke the idxth filter function for the current rsfp. */
2892/* maxlen 0 = read one text line */
16d20bd9 2893I32
864dbfa3 2894Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2895{
97aff369 2896 dVAR;
16d20bd9
AD
2897 filter_t funcp;
2898 SV *datasv = NULL;
f482118e
NC
2899 /* This API is bad. It should have been using unsigned int for maxlen.
2900 Not sure if we want to change the API, but if not we should sanity
2901 check the value here. */
39cd7a59
NC
2902 const unsigned int correct_length
2903 = maxlen < 0 ?
2904#ifdef PERL_MICRO
2905 0x7FFFFFFF
2906#else
2907 INT_MAX
2908#endif
2909 : maxlen;
e50aee73 2910
7918f24d
NC
2911 PERL_ARGS_ASSERT_FILTER_READ;
2912
5486870f 2913 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 2914 return -1;
1de9afcd 2915 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2916 /* Provide a default input filter to make life easy. */
2917 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2918 DEBUG_P(PerlIO_printf(Perl_debug_log,
2919 "filter_read %d: from rsfp\n", idx));
f482118e 2920 if (correct_length) {
16d20bd9
AD
2921 /* Want a block */
2922 int len ;
f54cb97a 2923 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2924
2925 /* ensure buf_sv is large enough */
f482118e
NC
2926 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2927 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2928 correct_length)) <= 0) {
3280af22 2929 if (PerlIO_error(PL_rsfp))
37120919
AD
2930 return -1; /* error */
2931 else
2932 return 0 ; /* end of file */
2933 }
16d20bd9
AD
2934 SvCUR_set(buf_sv, old_len + len) ;
2935 } else {
2936 /* Want a line */
3280af22
NIS
2937 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2938 if (PerlIO_error(PL_rsfp))
37120919
AD
2939 return -1; /* error */
2940 else
2941 return 0 ; /* end of file */
2942 }
16d20bd9
AD
2943 }
2944 return SvCUR(buf_sv);
2945 }
2946 /* Skip this filter slot if filter has been deleted */
1de9afcd 2947 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2948 DEBUG_P(PerlIO_printf(Perl_debug_log,
2949 "filter_read %d: skipped (filter deleted)\n",
2950 idx));
f482118e 2951 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2952 }
2953 /* Get function pointer hidden within datasv */
8141890a 2954 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2955 DEBUG_P(PerlIO_printf(Perl_debug_log,
2956 "filter_read %d: via function %p (%s)\n",
ca0270c4 2957 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2958 /* Call function. The function is expected to */
2959 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2960 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2961 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2962}
2963
76e3520e 2964STATIC char *
cea2e8a9 2965S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2966{
97aff369 2967 dVAR;
7918f24d
NC
2968
2969 PERL_ARGS_ASSERT_FILTER_GETS;
2970
c39cd008 2971#ifdef PERL_CR_FILTER
3280af22 2972 if (!PL_rsfp_filters) {
c39cd008 2973 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2974 }
2975#endif
3280af22 2976 if (PL_rsfp_filters) {
55497cff
PP
2977 if (!append)
2978 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2979 if (FILTER_READ(0, sv, 0) > 0)
2980 return ( SvPVX(sv) ) ;
2981 else
bd61b366 2982 return NULL ;
16d20bd9 2983 }
9d116dd7 2984 else
fd049845 2985 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2986}
2987
01ec43d0 2988STATIC HV *
9bde8eb0 2989S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 2990{
97aff369 2991 dVAR;
def3634b
GS
2992 GV *gv;
2993
7918f24d
NC
2994 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
2995
01ec43d0 2996 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2997 return PL_curstash;
2998
2999 if (len > 2 &&
3000 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3001 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3002 {
3003 return GvHV(gv); /* Foo:: */
def3634b
GS
3004 }
3005
3006 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3007 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3008 if (gv && GvCV(gv)) {
3009 SV * const sv = cv_const_sv(GvCV(gv));
3010 if (sv)
9bde8eb0 3011 pkgname = SvPV_const(sv, len);
def3634b
GS
3012 }
3013
9bde8eb0 3014 return gv_stashpvn(pkgname, len, 0);
def3634b 3015}
a0d0e21e 3016
e3f73d4e
RGS
3017/*
3018 * S_readpipe_override
3019 * Check whether readpipe() is overriden, and generates the appropriate
3020 * optree, provided sublex_start() is called afterwards.
3021 */
3022STATIC void
1d51329b 3023S_readpipe_override(pTHX)
e3f73d4e
RGS
3024{
3025 GV **gvp;
3026 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3027 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3028 if ((gv_readpipe
3029 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3030 ||
3031 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3032 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3033 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3034 {
3035 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3036 append_elem(OP_LIST,
3037 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3038 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3039 }
e3f73d4e
RGS
3040}
3041
5db06880
NC
3042#ifdef PERL_MAD
3043 /*
3044 * Perl_madlex
3045 * The intent of this yylex wrapper is to minimize the changes to the
3046 * tokener when we aren't interested in collecting madprops. It remains
3047 * to be seen how successful this strategy will be...
3048 */
3049
3050int
3051Perl_madlex(pTHX)
3052{
3053 int optype;
3054 char *s = PL_bufptr;
3055
cd81e915
NC
3056 /* make sure PL_thiswhite is initialized */
3057 PL_thiswhite = 0;
3058 PL_thismad = 0;
5db06880 3059
cd81e915 3060 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3061 if (PL_pending_ident)
3062 return S_pending_ident(aTHX);
3063
3064 /* previous token ate up our whitespace? */
cd81e915
NC
3065 if (!PL_lasttoke && PL_nextwhite) {
3066 PL_thiswhite = PL_nextwhite;
3067 PL_nextwhite = 0;
5db06880
NC
3068 }
3069
3070 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3071 PL_realtokenstart = -1;
3072 PL_thistoken = 0;
5db06880
NC
3073 optype = yylex();
3074 s = PL_bufptr;
cd81e915 3075 assert(PL_curforce < 0);
5db06880 3076
cd81e915
NC
3077 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3078 if (!PL_thistoken) {
3079 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3080 PL_thistoken = newSVpvs("");
5db06880 3081 else {
c35e046a 3082 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3083 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3084 }
3085 }
cd81e915
NC
3086 if (PL_thismad) /* install head */
3087 CURMAD('X', PL_thistoken);
5db06880
NC
3088 }
3089
3090 /* last whitespace of a sublex? */
cd81e915
NC
3091 if (optype == ')' && PL_endwhite) {
3092 CURMAD('X', PL_endwhite);
5db06880
NC
3093 }
3094
cd81e915 3095 if (!PL_thismad) {
5db06880
NC
3096
3097 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3098 if (!PL_thiswhite && !PL_endwhite && !optype) {
3099 sv_free(PL_thistoken);
3100 PL_thistoken = 0;
5db06880
NC
3101 return 0;
3102 }
3103
3104 /* put off final whitespace till peg */
3105 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3106 PL_nextwhite = PL_thiswhite;
3107 PL_thiswhite = 0;
5db06880 3108 }
cd81e915
NC
3109 else if (PL_thisopen) {
3110 CURMAD('q', PL_thisopen);
3111 if (PL_thistoken)
3112 sv_free(PL_thistoken);
3113 PL_thistoken = 0;
5db06880
NC
3114 }
3115 else {
3116 /* Store actual token text as madprop X */
cd81e915 3117 CURMAD('X', PL_thistoken);
5db06880
NC
3118 }
3119
cd81e915 3120 if (PL_thiswhite) {
5db06880 3121 /* add preceding whitespace as madprop _ */
cd81e915 3122 CURMAD('_', PL_thiswhite);
5db06880
NC
3123 }
3124
cd81e915 3125 if (PL_thisstuff) {
5db06880 3126 /* add quoted material as madprop = */
cd81e915 3127 CURMAD('=', PL_thisstuff);
5db06880
NC
3128 }
3129
cd81e915 3130 if (PL_thisclose) {
5db06880 3131 /* add terminating quote as madprop Q */
cd81e915 3132 CURMAD('Q', PL_thisclose);
5db06880
NC
3133 }
3134 }
3135
3136 /* special processing based on optype */
3137
3138 switch (optype) {
3139
3140 /* opval doesn't need a TOKEN since it can already store mp */
3141 case WORD:
3142 case METHOD:
3143 case FUNCMETH:
3144 case THING:
3145 case PMFUNC:
3146 case PRIVATEREF:
3147 case FUNC0SUB:
3148 case UNIOPSUB:
3149 case LSTOPSUB:
6154021b
RGS
3150 if (pl_yylval.opval)
3151 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 3152 PL_thismad = 0;
5db06880
NC
3153 return optype;
3154
3155 /* fake EOF */
3156 case 0:
3157 optype = PEG;
cd81e915
NC
3158 if (PL_endwhite) {
3159 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3160 PL_endwhite = 0;
5db06880
NC
3161 }
3162 break;
3163
3164 case ']':
3165 case '}':
cd81e915 3166 if (PL_faketokens)
5db06880
NC
3167 break;
3168 /* remember any fake bracket that lexer is about to discard */
3169 if (PL_lex_brackets == 1 &&
3170 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3171 {
3172 s = PL_bufptr;
3173 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3174 s++;
3175 if (*s == '}') {
cd81e915
NC
3176 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3177 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3178 PL_thiswhite = 0;
5db06880
NC
3179 PL_bufptr = s - 1;
3180 break; /* don't bother looking for trailing comment */
3181 }
3182 else
3183 s = PL_bufptr;
3184 }
3185 if (optype == ']')
3186 break;
3187 /* FALLTHROUGH */
3188
3189 /* attach a trailing comment to its statement instead of next token */
3190 case ';':
cd81e915 3191 if (PL_faketokens)
5db06880
NC
3192 break;
3193 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3194 s = PL_bufptr;
3195 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3196 s++;
3197 if (*s == '\n' || *s == '#') {
3198 while (s < PL_bufend && *s != '\n')
3199 s++;
3200 if (s < PL_bufend)
3201 s++;
cd81e915
NC
3202 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3203 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3204 PL_thiswhite = 0;
5db06880
NC
3205 PL_bufptr = s;
3206 }
3207 }
3208 break;
3209
3210 /* pval */
3211 case LABEL:
3212 break;
3213
3214 /* ival */
3215 default:
3216 break;
3217
3218 }
3219
3220 /* Create new token struct. Note: opvals return early above. */
6154021b 3221 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 3222 PL_thismad = 0;
5db06880
NC
3223 return optype;
3224}
3225#endif
3226
468aa647 3227STATIC char *
cc6ed77d 3228S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3229 dVAR;
7918f24d
NC
3230
3231 PERL_ARGS_ASSERT_TOKENIZE_USE;
3232
468aa647
RGS
3233 if (PL_expect != XSTATE)
3234 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3235 is_use ? "use" : "no"));
29595ff2 3236 s = SKIPSPACE1(s);
468aa647
RGS
3237 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3238 s = force_version(s, TRUE);
29595ff2 3239 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3240 start_force(PL_curforce);
9ded7720 3241 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3242 force_next(WORD);
3243 }
3244 else if (*s == 'v') {
3245 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3246 s = force_version(s, FALSE);
3247 }
3248 }
3249 else {
3250 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3251 s = force_version(s, FALSE);
3252 }
6154021b 3253 pl_yylval.ival = is_use;
468aa647
RGS
3254 return s;
3255}
748a9306 3256#ifdef DEBUGGING
27da23d5 3257 static const char* const exp_name[] =
09bef843 3258 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3259 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3260 };
748a9306 3261#endif
463ee0b2 3262
02aa26ce
NT
3263/*
3264 yylex
3265
3266 Works out what to call the token just pulled out of the input
3267 stream. The yacc parser takes care of taking the ops we return and
3268 stitching them into a tree.
3269
3270 Returns:
3271 PRIVATEREF
3272
3273 Structure:
3274 if read an identifier
3275 if we're in a my declaration
3276 croak if they tried to say my($foo::bar)
3277 build the ops for a my() declaration
3278 if it's an access to a my() variable
3279 are we in a sort block?
3280 croak if my($a); $a <=> $b
3281 build ops for access to a my() variable
3282 if in a dq string, and they've said @foo and we can't find @foo
3283 croak
3284 build ops for a bareword
3285 if we already built the token before, use it.
3286*/
3287
20141f0e 3288
dba4d153
JH
3289#ifdef __SC__
3290#pragma segment Perl_yylex
3291#endif
dba4d153 3292int
dba4d153 3293Perl_yylex(pTHX)
20141f0e 3294{
97aff369 3295 dVAR;
3afc138a 3296 register char *s = PL_bufptr;
378cc40b 3297 register char *d;
463ee0b2 3298 STRLEN len;
aa7440fb 3299 bool bof = FALSE;
a687059c 3300
10edeb5d
JH
3301 /* orig_keyword, gvp, and gv are initialized here because
3302 * jump to the label just_a_word_zero can bypass their
3303 * initialization later. */
3304 I32 orig_keyword = 0;
3305 GV *gv = NULL;
3306 GV **gvp = NULL;
3307
bbf60fe6 3308 DEBUG_T( {
396482e1 3309 SV* tmp = newSVpvs("");
b6007c36
DM
3310 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3311 (IV)CopLINE(PL_curcop),
3312 lex_state_names[PL_lex_state],
3313 exp_name[PL_expect],
3314 pv_display(tmp, s, strlen(s), 0, 60));
3315 SvREFCNT_dec(tmp);
bbf60fe6 3316 } );
02aa26ce 3317 /* check if there's an identifier for us to look at */
ba979b31 3318 if (PL_pending_ident)
bbf60fe6 3319 return REPORT(S_pending_ident(aTHX));
bbce6d69 3320
02aa26ce
NT
3321 /* no identifier pending identification */
3322
3280af22 3323 switch (PL_lex_state) {
79072805
LW
3324#ifdef COMMENTARY
3325 case LEX_NORMAL: /* Some compilers will produce faster */
3326 case LEX_INTERPNORMAL: /* code if we comment these out. */
3327 break;
3328#endif
3329
09bef843 3330 /* when we've already built the next token, just pull it out of the queue */
79072805 3331 case LEX_KNOWNEXT:
5db06880
NC
3332#ifdef PERL_MAD
3333 PL_lasttoke--;
6154021b 3334 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 3335 if (PL_madskills) {
cd81e915 3336 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3337 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 3338 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 3339 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
3340 PL_thismad->mad_val = 0;
3341 mad_free(PL_thismad);
3342 PL_thismad = 0;
5db06880
NC
3343 }
3344 }
3345 if (!PL_lasttoke) {
3346 PL_lex_state = PL_lex_defer;
3347 PL_expect = PL_lex_expect;
3348 PL_lex_defer = LEX_NORMAL;
3349 if (!PL_nexttoke[PL_lasttoke].next_type)
3350 return yylex();
3351 }
3352#else
3280af22 3353 PL_nexttoke--;
6154021b 3354 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3355 if (!PL_nexttoke) {
3356 PL_lex_state = PL_lex_defer;
3357 PL_expect = PL_lex_expect;
3358 PL_lex_defer = LEX_NORMAL;
463ee0b2 3359 }
5db06880
NC
3360#endif
3361#ifdef PERL_MAD
3362 /* FIXME - can these be merged? */
3363 return(PL_nexttoke[PL_lasttoke].next_type);
3364#else
bbf60fe6 3365 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3366#endif
79072805 3367
02aa26ce 3368 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3369 when we get here, PL_bufptr is at the \
02aa26ce 3370 */
79072805
LW
3371 case LEX_INTERPCASEMOD:
3372#ifdef DEBUGGING
3280af22 3373 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3374 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3375#endif
02aa26ce 3376 /* handle \E or end of string */
3280af22 3377 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3378 /* if at a \E */
3280af22 3379 if (PL_lex_casemods) {
f54cb97a 3380 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3381 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3382
3792a11b
NC
3383 if (PL_bufptr != PL_bufend
3384 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3385 PL_bufptr += 2;
3386 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3387#ifdef PERL_MAD
3388 if (PL_madskills)
6b29d1f5 3389 PL_thistoken = newSVpvs("\\E");
5db06880 3390#endif
a0d0e21e 3391 }
bbf60fe6 3392 return REPORT(')');
79072805 3393 }
5db06880
NC
3394#ifdef PERL_MAD
3395 while (PL_bufptr != PL_bufend &&
3396 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3397 if (!PL_thiswhite)
6b29d1f5 3398 PL_thiswhite = newSVpvs("");
cd81e915 3399 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3400 PL_bufptr += 2;
3401 }
3402#else
3280af22
NIS
3403 if (PL_bufptr != PL_bufend)
3404 PL_bufptr += 2;
5db06880 3405#endif
3280af22 3406 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3407 return yylex();
79072805
LW
3408 }
3409 else {
607df283 3410 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3411 "### Saw case modifier\n"); });
3280af22 3412 s = PL_bufptr + 1;
6e909404 3413 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3414#ifdef PERL_MAD
cd81e915 3415 if (!PL_thiswhite)
6b29d1f5 3416 PL_thiswhite = newSVpvs("");
cd81e915 3417 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3418#endif
89122651 3419 PL_bufptr = s + 3;
6e909404
JH
3420 PL_lex_state = LEX_INTERPCONCAT;
3421 return yylex();
a0d0e21e 3422 }
6e909404 3423 else {
90771dc0 3424 I32 tmp;
5db06880
NC
3425 if (!PL_madskills) /* when just compiling don't need correct */
3426 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3427 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3428 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3429 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3430 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3431 return REPORT(')');
6e909404
JH
3432 }
3433 if (PL_lex_casemods > 10)
3434 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3435 PL_lex_casestack[PL_lex_casemods++] = *s;
3436 PL_lex_casestack[PL_lex_casemods] = '\0';
3437 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3438 start_force(PL_curforce);
9ded7720 3439 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3440 force_next('(');
cd81e915 3441 start_force(PL_curforce);
6e909404 3442 if (*s == 'l')
9ded7720 3443 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3444 else if (*s == 'u')
9ded7720 3445 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3446 else if (*s == 'L')
9ded7720 3447 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3448 else if (*s == 'U')
9ded7720 3449 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3450 else if (*s == 'Q')
9ded7720 3451 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3452 else
3453 Perl_croak(aTHX_ "panic: yylex");
5db06880 3454 if (PL_madskills) {
a5849ce5
NC
3455 SV* const tmpsv = newSVpvs("\\ ");
3456 /* replace the space with the character we want to escape
3457 */
3458 SvPVX(tmpsv)[1] = *s;
5db06880
NC
3459 curmad('_', tmpsv);
3460 }
6e909404 3461 PL_bufptr = s + 1;
a0d0e21e 3462 }
79072805 3463 force_next(FUNC);
3280af22
NIS
3464 if (PL_lex_starts) {
3465 s = PL_bufptr;
3466 PL_lex_starts = 0;
5db06880
NC
3467#ifdef PERL_MAD
3468 if (PL_madskills) {
cd81e915
NC
3469 if (PL_thistoken)
3470 sv_free(PL_thistoken);
6b29d1f5 3471 PL_thistoken = newSVpvs("");
5db06880
NC
3472 }
3473#endif
131b3ad0
DM
3474 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3475 if (PL_lex_casemods == 1 && PL_lex_inpat)
3476 OPERATOR(',');
3477 else
3478 Aop(OP_CONCAT);
79072805
LW
3479 }
3480 else
cea2e8a9 3481 return yylex();
79072805
LW
3482 }
3483
55497cff 3484 case LEX_INTERPPUSH:
bbf60fe6 3485 return REPORT(sublex_push());
55497cff 3486
79072805 3487 case LEX_INTERPSTART:
3280af22 3488 if (PL_bufptr == PL_bufend)
bbf60fe6 3489 return REPORT(sublex_done());
607df283 3490 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3491 "### Interpolated variable\n"); });
3280af22
NIS
3492 PL_expect = XTERM;
3493 PL_lex_dojoin = (*PL_bufptr == '@');
3494 PL_lex_state = LEX_INTERPNORMAL;
3495 if (PL_lex_dojoin) {
cd81e915 3496 start_force(PL_curforce);
9ded7720 3497 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3498 force_next(',');
cd81e915 3499 start_force(PL_curforce);
a0d0e21e 3500 force_ident("\"", '$');
cd81e915 3501 start_force(PL_curforce);
9ded7720 3502 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3503 force_next('$');
cd81e915 3504 start_force(PL_curforce);
9ded7720 3505 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3506 force_next('(');
cd81e915 3507 start_force(PL_curforce);
9ded7720 3508 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3509 force_next(FUNC);
3510 }
3280af22
NIS
3511 if (PL_lex_starts++) {
3512 s = PL_bufptr;
5db06880
NC
3513#ifdef PERL_MAD
3514 if (PL_madskills) {
cd81e915
NC
3515 if (PL_thistoken)
3516 sv_free(PL_thistoken);
6b29d1f5 3517 PL_thistoken = newSVpvs("");
5db06880
NC
3518 }
3519#endif
131b3ad0
DM
3520 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3521 if (!PL_lex_casemods && PL_lex_inpat)
3522 OPERATOR(',');
3523 else
3524 Aop(OP_CONCAT);
79072805 3525<