This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Remove category 'syntax' from 5 warnings that should just be in 'deprecated'.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
378cc40b 24#include "EXTERN.h"
864dbfa3 25#define PERL_IN_TOKE_C
378cc40b 26#include "perl.h"
378cc40b 27
eb0d8d16
NC
28#define new_constant(a,b,c,d,e,f,g) \
29 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
30
6154021b 31#define pl_yylval (PL_parser->yylval)
d3b6f988 32
acdf0a21
DM
33/* YYINITDEPTH -- initial size of the parser's stacks. */
34#define YYINITDEPTH 200
35
199e78b7
DM
36/* XXX temporary backwards compatibility */
37#define PL_lex_brackets (PL_parser->lex_brackets)
38#define PL_lex_brackstack (PL_parser->lex_brackstack)
39#define PL_lex_casemods (PL_parser->lex_casemods)
40#define PL_lex_casestack (PL_parser->lex_casestack)
41#define PL_lex_defer (PL_parser->lex_defer)
42#define PL_lex_dojoin (PL_parser->lex_dojoin)
43#define PL_lex_expect (PL_parser->lex_expect)
44#define PL_lex_formbrack (PL_parser->lex_formbrack)
45#define PL_lex_inpat (PL_parser->lex_inpat)
46#define PL_lex_inwhat (PL_parser->lex_inwhat)
47#define PL_lex_op (PL_parser->lex_op)
48#define PL_lex_repl (PL_parser->lex_repl)
49#define PL_lex_starts (PL_parser->lex_starts)
50#define PL_lex_stuff (PL_parser->lex_stuff)
51#define PL_multi_start (PL_parser->multi_start)
52#define PL_multi_open (PL_parser->multi_open)
53#define PL_multi_close (PL_parser->multi_close)
54#define PL_pending_ident (PL_parser->pending_ident)
55#define PL_preambled (PL_parser->preambled)
56#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 57#define PL_linestr (PL_parser->linestr)
c2598295
DM
58#define PL_expect (PL_parser->expect)
59#define PL_copline (PL_parser->copline)
f06b5848
DM
60#define PL_bufptr (PL_parser->bufptr)
61#define PL_oldbufptr (PL_parser->oldbufptr)
62#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
63#define PL_linestart (PL_parser->linestart)
64#define PL_bufend (PL_parser->bufend)
65#define PL_last_uni (PL_parser->last_uni)
66#define PL_last_lop (PL_parser->last_lop)
67#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 68#define PL_lex_state (PL_parser->lex_state)
2f9285f8 69#define PL_rsfp (PL_parser->rsfp)
5486870f 70#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
71#define PL_in_my (PL_parser->in_my)
72#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 73#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 74#define PL_multi_end (PL_parser->multi_end)
13765c85 75#define PL_error_count (PL_parser->error_count)
199e78b7
DM
76
77#ifdef PERL_MAD
78# define PL_endwhite (PL_parser->endwhite)
79# define PL_faketokens (PL_parser->faketokens)
80# define PL_lasttoke (PL_parser->lasttoke)
81# define PL_nextwhite (PL_parser->nextwhite)
82# define PL_realtokenstart (PL_parser->realtokenstart)
83# define PL_skipwhite (PL_parser->skipwhite)
84# define PL_thisclose (PL_parser->thisclose)
85# define PL_thismad (PL_parser->thismad)
86# define PL_thisopen (PL_parser->thisopen)
87# define PL_thisstuff (PL_parser->thisstuff)
88# define PL_thistoken (PL_parser->thistoken)
89# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
90# define PL_thiswhite (PL_parser->thiswhite)
91# define PL_nexttoke (PL_parser->nexttoke)
92# define PL_curforce (PL_parser->curforce)
93#else
94# define PL_nexttoke (PL_parser->nexttoke)
95# define PL_nexttype (PL_parser->nexttype)
96# define PL_nextval (PL_parser->nextval)
199e78b7
DM
97#endif
98
3cbf51f5
DM
99static int
100S_pending_ident(pTHX);
199e78b7 101
0bd48802 102static const char ident_too_long[] = "Identifier too long";
c445ea15 103static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 104
6e3aabd6 105#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
106static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
107static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 108#endif
51371543 109
29595ff2 110#ifdef PERL_MAD
29595ff2 111# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 112# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 113#else
5db06880 114# define CURMAD(slot,sv)
9ded7720 115# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
116#endif
117
9059aa12
LW
118#define XFAKEBRACK 128
119#define XENUMMASK 127
120
39e02b42
JH
121#ifdef USE_UTF8_SCRIPTS
122# define UTF (!IN_BYTES)
2b9d42f0 123#else
746b446a 124# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 125#endif
a0ed51b3 126
b1fc3636
CJ
127/* The maximum number of characters preceding the unrecognized one to display */
128#define UNRECOGNIZED_PRECEDE_COUNT 10
129
61f0cdd9 130/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
131 * 1999-02-27 mjd-perl-patch@plover.com */
132#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
133
bf4acbe4 134#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 135
ffb4593c
NT
136/* LEX_* are values for PL_lex_state, the state of the lexer.
137 * They are arranged oddly so that the guard on the switch statement
79072805
LW
138 * can get by with a single comparison (if the compiler is smart enough).
139 */
140
fb73857a 141/* #define LEX_NOTPARSING 11 is done in perl.h. */
142
b6007c36
DM
143#define LEX_NORMAL 10 /* normal code (ie not within "...") */
144#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
145#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
146#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
147#define LEX_INTERPSTART 6 /* expecting the start of a $var */
148
149 /* at end of code, eg "$x" followed by: */
150#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
151#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
152
153#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
154 string or after \E, $foo, etc */
155#define LEX_INTERPCONST 2 /* NOT USED */
156#define LEX_FORMLINE 1 /* expecting a format line */
157#define LEX_KNOWNEXT 0 /* next token known; just return it */
158
79072805 159
bbf60fe6 160#ifdef DEBUGGING
27da23d5 161static const char* const lex_state_names[] = {
bbf60fe6
DM
162 "KNOWNEXT",
163 "FORMLINE",
164 "INTERPCONST",
165 "INTERPCONCAT",
166 "INTERPENDMAYBE",
167 "INTERPEND",
168 "INTERPSTART",
169 "INTERPPUSH",
170 "INTERPCASEMOD",
171 "INTERPNORMAL",
172 "NORMAL"
173};
174#endif
175
79072805
LW
176#ifdef ff_next
177#undef ff_next
d48672a2
LW
178#endif
179
79072805 180#include "keywords.h"
fe14fcc3 181
ffb4593c
NT
182/* CLINE is a macro that ensures PL_copline has a sane value */
183
ae986130
LW
184#ifdef CLINE
185#undef CLINE
186#endif
57843af0 187#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 188
5db06880 189#ifdef PERL_MAD
29595ff2
NC
190# define SKIPSPACE0(s) skipspace0(s)
191# define SKIPSPACE1(s) skipspace1(s)
192# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
193# define PEEKSPACE(s) skipspace2(s,0)
194#else
195# define SKIPSPACE0(s) skipspace(s)
196# define SKIPSPACE1(s) skipspace(s)
197# define SKIPSPACE2(s,tsv) skipspace(s)
198# define PEEKSPACE(s) skipspace(s)
199#endif
200
ffb4593c
NT
201/*
202 * Convenience functions to return different tokens and prime the
9cbb5ea2 203 * lexer for the next token. They all take an argument.
ffb4593c
NT
204 *
205 * TOKEN : generic token (used for '(', DOLSHARP, etc)
206 * OPERATOR : generic operator
207 * AOPERATOR : assignment operator
208 * PREBLOCK : beginning the block after an if, while, foreach, ...
209 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
210 * PREREF : *EXPR where EXPR is not a simple identifier
211 * TERM : expression term
212 * LOOPX : loop exiting command (goto, last, dump, etc)
213 * FTST : file test operator
214 * FUN0 : zero-argument function
2d2e263d 215 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
216 * BOop : bitwise or or xor
217 * BAop : bitwise and
218 * SHop : shift operator
219 * PWop : power operator
9cbb5ea2 220 * PMop : pattern-matching operator
ffb4593c
NT
221 * Aop : addition-level operator
222 * Mop : multiplication-level operator
223 * Eop : equality-testing operator
e5edeb50 224 * Rop : relational operator <= != gt
ffb4593c
NT
225 *
226 * Also see LOP and lop() below.
227 */
228
998054bd 229#ifdef DEBUGGING /* Serve -DT. */
704d4215 230# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 231#else
bbf60fe6 232# define REPORT(retval) (retval)
998054bd
SC
233#endif
234
bbf60fe6
DM
235#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
236#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
237#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
238#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
239#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
240#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
241#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
242#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
243#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
244#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
245#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
246#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
247#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
248#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
249#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
250#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
251#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
252#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
253#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
254#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 255
a687059c
LW
256/* This bit of chicanery makes a unary function followed by
257 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
258 * The UNIDOR macro is for unary functions that can be followed by the //
259 * operator (such as C<shift // 0>).
a687059c 260 */
376fcdbf 261#define UNI2(f,x) { \
6154021b 262 pl_yylval.ival = f; \
376fcdbf
AL
263 PL_expect = x; \
264 PL_bufptr = s; \
265 PL_last_uni = PL_oldbufptr; \
266 PL_last_lop_op = f; \
267 if (*s == '(') \
268 return REPORT( (int)FUNC1 ); \
29595ff2 269 s = PEEKSPACE(s); \
376fcdbf
AL
270 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
271 }
6f33ba73
RGS
272#define UNI(f) UNI2(f,XTERM)
273#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 274
376fcdbf 275#define UNIBRACK(f) { \
6154021b 276 pl_yylval.ival = f; \
376fcdbf
AL
277 PL_bufptr = s; \
278 PL_last_uni = PL_oldbufptr; \
279 if (*s == '(') \
280 return REPORT( (int)FUNC1 ); \
29595ff2 281 s = PEEKSPACE(s); \
376fcdbf
AL
282 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
283 }
79072805 284
9f68db38 285/* grandfather return to old style */
6154021b 286#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 287
8fa7f367
JH
288#ifdef DEBUGGING
289
6154021b 290/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
291enum token_type {
292 TOKENTYPE_NONE,
293 TOKENTYPE_IVAL,
6154021b 294 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
295 TOKENTYPE_PVAL,
296 TOKENTYPE_OPVAL,
297 TOKENTYPE_GVVAL
298};
299
6d4a66ac
NC
300static struct debug_tokens {
301 const int token;
302 enum token_type type;
303 const char *name;
304} const debug_tokens[] =
9041c2e3 305{
bbf60fe6
DM
306 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
307 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
308 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
309 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
310 { ARROW, TOKENTYPE_NONE, "ARROW" },
311 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
312 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
313 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
314 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
315 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 316 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
317 { DO, TOKENTYPE_NONE, "DO" },
318 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
319 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
320 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
321 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
322 { ELSE, TOKENTYPE_NONE, "ELSE" },
323 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
324 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
325 { FOR, TOKENTYPE_IVAL, "FOR" },
326 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
330 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
331 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 332 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
333 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
334 { IF, TOKENTYPE_IVAL, "IF" },
335 { LABEL, TOKENTYPE_PVAL, "LABEL" },
336 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
337 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
338 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
339 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
340 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
341 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
342 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
343 { MY, TOKENTYPE_IVAL, "MY" },
344 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
351 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
352 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
353 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
354 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
355 { PREINC, TOKENTYPE_NONE, "PREINC" },
356 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
357 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
358 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
359 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
360 { SUB, TOKENTYPE_NONE, "SUB" },
361 { THING, TOKENTYPE_OPVAL, "THING" },
362 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
363 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
364 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
365 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
366 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
367 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 368 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
369 { WHILE, TOKENTYPE_IVAL, "WHILE" },
370 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 371 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 372 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
373};
374
6154021b 375/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 376
bbf60fe6 377STATIC int
704d4215 378S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 379{
97aff369 380 dVAR;
7918f24d
NC
381
382 PERL_ARGS_ASSERT_TOKEREPORT;
383
bbf60fe6 384 if (DEBUG_T_TEST) {
bd61b366 385 const char *name = NULL;
bbf60fe6 386 enum token_type type = TOKENTYPE_NONE;
f54cb97a 387 const struct debug_tokens *p;
396482e1 388 SV* const report = newSVpvs("<== ");
bbf60fe6 389
f54cb97a 390 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
391 if (p->token == (int)rv) {
392 name = p->name;
393 type = p->type;
394 break;
395 }
396 }
397 if (name)
54667de8 398 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
399 else if ((char)rv > ' ' && (char)rv < '~')
400 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
401 else if (!rv)
396482e1 402 sv_catpvs(report, "EOF");
bbf60fe6
DM
403 else
404 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
405 switch (type) {
406 case TOKENTYPE_NONE:
407 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
408 break;
409 case TOKENTYPE_IVAL:
704d4215 410 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
411 break;
412 case TOKENTYPE_OPNUM:
413 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 414 PL_op_name[lvalp->ival]);
bbf60fe6
DM
415 break;
416 case TOKENTYPE_PVAL:
704d4215 417 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
418 break;
419 case TOKENTYPE_OPVAL:
704d4215 420 if (lvalp->opval) {
401441c0 421 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
422 PL_op_name[lvalp->opval->op_type]);
423 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 424 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 425 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
426 }
427
428 }
401441c0 429 else
396482e1 430 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
431 break;
432 }
b6007c36 433 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
434 };
435 return (int)rv;
998054bd
SC
436}
437
b6007c36
DM
438
439/* print the buffer with suitable escapes */
440
441STATIC void
15f169a1 442S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 443{
396482e1 444 SV* const tmp = newSVpvs("");
7918f24d
NC
445
446 PERL_ARGS_ASSERT_PRINTBUF;
447
b6007c36
DM
448 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
449 SvREFCNT_dec(tmp);
450}
451
8fa7f367
JH
452#endif
453
ffb4593c
NT
454/*
455 * S_ao
456 *
c963b151
BD
457 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
458 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
459 */
460
76e3520e 461STATIC int
cea2e8a9 462S_ao(pTHX_ int toketype)
a0d0e21e 463{
97aff369 464 dVAR;
3280af22
NIS
465 if (*PL_bufptr == '=') {
466 PL_bufptr++;
a0d0e21e 467 if (toketype == ANDAND)
6154021b 468 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 469 else if (toketype == OROR)
6154021b 470 pl_yylval.ival = OP_ORASSIGN;
c963b151 471 else if (toketype == DORDOR)
6154021b 472 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
473 toketype = ASSIGNOP;
474 }
475 return toketype;
476}
477
ffb4593c
NT
478/*
479 * S_no_op
480 * When Perl expects an operator and finds something else, no_op
481 * prints the warning. It always prints "<something> found where
482 * operator expected. It prints "Missing semicolon on previous line?"
483 * if the surprise occurs at the start of the line. "do you need to
484 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
485 * where the compiler doesn't know if foo is a method call or a function.
486 * It prints "Missing operator before end of line" if there's nothing
487 * after the missing operator, or "... before <...>" if there is something
488 * after the missing operator.
489 */
490
76e3520e 491STATIC void
15f169a1 492S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 493{
97aff369 494 dVAR;
9d4ba2ae
AL
495 char * const oldbp = PL_bufptr;
496 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 497
7918f24d
NC
498 PERL_ARGS_ASSERT_NO_OP;
499
1189a94a
GS
500 if (!s)
501 s = oldbp;
07c798fb 502 else
1189a94a 503 PL_bufptr = s;
cea2e8a9 504 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
505 if (ckWARN_d(WARN_SYNTAX)) {
506 if (is_first)
507 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
508 "\t(Missing semicolon on previous line?)\n");
509 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 510 const char *t;
c35e046a
AL
511 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
512 NOOP;
56da5a46
RGS
513 if (t < PL_bufptr && isSPACE(*t))
514 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
515 "\t(Do you need to predeclare %.*s?)\n",
551405c4 516 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
517 }
518 else {
519 assert(s >= oldbp);
520 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 521 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 522 }
07c798fb 523 }
3280af22 524 PL_bufptr = oldbp;
8990e307
LW
525}
526
ffb4593c
NT
527/*
528 * S_missingterm
529 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 530 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
531 * If we're in a delimited string and the delimiter is a control
532 * character, it's reformatted into a two-char sequence like ^C.
533 * This is fatal.
534 */
535
76e3520e 536STATIC void
cea2e8a9 537S_missingterm(pTHX_ char *s)
8990e307 538{
97aff369 539 dVAR;
8990e307
LW
540 char tmpbuf[3];
541 char q;
542 if (s) {
9d4ba2ae 543 char * const nl = strrchr(s,'\n');
d2719217 544 if (nl)
8990e307
LW
545 *nl = '\0';
546 }
463559e7 547 else if (isCNTRL(PL_multi_close)) {
8990e307 548 *tmpbuf = '^';
585ec06d 549 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
550 tmpbuf[2] = '\0';
551 s = tmpbuf;
552 }
553 else {
eb160463 554 *tmpbuf = (char)PL_multi_close;
8990e307
LW
555 tmpbuf[1] = '\0';
556 s = tmpbuf;
557 }
558 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 559 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 560}
79072805 561
ef89dcc3 562#define FEATURE_IS_ENABLED(name) \
0d863452 563 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 564 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b
NC
565/* The longest string we pass in. */
566#define MAX_FEATURE_LEN (sizeof("switch")-1)
567
0d863452
RH
568/*
569 * S_feature_is_enabled
570 * Check whether the named feature is enabled.
571 */
572STATIC bool
15f169a1 573S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 574{
97aff369 575 dVAR;
0d863452 576 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 577 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
578
579 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
580
4a731d7b
NC
581 assert(namelen <= MAX_FEATURE_LEN);
582 memcpy(&he_name[8], name, namelen);
d4c19fe8 583
7b9ef140 584 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
585}
586
ffb4593c
NT
587/*
588 * Perl_deprecate
ffb4593c
NT
589 */
590
79072805 591void
15f169a1 592Perl_deprecate(pTHX_ const char *const s)
a0d0e21e 593{
7918f24d
NC
594 PERL_ARGS_ASSERT_DEPRECATE;
595
a2a5de95 596 Perl_ck_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
597}
598
ffb4593c 599/*
9cbb5ea2
GS
600 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
601 * utf16-to-utf8-reversed.
ffb4593c
NT
602 */
603
c39cd008
GS
604#ifdef PERL_CR_FILTER
605static void
606strip_return(SV *sv)
607{
95a20fc0 608 register const char *s = SvPVX_const(sv);
9d4ba2ae 609 register const char * const e = s + SvCUR(sv);
7918f24d
NC
610
611 PERL_ARGS_ASSERT_STRIP_RETURN;
612
c39cd008
GS
613 /* outer loop optimized to do nothing if there are no CR-LFs */
614 while (s < e) {
615 if (*s++ == '\r' && *s == '\n') {
616 /* hit a CR-LF, need to copy the rest */
617 register char *d = s - 1;
618 *d++ = *s++;
619 while (s < e) {
620 if (*s == '\r' && s[1] == '\n')
621 s++;
622 *d++ = *s++;
623 }
624 SvCUR(sv) -= s - d;
625 return;
626 }
627 }
628}
a868473f 629
76e3520e 630STATIC I32
c39cd008 631S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 632{
f54cb97a 633 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
634 if (count > 0 && !maxlen)
635 strip_return(sv);
636 return count;
a868473f
NIS
637}
638#endif
639
199e78b7
DM
640
641
ffb4593c
NT
642/*
643 * Perl_lex_start
5486870f 644 *
e3abe207 645 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
646 *
647 * rsfp is the opened file handle to read from (if any),
648 *
649 * line holds any initial content already read from the file (or in
650 * the case of no file, such as an eval, the whole contents);
651 *
652 * new_filter indicates that this is a new file and it shouldn't inherit
653 * the filters from the current parser (ie require).
ffb4593c
NT
654 */
655
a0d0e21e 656void
5486870f 657Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 658{
97aff369 659 dVAR;
6ef55633 660 const char *s = NULL;
8990e307 661 STRLEN len;
5486870f 662 yy_parser *parser, *oparser;
acdf0a21
DM
663
664 /* create and initialise a parser */
665
199e78b7 666 Newxz(parser, 1, yy_parser);
5486870f 667 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
668 PL_parser = parser;
669
670 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
671 parser->ps = parser->stack;
672 parser->stack_size = YYINITDEPTH;
673
674 parser->stack->state = 0;
675 parser->yyerrstatus = 0;
676 parser->yychar = YYEMPTY; /* Cause a token to be read. */
677
e3abe207
DM
678 /* on scope exit, free this parser and restore any outer one */
679 SAVEPARSER(parser);
7c4baf47 680 parser->saved_curcop = PL_curcop;
e3abe207 681
acdf0a21 682 /* initialise lexer state */
8990e307 683
fb205e7a
DM
684#ifdef PERL_MAD
685 parser->curforce = -1;
686#else
687 parser->nexttoke = 0;
688#endif
ca4cfd28 689 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 690 parser->copline = NOLINE;
5afb0a62 691 parser->lex_state = LEX_NORMAL;
c2598295 692 parser->expect = XSTATE;
2f9285f8 693 parser->rsfp = rsfp;
56b27c9a 694 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 695 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 696
199e78b7
DM
697 Newx(parser->lex_brackstack, 120, char);
698 Newx(parser->lex_casestack, 12, char);
699 *parser->lex_casestack = '\0';
02b34bbe 700
10efb74f
NC
701 if (line) {
702 s = SvPV_const(line, len);
703 } else {
704 len = 0;
705 }
bdc0bf6f 706
10efb74f 707 if (!len) {
bdc0bf6f 708 parser->linestr = newSVpvs("\n;");
10efb74f 709 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 710 parser->linestr = newSVsv(line);
10efb74f 711 if (s[len-1] != ';')
bdc0bf6f 712 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
713 } else {
714 SvTEMP_off(line);
715 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 716 parser->linestr = line;
8990e307 717 }
f06b5848
DM
718 parser->oldoldbufptr =
719 parser->oldbufptr =
720 parser->bufptr =
721 parser->linestart = SvPVX(parser->linestr);
722 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
723 parser->last_lop = parser->last_uni = NULL;
79072805 724}
a687059c 725
e3abe207
DM
726
727/* delete a parser object */
728
729void
730Perl_parser_free(pTHX_ const yy_parser *parser)
731{
7918f24d
NC
732 PERL_ARGS_ASSERT_PARSER_FREE;
733
7c4baf47 734 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
735 SvREFCNT_dec(parser->linestr);
736
2f9285f8
DM
737 if (parser->rsfp == PerlIO_stdin())
738 PerlIO_clearerr(parser->rsfp);
799361c3
SH
739 else if (parser->rsfp && (!parser->old_parser ||
740 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 741 PerlIO_close(parser->rsfp);
5486870f 742 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 743
e3abe207
DM
744 Safefree(parser->stack);
745 Safefree(parser->lex_brackstack);
746 Safefree(parser->lex_casestack);
747 PL_parser = parser->old_parser;
748 Safefree(parser);
749}
750
751
ffb4593c
NT
752/*
753 * Perl_lex_end
9cbb5ea2
GS
754 * Finalizer for lexing operations. Must be called when the parser is
755 * done with the lexer.
ffb4593c
NT
756 */
757
463ee0b2 758void
864dbfa3 759Perl_lex_end(pTHX)
463ee0b2 760{
97aff369 761 dVAR;
3280af22 762 PL_doextract = FALSE;
463ee0b2
LW
763}
764
ffb4593c
NT
765/*
766 * S_incline
767 * This subroutine has nothing to do with tilting, whether at windmills
768 * or pinball tables. Its name is short for "increment line". It
57843af0 769 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 770 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
771 * # line 500 "foo.pm"
772 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
773 */
774
76e3520e 775STATIC void
d9095cec 776S_incline(pTHX_ const char *s)
463ee0b2 777{
97aff369 778 dVAR;
d9095cec
NC
779 const char *t;
780 const char *n;
781 const char *e;
463ee0b2 782
7918f24d
NC
783 PERL_ARGS_ASSERT_INCLINE;
784
57843af0 785 CopLINE_inc(PL_curcop);
463ee0b2
LW
786 if (*s++ != '#')
787 return;
d4c19fe8
AL
788 while (SPACE_OR_TAB(*s))
789 s++;
73659bf1
GS
790 if (strnEQ(s, "line", 4))
791 s += 4;
792 else
793 return;
084592ab 794 if (SPACE_OR_TAB(*s))
73659bf1 795 s++;
4e553d73 796 else
73659bf1 797 return;
d4c19fe8
AL
798 while (SPACE_OR_TAB(*s))
799 s++;
463ee0b2
LW
800 if (!isDIGIT(*s))
801 return;
d4c19fe8 802
463ee0b2
LW
803 n = s;
804 while (isDIGIT(*s))
805 s++;
07714eb4 806 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 807 return;
bf4acbe4 808 while (SPACE_OR_TAB(*s))
463ee0b2 809 s++;
73659bf1 810 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 811 s++;
73659bf1
GS
812 e = t + 1;
813 }
463ee0b2 814 else {
c35e046a
AL
815 t = s;
816 while (!isSPACE(*t))
817 t++;
73659bf1 818 e = t;
463ee0b2 819 }
bf4acbe4 820 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
821 e++;
822 if (*e != '\n' && *e != '\0')
823 return; /* false alarm */
824
f4dd75d9 825 if (t - s > 0) {
d9095cec 826 const STRLEN len = t - s;
8a5ee598 827#ifndef USE_ITHREADS
19bad673
NC
828 SV *const temp_sv = CopFILESV(PL_curcop);
829 const char *cf;
830 STRLEN tmplen;
831
832 if (temp_sv) {
833 cf = SvPVX(temp_sv);
834 tmplen = SvCUR(temp_sv);
835 } else {
836 cf = NULL;
837 tmplen = 0;
838 }
839
42d9b98d 840 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
841 /* must copy *{"::_<(eval N)[oldfilename:L]"}
842 * to *{"::_<newfilename"} */
44867030
NC
843 /* However, the long form of evals is only turned on by the
844 debugger - usually they're "(eval %lu)" */
845 char smallbuf[128];
846 char *tmpbuf;
847 GV **gvp;
d9095cec 848 STRLEN tmplen2 = len;
798b63bc 849 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
850 tmpbuf = smallbuf;
851 else
2ae0db35 852 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
853 tmpbuf[0] = '_';
854 tmpbuf[1] = '<';
2ae0db35 855 memcpy(tmpbuf + 2, cf, tmplen);
44867030 856 tmplen += 2;
8a5ee598
RGS
857 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
858 if (gvp) {
44867030
NC
859 char *tmpbuf2;
860 GV *gv2;
861
862 if (tmplen2 + 2 <= sizeof smallbuf)
863 tmpbuf2 = smallbuf;
864 else
865 Newx(tmpbuf2, tmplen2 + 2, char);
866
867 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
868 /* Either they malloc'd it, or we malloc'd it,
869 so no prefix is present in ours. */
870 tmpbuf2[0] = '_';
871 tmpbuf2[1] = '<';
872 }
873
874 memcpy(tmpbuf2 + 2, s, tmplen2);
875 tmplen2 += 2;
876
8a5ee598 877 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 878 if (!isGV(gv2)) {
8a5ee598 879 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
880 /* adjust ${"::_<newfilename"} to store the new file name */
881 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
882 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
883 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 884 }
44867030
NC
885
886 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 887 }
e66cf94c 888 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 889 }
8a5ee598 890#endif
05ec9bb3 891 CopFILE_free(PL_curcop);
d9095cec 892 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 893 }
57843af0 894 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
895}
896
29595ff2 897#ifdef PERL_MAD
cd81e915 898/* skip space before PL_thistoken */
29595ff2
NC
899
900STATIC char *
901S_skipspace0(pTHX_ register char *s)
902{
7918f24d
NC
903 PERL_ARGS_ASSERT_SKIPSPACE0;
904
29595ff2
NC
905 s = skipspace(s);
906 if (!PL_madskills)
907 return s;
cd81e915
NC
908 if (PL_skipwhite) {
909 if (!PL_thiswhite)
6b29d1f5 910 PL_thiswhite = newSVpvs("");
cd81e915
NC
911 sv_catsv(PL_thiswhite, PL_skipwhite);
912 sv_free(PL_skipwhite);
913 PL_skipwhite = 0;
914 }
915 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
916 return s;
917}
918
cd81e915 919/* skip space after PL_thistoken */
29595ff2
NC
920
921STATIC char *
922S_skipspace1(pTHX_ register char *s)
923{
d4c19fe8 924 const char *start = s;
29595ff2
NC
925 I32 startoff = start - SvPVX(PL_linestr);
926
7918f24d
NC
927 PERL_ARGS_ASSERT_SKIPSPACE1;
928
29595ff2
NC
929 s = skipspace(s);
930 if (!PL_madskills)
931 return s;
932 start = SvPVX(PL_linestr) + startoff;
cd81e915 933 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 934 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
935 PL_thistoken = newSVpvn(tstart, start - tstart);
936 }
937 PL_realtokenstart = -1;
938 if (PL_skipwhite) {
939 if (!PL_nextwhite)
6b29d1f5 940 PL_nextwhite = newSVpvs("");
cd81e915
NC
941 sv_catsv(PL_nextwhite, PL_skipwhite);
942 sv_free(PL_skipwhite);
943 PL_skipwhite = 0;
29595ff2
NC
944 }
945 return s;
946}
947
948STATIC char *
949S_skipspace2(pTHX_ register char *s, SV **svp)
950{
c35e046a
AL
951 char *start;
952 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
953 const I32 startoff = s - SvPVX(PL_linestr);
954
7918f24d
NC
955 PERL_ARGS_ASSERT_SKIPSPACE2;
956
29595ff2
NC
957 s = skipspace(s);
958 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
959 if (!PL_madskills || !svp)
960 return s;
961 start = SvPVX(PL_linestr) + startoff;
cd81e915 962 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 963 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
964 PL_thistoken = newSVpvn(tstart, start - tstart);
965 PL_realtokenstart = -1;
29595ff2 966 }
cd81e915 967 if (PL_skipwhite) {
29595ff2 968 if (!*svp)
6b29d1f5 969 *svp = newSVpvs("");
cd81e915
NC
970 sv_setsv(*svp, PL_skipwhite);
971 sv_free(PL_skipwhite);
972 PL_skipwhite = 0;
29595ff2
NC
973 }
974
975 return s;
976}
977#endif
978
80a702cd 979STATIC void
15f169a1 980S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
981{
982 AV *av = CopFILEAVx(PL_curcop);
983 if (av) {
b9f83d2f 984 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
985 if (orig_sv)
986 sv_setsv(sv, orig_sv);
987 else
988 sv_setpvn(sv, buf, len);
80a702cd
RGS
989 (void)SvIOK_on(sv);
990 SvIV_set(sv, 0);
991 av_store(av, (I32)CopLINE(PL_curcop), sv);
992 }
993}
994
ffb4593c
NT
995/*
996 * S_skipspace
997 * Called to gobble the appropriate amount and type of whitespace.
998 * Skips comments as well.
999 */
1000
76e3520e 1001STATIC char *
cea2e8a9 1002S_skipspace(pTHX_ register char *s)
a687059c 1003{
97aff369 1004 dVAR;
5db06880
NC
1005#ifdef PERL_MAD
1006 int curoff;
1007 int startoff = s - SvPVX(PL_linestr);
1008
7918f24d
NC
1009 PERL_ARGS_ASSERT_SKIPSPACE;
1010
cd81e915
NC
1011 if (PL_skipwhite) {
1012 sv_free(PL_skipwhite);
1013 PL_skipwhite = 0;
5db06880
NC
1014 }
1015#endif
7918f24d 1016 PERL_ARGS_ASSERT_SKIPSPACE;
5db06880 1017
3280af22 1018 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1019 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1020 s++;
5db06880
NC
1021#ifdef PERL_MAD
1022 goto done;
1023#else
463ee0b2 1024 return s;
5db06880 1025#endif
463ee0b2
LW
1026 }
1027 for (;;) {
fd049845 1028 STRLEN prevlen;
09bef843 1029 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 1030 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
1031 while (s < PL_bufend && isSPACE(*s)) {
1032 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1033 incline(s);
1034 }
ffb4593c
NT
1035
1036 /* comment */
3280af22
NIS
1037 if (s < PL_bufend && *s == '#') {
1038 while (s < PL_bufend && *s != '\n')
463ee0b2 1039 s++;
60e6418e 1040 if (s < PL_bufend) {
463ee0b2 1041 s++;
60e6418e
GS
1042 if (PL_in_eval && !PL_rsfp) {
1043 incline(s);
1044 continue;
1045 }
1046 }
463ee0b2 1047 }
ffb4593c
NT
1048
1049 /* only continue to recharge the buffer if we're at the end
1050 * of the buffer, we're not reading from a source filter, and
1051 * we're in normal lexing mode
1052 */
09bef843
SB
1053 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1054 PL_lex_state == LEX_FORMLINE)
5db06880
NC
1055#ifdef PERL_MAD
1056 goto done;
1057#else
463ee0b2 1058 return s;
5db06880 1059#endif
ffb4593c
NT
1060
1061 /* try to recharge the buffer */
5db06880
NC
1062#ifdef PERL_MAD
1063 curoff = s - SvPVX(PL_linestr);
1064#endif
1065
9cbb5ea2 1066 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 1067 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 1068 {
5db06880
NC
1069#ifdef PERL_MAD
1070 if (PL_madskills && curoff != startoff) {
cd81e915 1071 if (!PL_skipwhite)
6b29d1f5 1072 PL_skipwhite = newSVpvs("");
cd81e915 1073 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1074 curoff - startoff);
1075 }
1076
1077 /* mustn't throw out old stuff yet if madpropping */
1078 SvCUR(PL_linestr) = curoff;
1079 s = SvPVX(PL_linestr) + curoff;
1080 *s = 0;
1081 if (curoff && s[-1] == '\n')
1082 s[-1] = ' ';
1083#endif
1084
9cbb5ea2 1085 /* end of file. Add on the -p or -n magic */
cd81e915 1086 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 1087 if (PL_minus_p) {
5db06880 1088#ifdef PERL_MAD
6502358f 1089 sv_catpvs(PL_linestr,
5db06880
NC
1090 ";}continue{print or die qq(-p destination: $!\\n);}");
1091#else
6502358f 1092 sv_setpvs(PL_linestr,
01a19ab0 1093 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 1094#endif
3280af22 1095 PL_minus_n = PL_minus_p = 0;
a0d0e21e 1096 }
01a19ab0 1097 else if (PL_minus_n) {
5db06880 1098#ifdef PERL_MAD
76f68e9b 1099 sv_catpvs(PL_linestr, ";}");
5db06880 1100#else
76f68e9b 1101 sv_setpvs(PL_linestr, ";}");
5db06880 1102#endif
01a19ab0
NC
1103 PL_minus_n = 0;
1104 }
a0d0e21e 1105 else
5db06880 1106#ifdef PERL_MAD
76f68e9b 1107 sv_catpvs(PL_linestr,";");
5db06880 1108#else
76f68e9b 1109 sv_setpvs(PL_linestr,";");
5db06880 1110#endif
ffb4593c
NT
1111
1112 /* reset variables for next time we lex */
9cbb5ea2 1113 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
1114 = SvPVX(PL_linestr)
1115#ifdef PERL_MAD
1116 + curoff
1117#endif
1118 ;
3280af22 1119 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 1120 PL_last_lop = PL_last_uni = NULL;
ffb4593c 1121
4c84d7f2 1122 /* Close the filehandle. Could be from
ffb4593c
NT
1123 * STDIN, or a regular file. If we were reading code from
1124 * STDIN (because the commandline held no -e or filename)
1125 * then we don't close it, we reset it so the code can
1126 * read from STDIN too.
1127 */
1128
4c84d7f2 1129 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
3280af22 1130 PerlIO_clearerr(PL_rsfp);
8990e307 1131 else
3280af22 1132 (void)PerlIO_close(PL_rsfp);
4608196e 1133 PL_rsfp = NULL;
463ee0b2
LW
1134 return s;
1135 }
ffb4593c
NT
1136
1137 /* not at end of file, so we only read another line */
09bef843
SB
1138 /* make corresponding updates to old pointers, for yyerror() */
1139 oldprevlen = PL_oldbufptr - PL_bufend;
1140 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1141 if (PL_last_uni)
1142 oldunilen = PL_last_uni - PL_bufend;
1143 if (PL_last_lop)
1144 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1145 PL_linestart = PL_bufptr = s + prevlen;
1146 PL_bufend = s + SvCUR(PL_linestr);
1147 s = PL_bufptr;
09bef843
SB
1148 PL_oldbufptr = s + oldprevlen;
1149 PL_oldoldbufptr = s + oldoldprevlen;
1150 if (PL_last_uni)
1151 PL_last_uni = s + oldunilen;
1152 if (PL_last_lop)
1153 PL_last_lop = s + oldloplen;
a0d0e21e 1154 incline(s);
ffb4593c
NT
1155
1156 /* debugger active and we're not compiling the debugger code,
1157 * so store the line into the debugger's array of lines
1158 */
65269a95 1159 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 1160 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
463ee0b2 1161 }
5db06880
NC
1162
1163#ifdef PERL_MAD
1164 done:
1165 if (PL_madskills) {
cd81e915 1166 if (!PL_skipwhite)
6b29d1f5 1167 PL_skipwhite = newSVpvs("");
5db06880
NC
1168 curoff = s - SvPVX(PL_linestr);
1169 if (curoff - startoff)
cd81e915 1170 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1171 curoff - startoff);
1172 }
1173 return s;
1174#endif
a687059c 1175}
378cc40b 1176
ffb4593c
NT
1177/*
1178 * S_check_uni
1179 * Check the unary operators to ensure there's no ambiguity in how they're
1180 * used. An ambiguous piece of code would be:
1181 * rand + 5
1182 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1183 * the +5 is its argument.
1184 */
1185
76e3520e 1186STATIC void
cea2e8a9 1187S_check_uni(pTHX)
ba106d47 1188{
97aff369 1189 dVAR;
d4c19fe8
AL
1190 const char *s;
1191 const char *t;
2f3197b3 1192
3280af22 1193 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1194 return;
3280af22
NIS
1195 while (isSPACE(*PL_last_uni))
1196 PL_last_uni++;
c35e046a
AL
1197 s = PL_last_uni;
1198 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1199 s++;
3280af22 1200 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1201 return;
6136c704 1202
9b387841
NC
1203 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1204 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1205 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1206}
1207
ffb4593c
NT
1208/*
1209 * LOP : macro to build a list operator. Its behaviour has been replaced
1210 * with a subroutine, S_lop() for which LOP is just another name.
1211 */
1212
a0d0e21e
LW
1213#define LOP(f,x) return lop(f,x,s)
1214
ffb4593c
NT
1215/*
1216 * S_lop
1217 * Build a list operator (or something that might be one). The rules:
1218 * - if we have a next token, then it's a list operator [why?]
1219 * - if the next thing is an opening paren, then it's a function
1220 * - else it's a list operator
1221 */
1222
76e3520e 1223STATIC I32
a0be28da 1224S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1225{
97aff369 1226 dVAR;
7918f24d
NC
1227
1228 PERL_ARGS_ASSERT_LOP;
1229
6154021b 1230 pl_yylval.ival = f;
35c8bce7 1231 CLINE;
3280af22
NIS
1232 PL_expect = x;
1233 PL_bufptr = s;
1234 PL_last_lop = PL_oldbufptr;
eb160463 1235 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1236#ifdef PERL_MAD
1237 if (PL_lasttoke)
1238 return REPORT(LSTOP);
1239#else
3280af22 1240 if (PL_nexttoke)
bbf60fe6 1241 return REPORT(LSTOP);
5db06880 1242#endif
79072805 1243 if (*s == '(')
bbf60fe6 1244 return REPORT(FUNC);
29595ff2 1245 s = PEEKSPACE(s);
79072805 1246 if (*s == '(')
bbf60fe6 1247 return REPORT(FUNC);
79072805 1248 else
bbf60fe6 1249 return REPORT(LSTOP);
79072805
LW
1250}
1251
5db06880
NC
1252#ifdef PERL_MAD
1253 /*
1254 * S_start_force
1255 * Sets up for an eventual force_next(). start_force(0) basically does
1256 * an unshift, while start_force(-1) does a push. yylex removes items
1257 * on the "pop" end.
1258 */
1259
1260STATIC void
1261S_start_force(pTHX_ int where)
1262{
1263 int i;
1264
cd81e915 1265 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1266 where = PL_lasttoke;
cd81e915
NC
1267 assert(PL_curforce < 0 || PL_curforce == where);
1268 if (PL_curforce != where) {
5db06880
NC
1269 for (i = PL_lasttoke; i > where; --i) {
1270 PL_nexttoke[i] = PL_nexttoke[i-1];
1271 }
1272 PL_lasttoke++;
1273 }
cd81e915 1274 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1275 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1276 PL_curforce = where;
1277 if (PL_nextwhite) {
5db06880 1278 if (PL_madskills)
6b29d1f5 1279 curmad('^', newSVpvs(""));
cd81e915 1280 CURMAD('_', PL_nextwhite);
5db06880
NC
1281 }
1282}
1283
1284STATIC void
1285S_curmad(pTHX_ char slot, SV *sv)
1286{
1287 MADPROP **where;
1288
1289 if (!sv)
1290 return;
cd81e915
NC
1291 if (PL_curforce < 0)
1292 where = &PL_thismad;
5db06880 1293 else
cd81e915 1294 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1295
cd81e915 1296 if (PL_faketokens)
76f68e9b 1297 sv_setpvs(sv, "");
5db06880
NC
1298 else {
1299 if (!IN_BYTES) {
1300 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1301 SvUTF8_on(sv);
1302 else if (PL_encoding) {
1303 sv_recode_to_utf8(sv, PL_encoding);
1304 }
1305 }
1306 }
1307
1308 /* keep a slot open for the head of the list? */
1309 if (slot != '_' && *where && (*where)->mad_key == '^') {
1310 (*where)->mad_key = slot;
daba3364 1311 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1312 (*where)->mad_val = (void*)sv;
1313 }
1314 else
1315 addmad(newMADsv(slot, sv), where, 0);
1316}
1317#else
b3f24c00
MHM
1318# define start_force(where) NOOP
1319# define curmad(slot, sv) NOOP
5db06880
NC
1320#endif
1321
ffb4593c
NT
1322/*
1323 * S_force_next
9cbb5ea2 1324 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1325 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1326 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1327 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1328 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1329 */
1330
4e553d73 1331STATIC void
cea2e8a9 1332S_force_next(pTHX_ I32 type)
79072805 1333{
97aff369 1334 dVAR;
704d4215
GG
1335#ifdef DEBUGGING
1336 if (DEBUG_T_TEST) {
1337 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1338 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1339 }
1340#endif
5db06880 1341#ifdef PERL_MAD
cd81e915 1342 if (PL_curforce < 0)
5db06880 1343 start_force(PL_lasttoke);
cd81e915 1344 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1345 if (PL_lex_state != LEX_KNOWNEXT)
1346 PL_lex_defer = PL_lex_state;
1347 PL_lex_state = LEX_KNOWNEXT;
1348 PL_lex_expect = PL_expect;
cd81e915 1349 PL_curforce = -1;
5db06880 1350#else
3280af22
NIS
1351 PL_nexttype[PL_nexttoke] = type;
1352 PL_nexttoke++;
1353 if (PL_lex_state != LEX_KNOWNEXT) {
1354 PL_lex_defer = PL_lex_state;
1355 PL_lex_expect = PL_expect;
1356 PL_lex_state = LEX_KNOWNEXT;
79072805 1357 }
5db06880 1358#endif
79072805
LW
1359}
1360
d0a148a6 1361STATIC SV *
15f169a1 1362S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1363{
97aff369 1364 dVAR;
740cce10 1365 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1366 !IN_BYTES
1367 && UTF
1368 && !is_ascii_string((const U8*)start, len)
740cce10 1369 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1370 return sv;
1371}
1372
ffb4593c
NT
1373/*
1374 * S_force_word
1375 * When the lexer knows the next thing is a word (for instance, it has
1376 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1377 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1378 * lookahead.
ffb4593c
NT
1379 *
1380 * Arguments:
b1b65b59 1381 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1382 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1383 * int check_keyword : if true, Perl checks to make sure the word isn't
1384 * a keyword (do this if the word is a label, e.g. goto FOO)
1385 * int allow_pack : if true, : characters will also be allowed (require,
1386 * use, etc. do this)
9cbb5ea2 1387 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1388 */
1389
76e3520e 1390STATIC char *
cea2e8a9 1391S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1392{
97aff369 1393 dVAR;
463ee0b2
LW
1394 register char *s;
1395 STRLEN len;
4e553d73 1396
7918f24d
NC
1397 PERL_ARGS_ASSERT_FORCE_WORD;
1398
29595ff2 1399 start = SKIPSPACE1(start);
463ee0b2 1400 s = start;
7e2040f0 1401 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1402 (allow_pack && *s == ':') ||
15f0808c 1403 (allow_initial_tick && *s == '\'') )
a0d0e21e 1404 {
3280af22 1405 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1406 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1407 return start;
cd81e915 1408 start_force(PL_curforce);
5db06880
NC
1409 if (PL_madskills)
1410 curmad('X', newSVpvn(start,s-start));
463ee0b2 1411 if (token == METHOD) {
29595ff2 1412 s = SKIPSPACE1(s);
463ee0b2 1413 if (*s == '(')
3280af22 1414 PL_expect = XTERM;
463ee0b2 1415 else {
3280af22 1416 PL_expect = XOPERATOR;
463ee0b2 1417 }
79072805 1418 }
e74e6b3d 1419 if (PL_madskills)
63575281 1420 curmad('g', newSVpvs( "forced" ));
9ded7720 1421 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1422 = (OP*)newSVOP(OP_CONST,0,
1423 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1424 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1425 force_next(token);
1426 }
1427 return s;
1428}
1429
ffb4593c
NT
1430/*
1431 * S_force_ident
9cbb5ea2 1432 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1433 * text only contains the "foo" portion. The first argument is a pointer
1434 * to the "foo", and the second argument is the type symbol to prefix.
1435 * Forces the next token to be a "WORD".
9cbb5ea2 1436 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1437 */
1438
76e3520e 1439STATIC void
bfed75c6 1440S_force_ident(pTHX_ register const char *s, int kind)
79072805 1441{
97aff369 1442 dVAR;
7918f24d
NC
1443
1444 PERL_ARGS_ASSERT_FORCE_IDENT;
1445
c35e046a 1446 if (*s) {
90e5519e
NC
1447 const STRLEN len = strlen(s);
1448 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1449 start_force(PL_curforce);
9ded7720 1450 NEXTVAL_NEXTTOKE.opval = o;
79072805 1451 force_next(WORD);
748a9306 1452 if (kind) {
11343788 1453 o->op_private = OPpCONST_ENTERED;
55497cff 1454 /* XXX see note in pp_entereval() for why we forgo typo
1455 warnings if the symbol must be introduced in an eval.
1456 GSAR 96-10-12 */
90e5519e
NC
1457 gv_fetchpvn_flags(s, len,
1458 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1459 : GV_ADD,
1460 kind == '$' ? SVt_PV :
1461 kind == '@' ? SVt_PVAV :
1462 kind == '%' ? SVt_PVHV :
a0d0e21e 1463 SVt_PVGV
90e5519e 1464 );
748a9306 1465 }
79072805
LW
1466 }
1467}
1468
1571675a
GS
1469NV
1470Perl_str_to_version(pTHX_ SV *sv)
1471{
1472 NV retval = 0.0;
1473 NV nshift = 1.0;
1474 STRLEN len;
cfd0369c 1475 const char *start = SvPV_const(sv,len);
9d4ba2ae 1476 const char * const end = start + len;
504618e9 1477 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
1478
1479 PERL_ARGS_ASSERT_STR_TO_VERSION;
1480
1571675a 1481 while (start < end) {
ba210ebe 1482 STRLEN skip;
1571675a
GS
1483 UV n;
1484 if (utf)
9041c2e3 1485 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1486 else {
1487 n = *(U8*)start;
1488 skip = 1;
1489 }
1490 retval += ((NV)n)/nshift;
1491 start += skip;
1492 nshift *= 1000;
1493 }
1494 return retval;
1495}
1496
4e553d73 1497/*
ffb4593c
NT
1498 * S_force_version
1499 * Forces the next token to be a version number.
e759cc13
RGS
1500 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1501 * and if "guessing" is TRUE, then no new token is created (and the caller
1502 * must use an alternative parsing method).
ffb4593c
NT
1503 */
1504
76e3520e 1505STATIC char *
e759cc13 1506S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1507{
97aff369 1508 dVAR;
5f66b61c 1509 OP *version = NULL;
44dcb63b 1510 char *d;
5db06880
NC
1511#ifdef PERL_MAD
1512 I32 startoff = s - SvPVX(PL_linestr);
1513#endif
89bfa8cd 1514
7918f24d
NC
1515 PERL_ARGS_ASSERT_FORCE_VERSION;
1516
29595ff2 1517 s = SKIPSPACE1(s);
89bfa8cd 1518
44dcb63b 1519 d = s;
dd629d5b 1520 if (*d == 'v')
44dcb63b 1521 d++;
44dcb63b 1522 if (isDIGIT(*d)) {
e759cc13
RGS
1523 while (isDIGIT(*d) || *d == '_' || *d == '.')
1524 d++;
5db06880
NC
1525#ifdef PERL_MAD
1526 if (PL_madskills) {
cd81e915 1527 start_force(PL_curforce);
5db06880
NC
1528 curmad('X', newSVpvn(s,d-s));
1529 }
1530#endif
9f3d182e 1531 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1532 SV *ver;
6154021b
RGS
1533 s = scan_num(s, &pl_yylval);
1534 version = pl_yylval.opval;
dd629d5b
GS
1535 ver = cSVOPx(version)->op_sv;
1536 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1537 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1538 SvNV_set(ver, str_to_version(ver));
1571675a 1539 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1540 }
89bfa8cd 1541 }
5db06880
NC
1542 else if (guessing) {
1543#ifdef PERL_MAD
1544 if (PL_madskills) {
cd81e915
NC
1545 sv_free(PL_nextwhite); /* let next token collect whitespace */
1546 PL_nextwhite = 0;
5db06880
NC
1547 s = SvPVX(PL_linestr) + startoff;
1548 }
1549#endif
e759cc13 1550 return s;
5db06880 1551 }
89bfa8cd 1552 }
1553
5db06880
NC
1554#ifdef PERL_MAD
1555 if (PL_madskills && !version) {
cd81e915
NC
1556 sv_free(PL_nextwhite); /* let next token collect whitespace */
1557 PL_nextwhite = 0;
5db06880
NC
1558 s = SvPVX(PL_linestr) + startoff;
1559 }
1560#endif
89bfa8cd 1561 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1562 start_force(PL_curforce);
9ded7720 1563 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1564 force_next(WORD);
89bfa8cd 1565
e759cc13 1566 return s;
89bfa8cd 1567}
1568
ffb4593c
NT
1569/*
1570 * S_tokeq
1571 * Tokenize a quoted string passed in as an SV. It finds the next
1572 * chunk, up to end of string or a backslash. It may make a new
1573 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1574 * turns \\ into \.
1575 */
1576
76e3520e 1577STATIC SV *
cea2e8a9 1578S_tokeq(pTHX_ SV *sv)
79072805 1579{
97aff369 1580 dVAR;
79072805
LW
1581 register char *s;
1582 register char *send;
1583 register char *d;
b3ac6de7
IZ
1584 STRLEN len = 0;
1585 SV *pv = sv;
79072805 1586
7918f24d
NC
1587 PERL_ARGS_ASSERT_TOKEQ;
1588
79072805 1589 if (!SvLEN(sv))
b3ac6de7 1590 goto finish;
79072805 1591
a0d0e21e 1592 s = SvPV_force(sv, len);
21a311ee 1593 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1594 goto finish;
463ee0b2 1595 send = s + len;
79072805
LW
1596 while (s < send && *s != '\\')
1597 s++;
1598 if (s == send)
b3ac6de7 1599 goto finish;
79072805 1600 d = s;
be4731d2 1601 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 1602 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 1603 }
79072805
LW
1604 while (s < send) {
1605 if (*s == '\\') {
a0d0e21e 1606 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1607 s++; /* all that, just for this */
1608 }
1609 *d++ = *s++;
1610 }
1611 *d = '\0';
95a20fc0 1612 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1613 finish:
3280af22 1614 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 1615 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
1616 return sv;
1617}
1618
ffb4593c
NT
1619/*
1620 * Now come three functions related to double-quote context,
1621 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1622 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1623 * interact with PL_lex_state, and create fake ( ... ) argument lists
1624 * to handle functions and concatenation.
1625 * They assume that whoever calls them will be setting up a fake
1626 * join call, because each subthing puts a ',' after it. This lets
1627 * "lower \luPpEr"
1628 * become
1629 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1630 *
1631 * (I'm not sure whether the spurious commas at the end of lcfirst's
1632 * arguments and join's arguments are created or not).
1633 */
1634
1635/*
1636 * S_sublex_start
6154021b 1637 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
1638 *
1639 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 1640 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
1641 *
1642 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1643 *
1644 * Everything else becomes a FUNC.
1645 *
1646 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1647 * had an OP_CONST or OP_READLINE). This just sets us up for a
1648 * call to S_sublex_push().
1649 */
1650
76e3520e 1651STATIC I32
cea2e8a9 1652S_sublex_start(pTHX)
79072805 1653{
97aff369 1654 dVAR;
6154021b 1655 register const I32 op_type = pl_yylval.ival;
79072805
LW
1656
1657 if (op_type == OP_NULL) {
6154021b 1658 pl_yylval.opval = PL_lex_op;
5f66b61c 1659 PL_lex_op = NULL;
79072805
LW
1660 return THING;
1661 }
1662 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1663 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1664
1665 if (SvTYPE(sv) == SVt_PVIV) {
1666 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1667 STRLEN len;
96a5add6 1668 const char * const p = SvPV_const(sv, len);
740cce10 1669 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
1670 SvREFCNT_dec(sv);
1671 sv = nsv;
4e553d73 1672 }
6154021b 1673 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1674 PL_lex_stuff = NULL;
6f33ba73
RGS
1675 /* Allow <FH> // "foo" */
1676 if (op_type == OP_READLINE)
1677 PL_expect = XTERMORDORDOR;
79072805
LW
1678 return THING;
1679 }
e3f73d4e
RGS
1680 else if (op_type == OP_BACKTICK && PL_lex_op) {
1681 /* readpipe() vas overriden */
1682 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 1683 pl_yylval.opval = PL_lex_op;
9b201d7d 1684 PL_lex_op = NULL;
e3f73d4e
RGS
1685 PL_lex_stuff = NULL;
1686 return THING;
1687 }
79072805 1688
3280af22 1689 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 1690 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
1691 PL_sublex_info.sub_op = PL_lex_op;
1692 PL_lex_state = LEX_INTERPPUSH;
55497cff 1693
3280af22
NIS
1694 PL_expect = XTERM;
1695 if (PL_lex_op) {
6154021b 1696 pl_yylval.opval = PL_lex_op;
5f66b61c 1697 PL_lex_op = NULL;
55497cff 1698 return PMFUNC;
1699 }
1700 else
1701 return FUNC;
1702}
1703
ffb4593c
NT
1704/*
1705 * S_sublex_push
1706 * Create a new scope to save the lexing state. The scope will be
1707 * ended in S_sublex_done. Returns a '(', starting the function arguments
1708 * to the uc, lc, etc. found before.
1709 * Sets PL_lex_state to LEX_INTERPCONCAT.
1710 */
1711
76e3520e 1712STATIC I32
cea2e8a9 1713S_sublex_push(pTHX)
55497cff 1714{
27da23d5 1715 dVAR;
f46d017c 1716 ENTER;
55497cff 1717
3280af22 1718 PL_lex_state = PL_sublex_info.super_state;
651b5b28 1719 SAVEBOOL(PL_lex_dojoin);
3280af22 1720 SAVEI32(PL_lex_brackets);
3280af22
NIS
1721 SAVEI32(PL_lex_casemods);
1722 SAVEI32(PL_lex_starts);
651b5b28 1723 SAVEI8(PL_lex_state);
7766f137 1724 SAVEVPTR(PL_lex_inpat);
98246f1e 1725 SAVEI16(PL_lex_inwhat);
57843af0 1726 SAVECOPLINE(PL_curcop);
3280af22 1727 SAVEPPTR(PL_bufptr);
8452ff4b 1728 SAVEPPTR(PL_bufend);
3280af22
NIS
1729 SAVEPPTR(PL_oldbufptr);
1730 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1731 SAVEPPTR(PL_last_lop);
1732 SAVEPPTR(PL_last_uni);
3280af22
NIS
1733 SAVEPPTR(PL_linestart);
1734 SAVESPTR(PL_linestr);
8edd5f42
RGS
1735 SAVEGENERICPV(PL_lex_brackstack);
1736 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1737
1738 PL_linestr = PL_lex_stuff;
a0714e2c 1739 PL_lex_stuff = NULL;
3280af22 1740
9cbb5ea2
GS
1741 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1742 = SvPVX(PL_linestr);
3280af22 1743 PL_bufend += SvCUR(PL_linestr);
bd61b366 1744 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1745 SAVEFREESV(PL_linestr);
1746
1747 PL_lex_dojoin = FALSE;
1748 PL_lex_brackets = 0;
a02a5408
JC
1749 Newx(PL_lex_brackstack, 120, char);
1750 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1751 PL_lex_casemods = 0;
1752 *PL_lex_casestack = '\0';
1753 PL_lex_starts = 0;
1754 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1755 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1756
1757 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1758 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1759 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1760 else
5f66b61c 1761 PL_lex_inpat = NULL;
79072805 1762
55497cff 1763 return '(';
79072805
LW
1764}
1765
ffb4593c
NT
1766/*
1767 * S_sublex_done
1768 * Restores lexer state after a S_sublex_push.
1769 */
1770
76e3520e 1771STATIC I32
cea2e8a9 1772S_sublex_done(pTHX)
79072805 1773{
27da23d5 1774 dVAR;
3280af22 1775 if (!PL_lex_starts++) {
396482e1 1776 SV * const sv = newSVpvs("");
9aa983d2
JH
1777 if (SvUTF8(PL_linestr))
1778 SvUTF8_on(sv);
3280af22 1779 PL_expect = XOPERATOR;
6154021b 1780 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1781 return THING;
1782 }
1783
3280af22
NIS
1784 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1785 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1786 return yylex();
79072805
LW
1787 }
1788
ffb4593c 1789 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1790 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1791 PL_linestr = PL_lex_repl;
1792 PL_lex_inpat = 0;
1793 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1794 PL_bufend += SvCUR(PL_linestr);
bd61b366 1795 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1796 SAVEFREESV(PL_linestr);
1797 PL_lex_dojoin = FALSE;
1798 PL_lex_brackets = 0;
3280af22
NIS
1799 PL_lex_casemods = 0;
1800 *PL_lex_casestack = '\0';
1801 PL_lex_starts = 0;
25da4f38 1802 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1803 PL_lex_state = LEX_INTERPNORMAL;
1804 PL_lex_starts++;
e9fa98b2
HS
1805 /* we don't clear PL_lex_repl here, so that we can check later
1806 whether this is an evalled subst; that means we rely on the
1807 logic to ensure sublex_done() is called again only via the
1808 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1809 }
e9fa98b2 1810 else {
3280af22 1811 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1812 PL_lex_repl = NULL;
e9fa98b2 1813 }
79072805 1814 return ',';
ffed7fef
LW
1815 }
1816 else {
5db06880
NC
1817#ifdef PERL_MAD
1818 if (PL_madskills) {
cd81e915
NC
1819 if (PL_thiswhite) {
1820 if (!PL_endwhite)
6b29d1f5 1821 PL_endwhite = newSVpvs("");
cd81e915
NC
1822 sv_catsv(PL_endwhite, PL_thiswhite);
1823 PL_thiswhite = 0;
1824 }
1825 if (PL_thistoken)
76f68e9b 1826 sv_setpvs(PL_thistoken,"");
5db06880 1827 else
cd81e915 1828 PL_realtokenstart = -1;
5db06880
NC
1829 }
1830#endif
f46d017c 1831 LEAVE;
3280af22
NIS
1832 PL_bufend = SvPVX(PL_linestr);
1833 PL_bufend += SvCUR(PL_linestr);
1834 PL_expect = XOPERATOR;
09bef843 1835 PL_sublex_info.sub_inwhat = 0;
79072805 1836 return ')';
ffed7fef
LW
1837 }
1838}
1839
02aa26ce
NT
1840/*
1841 scan_const
1842
1843 Extracts a pattern, double-quoted string, or transliteration. This
1844 is terrifying code.
1845
94def140 1846 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1847 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1848 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1849
94def140
TS
1850 Returns a pointer to the character scanned up to. If this is
1851 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 1852 successfully parsed), will leave an OP for the substring scanned
6154021b 1853 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
1854 by looking at the next characters herself.
1855
02aa26ce
NT
1856 In patterns:
1857 backslashes:
1858 double-quoted style: \r and \n
1859 regexp special ones: \D \s
94def140
TS
1860 constants: \x31
1861 backrefs: \1
02aa26ce
NT
1862 case and quoting: \U \Q \E
1863 stops on @ and $, but not for $ as tail anchor
1864
1865 In transliterations:
1866 characters are VERY literal, except for - not at the start or end
94def140
TS
1867 of the string, which indicates a range. If the range is in bytes,
1868 scan_const expands the range to the full set of intermediate
1869 characters. If the range is in utf8, the hyphen is replaced with
1870 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1871
1872 In double-quoted strings:
1873 backslashes:
1874 double-quoted style: \r and \n
94def140
TS
1875 constants: \x31
1876 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1877 case and quoting: \U \Q \E
1878 stops on @ and $
1879
1880 scan_const does *not* construct ops to handle interpolated strings.
1881 It stops processing as soon as it finds an embedded $ or @ variable
1882 and leaves it to the caller to work out what's going on.
1883
94def140
TS
1884 embedded arrays (whether in pattern or not) could be:
1885 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1886
1887 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1888
1889 $ in pattern could be $foo or could be tail anchor. Assumption:
1890 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1891 followed by one of "()| \r\n\t"
02aa26ce
NT
1892
1893 \1 (backreferences) are turned into $1
1894
1895 The structure of the code is
1896 while (there's a character to process) {
94def140
TS
1897 handle transliteration ranges
1898 skip regexp comments /(?#comment)/ and codes /(?{code})/
1899 skip #-initiated comments in //x patterns
1900 check for embedded arrays
02aa26ce
NT
1901 check for embedded scalars
1902 if (backslash) {
94def140
TS
1903 leave intact backslashes from leaveit (below)
1904 deprecate \1 in substitution replacements
02aa26ce
NT
1905 handle string-changing backslashes \l \U \Q \E, etc.
1906 switch (what was escaped) {
94def140
TS
1907 handle \- in a transliteration (becomes a literal -)
1908 handle \132 (octal characters)
1909 handle \x15 and \x{1234} (hex characters)
1910 handle \N{name} (named characters)
1911 handle \cV (control characters)
1912 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 1913 } (end switch)
77a135fe 1914 continue
02aa26ce 1915 } (end if backslash)
77a135fe 1916 handle regular character
02aa26ce 1917 } (end while character to read)
4e553d73 1918
02aa26ce
NT
1919*/
1920
76e3520e 1921STATIC char *
cea2e8a9 1922S_scan_const(pTHX_ char *start)
79072805 1923{
97aff369 1924 dVAR;
3280af22 1925 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
1926 SV *sv = newSV(send - start); /* sv for the constant. See
1927 note below on sizing. */
02aa26ce
NT
1928 register char *s = start; /* start of the constant */
1929 register char *d = SvPVX(sv); /* destination for copies */
1930 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1931 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 1932 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
1933 I32 this_utf8 = UTF; /* Is the source string assumed
1934 to be UTF8? But, this can
1935 show as true when the source
1936 isn't utf8, as for example
1937 when it is entirely composed
1938 of hex constants */
1939
1940 /* Note on sizing: The scanned constant is placed into sv, which is
1941 * initialized by newSV() assuming one byte of output for every byte of
1942 * input. This routine expects newSV() to allocate an extra byte for a
1943 * trailing NUL, which this routine will append if it gets to the end of
1944 * the input. There may be more bytes of input than output (eg., \N{LATIN
1945 * CAPITAL LETTER A}), or more output than input if the constant ends up
1946 * recoded to utf8, but each time a construct is found that might increase
1947 * the needed size, SvGROW() is called. Its size parameter each time is
1948 * based on the best guess estimate at the time, namely the length used so
1949 * far, plus the length the current construct will occupy, plus room for
1950 * the trailing NUL, plus one byte for every input byte still unscanned */
1951
012bcf8d 1952 UV uv;
4c3a8340
TS
1953#ifdef EBCDIC
1954 UV literal_endpoint = 0;
e294cc5d 1955 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1956#endif
012bcf8d 1957
7918f24d
NC
1958 PERL_ARGS_ASSERT_SCAN_CONST;
1959
2b9d42f0
NIS
1960 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1961 /* If we are doing a trans and we know we want UTF8 set expectation */
1962 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1963 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1964 }
1965
1966
79072805 1967 while (s < send || dorange) {
02aa26ce 1968 /* get transliterations out of the way (they're most literal) */
3280af22 1969 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1970 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1971 if (dorange) {
1ba5c669
JH
1972 I32 i; /* current expanded character */
1973 I32 min; /* first character in range */
1974 I32 max; /* last character in range */
02aa26ce 1975
e294cc5d
JH
1976#ifdef EBCDIC
1977 UV uvmax = 0;
1978#endif
1979
1980 if (has_utf8
1981#ifdef EBCDIC
1982 && !native_range
1983#endif
1984 ) {
9d4ba2ae 1985 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1986 char *e = d++;
1987 while (e-- > c)
1988 *(e + 1) = *e;
25716404 1989 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1990 /* mark the range as done, and continue */
1991 dorange = FALSE;
1992 didrange = TRUE;
1993 continue;
1994 }
2b9d42f0 1995
95a20fc0 1996 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1997#ifdef EBCDIC
1998 SvGROW(sv,
1999 SvLEN(sv) + (has_utf8 ?
2000 (512 - UTF_CONTINUATION_MARK +
2001 UNISKIP(0x100))
2002 : 256));
2003 /* How many two-byte within 0..255: 128 in UTF-8,
2004 * 96 in UTF-8-mod. */
2005#else
9cbb5ea2 2006 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2007#endif
9cbb5ea2 2008 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2009#ifdef EBCDIC
2010 if (has_utf8) {
2011 int j;
2012 for (j = 0; j <= 1; j++) {
2013 char * const c = (char*)utf8_hop((U8*)d, -1);
2014 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2015 if (j)
2016 min = (U8)uv;
2017 else if (uv < 256)
2018 max = (U8)uv;
2019 else {
2020 max = (U8)0xff; /* only to \xff */
2021 uvmax = uv; /* \x{100} to uvmax */
2022 }
2023 d = c; /* eat endpoint chars */
2024 }
2025 }
2026 else {
2027#endif
2028 d -= 2; /* eat the first char and the - */
2029 min = (U8)*d; /* first char in range */
2030 max = (U8)d[1]; /* last char in range */
2031#ifdef EBCDIC
2032 }
2033#endif
8ada0baa 2034
c2e66d9e 2035 if (min > max) {
01ec43d0 2036 Perl_croak(aTHX_
d1573ac7 2037 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2038 (char)min, (char)max);
c2e66d9e
GS
2039 }
2040
c7f1f016 2041#ifdef EBCDIC
4c3a8340
TS
2042 if (literal_endpoint == 2 &&
2043 ((isLOWER(min) && isLOWER(max)) ||
2044 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2045 if (isLOWER(min)) {
2046 for (i = min; i <= max; i++)
2047 if (isLOWER(i))
db42d148 2048 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2049 } else {
2050 for (i = min; i <= max; i++)
2051 if (isUPPER(i))
db42d148 2052 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2053 }
2054 }
2055 else
2056#endif
2057 for (i = min; i <= max; i++)
e294cc5d
JH
2058#ifdef EBCDIC
2059 if (has_utf8) {
2060 const U8 ch = (U8)NATIVE_TO_UTF(i);
2061 if (UNI_IS_INVARIANT(ch))
2062 *d++ = (U8)i;
2063 else {
2064 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2065 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2066 }
2067 }
2068 else
2069#endif
2070 *d++ = (char)i;
2071
2072#ifdef EBCDIC
2073 if (uvmax) {
2074 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2075 if (uvmax > 0x101)
2076 *d++ = (char)UTF_TO_NATIVE(0xff);
2077 if (uvmax > 0x100)
2078 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2079 }
2080#endif
02aa26ce
NT
2081
2082 /* mark the range as done, and continue */
79072805 2083 dorange = FALSE;
01ec43d0 2084 didrange = TRUE;
4c3a8340
TS
2085#ifdef EBCDIC
2086 literal_endpoint = 0;
2087#endif
79072805 2088 continue;
4e553d73 2089 }
02aa26ce
NT
2090
2091 /* range begins (ignore - as first or last char) */
79072805 2092 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2093 if (didrange) {
1fafa243 2094 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2095 }
e294cc5d
JH
2096 if (has_utf8
2097#ifdef EBCDIC
2098 && !native_range
2099#endif
2100 ) {
25716404 2101 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2102 s++;
2103 continue;
2104 }
79072805
LW
2105 dorange = TRUE;
2106 s++;
01ec43d0
GS
2107 }
2108 else {
2109 didrange = FALSE;
4c3a8340
TS
2110#ifdef EBCDIC
2111 literal_endpoint = 0;
e294cc5d 2112 native_range = TRUE;
4c3a8340 2113#endif
01ec43d0 2114 }
79072805 2115 }
02aa26ce
NT
2116
2117 /* if we get here, we're not doing a transliteration */
2118
0f5d15d6
IZ
2119 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2120 except for the last char, which will be done separately. */
3280af22 2121 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2122 if (s[2] == '#') {
e994fd66 2123 while (s+1 < send && *s != ')')
db42d148 2124 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2125 }
2126 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2127 || (s[2] == '?' && s[3] == '{'))
155aba94 2128 {
cc6b7395 2129 I32 count = 1;
0f5d15d6 2130 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2131 char c;
2132
d9f97599
GS
2133 while (count && (c = *regparse)) {
2134 if (c == '\\' && regparse[1])
2135 regparse++;
4e553d73 2136 else if (c == '{')
cc6b7395 2137 count++;
4e553d73 2138 else if (c == '}')
cc6b7395 2139 count--;
d9f97599 2140 regparse++;
cc6b7395 2141 }
e994fd66 2142 if (*regparse != ')')
5bdf89e7 2143 regparse--; /* Leave one char for continuation. */
0f5d15d6 2144 while (s < regparse)
db42d148 2145 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2146 }
748a9306 2147 }
02aa26ce
NT
2148
2149 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2150 else if (*s == '#' && PL_lex_inpat &&
2151 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2152 while (s+1 < send && *s != '\n')
db42d148 2153 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2154 }
02aa26ce 2155
5d1d4326 2156 /* check for embedded arrays
da6eedaa 2157 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2158 */
1749ea0d
TS
2159 else if (*s == '@' && s[1]) {
2160 if (isALNUM_lazy_if(s+1,UTF))
2161 break;
2162 if (strchr(":'{$", s[1]))
2163 break;
2164 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2165 break; /* in regexp, neither @+ nor @- are interpolated */
2166 }
02aa26ce
NT
2167
2168 /* check for embedded scalars. only stop if we're sure it's a
2169 variable.
2170 */
79072805 2171 else if (*s == '$') {
3280af22 2172 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2173 break;
77772344 2174 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2175 if (s[1] == '\\') {
2176 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2177 "Possible unintended interpolation of $\\ in regex");
77772344 2178 }
79072805 2179 break; /* in regexp, $ might be tail anchor */
77772344 2180 }
79072805 2181 }
02aa26ce 2182
2b9d42f0
NIS
2183 /* End of else if chain - OP_TRANS rejoin rest */
2184
02aa26ce 2185 /* backslashes */
79072805
LW
2186 if (*s == '\\' && s+1 < send) {
2187 s++;
02aa26ce 2188
02aa26ce 2189 /* deprecate \1 in strings and substitution replacements */
3280af22 2190 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2191 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2192 {
a2a5de95 2193 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2194 *--s = '$';
2195 break;
2196 }
02aa26ce
NT
2197
2198 /* string-change backslash escapes */
3280af22 2199 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2200 --s;
2201 break;
2202 }
cc74c5bd
TS
2203 /* skip any other backslash escapes in a pattern */
2204 else if (PL_lex_inpat) {
2205 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2206 goto default_action;
2207 }
02aa26ce
NT
2208
2209 /* if we get here, it's either a quoted -, or a digit */
79072805 2210 switch (*s) {
02aa26ce
NT
2211
2212 /* quoted - in transliterations */
79072805 2213 case '-':
3280af22 2214 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2215 *d++ = *s++;
2216 continue;
2217 }
2218 /* FALL THROUGH */
2219 default:
11b8faa4 2220 {
a2a5de95
NC
2221 if ((isALPHA(*s) || isDIGIT(*s)))
2222 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2223 "Unrecognized escape \\%c passed through",
2224 *s);
11b8faa4 2225 /* default action is to copy the quoted character */
f9a63242 2226 goto default_action;
11b8faa4 2227 }
02aa26ce 2228
77a135fe 2229 /* eg. \132 indicates the octal constant 0x132 */
79072805
LW
2230 case '0': case '1': case '2': case '3':
2231 case '4': case '5': case '6': case '7':
ba210ebe 2232 {
53305cf1
NC
2233 I32 flags = 0;
2234 STRLEN len = 3;
77a135fe 2235 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2236 s += len;
2237 }
012bcf8d 2238 goto NUM_ESCAPE_INSERT;
02aa26ce 2239
77a135fe 2240 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2241 case 'x':
a0ed51b3
LW
2242 ++s;
2243 if (*s == '{') {
9d4ba2ae 2244 char* const e = strchr(s, '}');
a4c04bdc
NC
2245 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2246 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2247 STRLEN len;
355860ce 2248
53305cf1 2249 ++s;
adaeee49 2250 if (!e) {
a0ed51b3 2251 yyerror("Missing right brace on \\x{}");
355860ce 2252 continue;
ba210ebe 2253 }
53305cf1 2254 len = e - s;
77a135fe 2255 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2256 s = e + 1;
a0ed51b3
LW
2257 }
2258 else {
ba210ebe 2259 {
53305cf1 2260 STRLEN len = 2;
a4c04bdc 2261 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2262 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2263 s += len;
2264 }
012bcf8d
GS
2265 }
2266
2267 NUM_ESCAPE_INSERT:
77a135fe
KW
2268 /* Insert oct, hex, or \N{U+...} escaped character. There will
2269 * always be enough room in sv since such escapes will be
2270 * longer than any UTF-8 sequence they can end up as, except if
2271 * they force us to recode the rest of the string into utf8 */
ba7cea30 2272
77a135fe
KW
2273 /* Here uv is the ordinal of the next character being added in
2274 * unicode (converted from native). (It has to be done before
2275 * here because \N is interpreted as unicode, and oct and hex
2276 * as native.) */
2277 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2278 if (!has_utf8 && uv > 255) {
77a135fe
KW
2279 /* Might need to recode whatever we have accumulated so
2280 * far if it contains any chars variant in utf8 or
2281 * utf-ebcdic. */
2282
2283 SvCUR_set(sv, d - SvPVX_const(sv));
2284 SvPOK_on(sv);
2285 *d = '\0';
77a135fe 2286 /* See Note on sizing above. */
7bf79863
KW
2287 sv_utf8_upgrade_flags_grow(sv,
2288 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2289 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2290 d = SvPVX(sv) + SvCUR(sv);
2291 has_utf8 = TRUE;
012bcf8d
GS
2292 }
2293
77a135fe
KW
2294 if (has_utf8) {
2295 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2296 if (PL_lex_inwhat == OP_TRANS &&
2297 PL_sublex_info.sub_op) {
2298 PL_sublex_info.sub_op->op_private |=
2299 (PL_lex_repl ? OPpTRANS_FROM_UTF
2300 : OPpTRANS_TO_UTF);
f9a63242 2301 }
e294cc5d
JH
2302#ifdef EBCDIC
2303 if (uv > 255 && !dorange)
2304 native_range = FALSE;
2305#endif
012bcf8d 2306 }
a0ed51b3 2307 else {
012bcf8d 2308 *d++ = (char)uv;
a0ed51b3 2309 }
012bcf8d
GS
2310 }
2311 else {
c4d5f83a 2312 *d++ = (char) uv;
a0ed51b3 2313 }
79072805 2314 continue;
02aa26ce 2315
77a135fe
KW
2316 /* \N{LATIN SMALL LETTER A} is a named character, and so is
2317 * \N{U+0041} */
4a2d328f 2318 case 'N':
55eda711 2319 ++s;
423cee85
JH
2320 if (*s == '{') {
2321 char* e = strchr(s, '}');
155aba94 2322 SV *res;
423cee85 2323 STRLEN len;
cfd0369c 2324 const char *str;
4e553d73 2325
423cee85 2326 if (!e) {
5777a3f7 2327 yyerror("Missing right brace on \\N{}");
423cee85
JH
2328 e = s - 1;
2329 goto cont_scan;
2330 }
dbc0d4f2 2331 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
77a135fe
KW
2332 /* \N{U+...} The ... is a unicode value even on EBCDIC
2333 * machines */
dbc0d4f2
JH
2334 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2335 PERL_SCAN_DISALLOW_PREFIX;
2336 s += 3;
2337 len = e - s;
2338 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2339 if ( e > s && len != (STRLEN)(e - s) ) {
2340 uv = 0xFFFD;
fc8cd66c 2341 }
dbc0d4f2
JH
2342 s = e + 1;
2343 goto NUM_ESCAPE_INSERT;
2344 }
55eda711 2345 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2346 res = new_constant( NULL, 0, "charnames",
eb0d8d16 2347 res, NULL, s - 2, e - s + 3 );
f9a63242
JH
2348 if (has_utf8)
2349 sv_utf8_upgrade(res);
cfd0369c 2350 str = SvPV_const(res,len);
1c47067b
JH
2351#ifdef EBCDIC_NEVER_MIND
2352 /* charnames uses pack U and that has been
2353 * recently changed to do the below uni->native
2354 * mapping, so this would be redundant (and wrong,
2355 * the code point would be doubly converted).
2356 * But leave this in just in case the pack U change
2357 * gets revoked, but the semantics is still
2358 * desireable for charnames. --jhi */
cddc7ef4 2359 {
cfd0369c 2360 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2361
2362 if (uv < 0x100) {
89ebb4a3 2363 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2364
2365 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2366 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2367 str = SvPV_const(res, len);
cddc7ef4
JH
2368 }
2369 }
2370#endif
77a135fe
KW
2371 /* If destination is not in utf8 but this new character is,
2372 * recode the dest to utf8 */
89491803 2373 if (!has_utf8 && SvUTF8(res)) {
77a135fe 2374 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 2375 SvPOK_on(sv);
e4f3eed8 2376 *d = '\0';
77a135fe 2377 /* See Note on sizing above. */
7bf79863
KW
2378 sv_utf8_upgrade_flags_grow(sv,
2379 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2380 len + (STRLEN)(send - s) + 1);
f08d6ad9 2381 d = SvPVX(sv) + SvCUR(sv);
89491803 2382 has_utf8 = TRUE;
77a135fe 2383 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85 2384
77a135fe
KW
2385 /* See Note on sizing above. (NOTE: SvCUR() is not set
2386 * correctly here). */
2387 const STRLEN off = d - SvPVX_const(sv);
2388 d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
423cee85 2389 }
e294cc5d
JH
2390#ifdef EBCDIC
2391 if (!dorange)
2392 native_range = FALSE; /* \N{} is guessed to be Unicode */
2393#endif
423cee85
JH
2394 Copy(str, d, len, char);
2395 d += len;
2396 SvREFCNT_dec(res);
2397 cont_scan:
2398 s = e + 1;
2399 }
2400 else
5777a3f7 2401 yyerror("Missing braces on \\N{}");
423cee85
JH
2402 continue;
2403
02aa26ce 2404 /* \c is a control character */
79072805
LW
2405 case 'c':
2406 s++;
961ce445 2407 if (s < send) {
ba210ebe 2408 U8 c = *s++;
c7f1f016
NIS
2409#ifdef EBCDIC
2410 if (isLOWER(c))
2411 c = toUPPER(c);
2412#endif
db42d148 2413 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2414 }
961ce445
RGS
2415 else {
2416 yyerror("Missing control char name in \\c");
2417 }
79072805 2418 continue;
02aa26ce
NT
2419
2420 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2421 case 'b':
db42d148 2422 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2423 break;
2424 case 'n':
db42d148 2425 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2426 break;
2427 case 'r':
db42d148 2428 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2429 break;
2430 case 'f':
db42d148 2431 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2432 break;
2433 case 't':
db42d148 2434 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2435 break;
34a3fe2a 2436 case 'e':
db42d148 2437 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2438 break;
2439 case 'a':
db42d148 2440 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2441 break;
02aa26ce
NT
2442 } /* end switch */
2443
79072805
LW
2444 s++;
2445 continue;
02aa26ce 2446 } /* end if (backslash) */
4c3a8340
TS
2447#ifdef EBCDIC
2448 else
2449 literal_endpoint++;
2450#endif
02aa26ce 2451
f9a63242 2452 default_action:
77a135fe
KW
2453 /* If we started with encoded form, or already know we want it,
2454 then encode the next character */
2455 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 2456 STRLEN len = 1;
77a135fe
KW
2457
2458
2459 /* One might think that it is wasted effort in the case of the
2460 * source being utf8 (this_utf8 == TRUE) to take the next character
2461 * in the source, convert it to an unsigned value, and then convert
2462 * it back again. But the source has not been validated here. The
2463 * routine that does the conversion checks for errors like
2464 * malformed utf8 */
2465
5f66b61c
AL
2466 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2467 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
2468 if (!has_utf8) {
2469 SvCUR_set(sv, d - SvPVX_const(sv));
2470 SvPOK_on(sv);
2471 *d = '\0';
77a135fe 2472 /* See Note on sizing above. */
7bf79863
KW
2473 sv_utf8_upgrade_flags_grow(sv,
2474 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2475 need + (STRLEN)(send - s) + 1);
77a135fe
KW
2476 d = SvPVX(sv) + SvCUR(sv);
2477 has_utf8 = TRUE;
2478 } else if (need > len) {
2479 /* encoded value larger than old, may need extra space (NOTE:
2480 * SvCUR() is not set correctly here). See Note on sizing
2481 * above. */
9d4ba2ae 2482 const STRLEN off = d - SvPVX_const(sv);
77a135fe 2483 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 2484 }
77a135fe
KW
2485 s += len;
2486
5f66b61c 2487 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
2488#ifdef EBCDIC
2489 if (uv > 255 && !dorange)
2490 native_range = FALSE;
2491#endif
2b9d42f0
NIS
2492 }
2493 else {
2494 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2495 }
02aa26ce
NT
2496 } /* while loop to process each character */
2497
2498 /* terminate the string and set up the sv */
79072805 2499 *d = '\0';
95a20fc0 2500 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2501 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2502 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2503
79072805 2504 SvPOK_on(sv);
9f4817db 2505 if (PL_encoding && !has_utf8) {
d0063567
DK
2506 sv_recode_to_utf8(sv, PL_encoding);
2507 if (SvUTF8(sv))
2508 has_utf8 = TRUE;
9f4817db 2509 }
2b9d42f0 2510 if (has_utf8) {
7e2040f0 2511 SvUTF8_on(sv);
2b9d42f0 2512 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2513 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2514 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2515 }
2516 }
79072805 2517
02aa26ce 2518 /* shrink the sv if we allocated more than we used */
79072805 2519 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2520 SvPV_shrink_to_cur(sv);
79072805 2521 }
02aa26ce 2522
6154021b 2523 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 2524 if (s > PL_bufptr) {
eb0d8d16
NC
2525 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2526 const char *const key = PL_lex_inpat ? "qr" : "q";
2527 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2528 const char *type;
2529 STRLEN typelen;
2530
2531 if (PL_lex_inwhat == OP_TRANS) {
2532 type = "tr";
2533 typelen = 2;
2534 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2535 type = "s";
2536 typelen = 1;
2537 } else {
2538 type = "qq";
2539 typelen = 2;
2540 }
2541
2542 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2543 type, typelen);
2544 }
6154021b 2545 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2546 } else
8990e307 2547 SvREFCNT_dec(sv);
79072805
LW
2548 return s;
2549}
2550
ffb4593c
NT
2551/* S_intuit_more
2552 * Returns TRUE if there's more to the expression (e.g., a subscript),
2553 * FALSE otherwise.
ffb4593c
NT
2554 *
2555 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2556 *
2557 * ->[ and ->{ return TRUE
2558 * { and [ outside a pattern are always subscripts, so return TRUE
2559 * if we're outside a pattern and it's not { or [, then return FALSE
2560 * if we're in a pattern and the first char is a {
2561 * {4,5} (any digits around the comma) returns FALSE
2562 * if we're in a pattern and the first char is a [
2563 * [] returns FALSE
2564 * [SOMETHING] has a funky algorithm to decide whether it's a
2565 * character class or not. It has to deal with things like
2566 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2567 * anything else returns TRUE
2568 */
2569
9cbb5ea2
GS
2570/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2571
76e3520e 2572STATIC int
cea2e8a9 2573S_intuit_more(pTHX_ register char *s)
79072805 2574{
97aff369 2575 dVAR;
7918f24d
NC
2576
2577 PERL_ARGS_ASSERT_INTUIT_MORE;
2578
3280af22 2579 if (PL_lex_brackets)
79072805
LW
2580 return TRUE;
2581 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2582 return TRUE;
2583 if (*s != '{' && *s != '[')
2584 return FALSE;
3280af22 2585 if (!PL_lex_inpat)
79072805
LW
2586 return TRUE;
2587
2588 /* In a pattern, so maybe we have {n,m}. */
2589 if (*s == '{') {
2590 s++;
2591 if (!isDIGIT(*s))
2592 return TRUE;
2593 while (isDIGIT(*s))
2594 s++;
2595 if (*s == ',')
2596 s++;
2597 while (isDIGIT(*s))
2598 s++;
2599 if (*s == '}')
2600 return FALSE;
2601 return TRUE;
2602
2603 }
2604
2605 /* On the other hand, maybe we have a character class */
2606
2607 s++;
2608 if (*s == ']' || *s == '^')
2609 return FALSE;
2610 else {
ffb4593c 2611 /* this is terrifying, and it works */
79072805
LW
2612 int weight = 2; /* let's weigh the evidence */
2613 char seen[256];
f27ffc4a 2614 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2615 const char * const send = strchr(s,']');
3280af22 2616 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2617
2618 if (!send) /* has to be an expression */
2619 return TRUE;
2620
2621 Zero(seen,256,char);
2622 if (*s == '$')
2623 weight -= 3;
2624 else if (isDIGIT(*s)) {
2625 if (s[1] != ']') {
2626 if (isDIGIT(s[1]) && s[2] == ']')
2627 weight -= 10;
2628 }
2629 else
2630 weight -= 100;
2631 }
2632 for (; s < send; s++) {
2633 last_un_char = un_char;
2634 un_char = (unsigned char)*s;
2635 switch (*s) {
2636 case '@':
2637 case '&':
2638 case '$':
2639 weight -= seen[un_char] * 10;
7e2040f0 2640 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2641 int len;
8903cb82 2642 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2643 len = (int)strlen(tmpbuf);
2644 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2645 weight -= 100;
2646 else
2647 weight -= 10;
2648 }
2649 else if (*s == '$' && s[1] &&
93a17b20
LW
2650 strchr("[#!%*<>()-=",s[1])) {
2651 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2652 weight -= 10;
2653 else
2654 weight -= 1;
2655 }
2656 break;
2657 case '\\':
2658 un_char = 254;
2659 if (s[1]) {
93a17b20 2660 if (strchr("wds]",s[1]))
79072805 2661 weight += 100;
10edeb5d 2662 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2663 weight += 1;
93a17b20 2664 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2665 weight += 40;
2666 else if (isDIGIT(s[1])) {
2667 weight += 40;
2668 while (s[1] && isDIGIT(s[1]))
2669 s++;
2670 }
2671 }
2672 else
2673 weight += 100;
2674 break;
2675 case '-':
2676 if (s[1] == '\\')
2677 weight += 50;
93a17b20 2678 if (strchr("aA01! ",last_un_char))
79072805 2679 weight += 30;
93a17b20 2680 if (strchr("zZ79~",s[1]))
79072805 2681 weight += 30;
f27ffc4a
GS
2682 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2683 weight -= 5; /* cope with negative subscript */
79072805
LW
2684 break;
2685 default:
3792a11b
NC
2686 if (!isALNUM(last_un_char)
2687 && !(last_un_char == '$' || last_un_char == '@'
2688 || last_un_char == '&')
2689 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2690 char *d = tmpbuf;
2691 while (isALPHA(*s))
2692 *d++ = *s++;
2693 *d = '\0';
5458a98a 2694 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2695 weight -= 150;
2696 }
2697 if (un_char == last_un_char + 1)
2698 weight += 5;
2699 weight -= seen[un_char];
2700 break;
2701 }
2702 seen[un_char]++;
2703 }
2704 if (weight >= 0) /* probably a character class */
2705 return FALSE;
2706 }
2707
2708 return TRUE;
2709}
ffed7fef 2710
ffb4593c
NT
2711/*
2712 * S_intuit_method
2713 *
2714 * Does all the checking to disambiguate
2715 * foo bar
2716 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2717 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2718 *
2719 * First argument is the stuff after the first token, e.g. "bar".
2720 *
2721 * Not a method if bar is a filehandle.
2722 * Not a method if foo is a subroutine prototyped to take a filehandle.
2723 * Not a method if it's really "Foo $bar"
2724 * Method if it's "foo $bar"
2725 * Not a method if it's really "print foo $bar"
2726 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2727 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2728 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2729 * =>
2730 */
2731
76e3520e 2732STATIC int
62d55b22 2733S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2734{
97aff369 2735 dVAR;
a0d0e21e 2736 char *s = start + (*start == '$');
3280af22 2737 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2738 STRLEN len;
2739 GV* indirgv;
5db06880
NC
2740#ifdef PERL_MAD
2741 int soff;
2742#endif
a0d0e21e 2743
7918f24d
NC
2744 PERL_ARGS_ASSERT_INTUIT_METHOD;
2745
a0d0e21e 2746 if (gv) {
62d55b22 2747 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2748 return 0;
62d55b22
NC
2749 if (cv) {
2750 if (SvPOK(cv)) {
2751 const char *proto = SvPVX_const(cv);
2752 if (proto) {
2753 if (*proto == ';')
2754 proto++;
2755 if (*proto == '*')
2756 return 0;
2757 }
b6c543e3
IZ
2758 }
2759 } else
c35e046a 2760 gv = NULL;
a0d0e21e 2761 }
8903cb82 2762 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2763 /* start is the beginning of the possible filehandle/object,
2764 * and s is the end of it
2765 * tmpbuf is a copy of it
2766 */
2767
a0d0e21e 2768 if (*start == '$') {
3ef1310e
RGS
2769 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2770 isUPPER(*PL_tokenbuf))
a0d0e21e 2771 return 0;
5db06880
NC
2772#ifdef PERL_MAD
2773 len = start - SvPVX(PL_linestr);
2774#endif
29595ff2 2775 s = PEEKSPACE(s);
f0092767 2776#ifdef PERL_MAD
5db06880
NC
2777 start = SvPVX(PL_linestr) + len;
2778#endif
3280af22
NIS
2779 PL_bufptr = start;
2780 PL_expect = XREF;
a0d0e21e
LW
2781 return *s == '(' ? FUNCMETH : METHOD;
2782 }
5458a98a 2783 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2784 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2785 len -= 2;
2786 tmpbuf[len] = '\0';
5db06880
NC
2787#ifdef PERL_MAD
2788 soff = s - SvPVX(PL_linestr);
2789#endif
c3e0f903
GS
2790 goto bare_package;
2791 }
90e5519e 2792 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2793 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2794 return 0;
2795 /* filehandle or package name makes it a method */
da51bb9b 2796 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
2797#ifdef PERL_MAD
2798 soff = s - SvPVX(PL_linestr);
2799#endif
29595ff2 2800 s = PEEKSPACE(s);
3280af22 2801 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2802 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2803 bare_package:
cd81e915 2804 start_force(PL_curforce);
9ded7720 2805 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 2806 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 2807 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2808 if (PL_madskills)
2809 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2810 PL_expect = XTERM;
a0d0e21e 2811 force_next(WORD);
3280af22 2812 PL_bufptr = s;
5db06880
NC
2813#ifdef PERL_MAD
2814 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2815#endif
a0d0e21e
LW
2816 return *s == '(' ? FUNCMETH : METHOD;
2817 }
2818 }
2819 return 0;
2820}
2821
16d20bd9 2822/* Encoded script support. filter_add() effectively inserts a
4e553d73 2823 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2824 * Note that the filter function only applies to the current source file
2825 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2826 *
2827 * The datasv parameter (which may be NULL) can be used to pass
2828 * private data to this instance of the filter. The filter function
2829 * can recover the SV using the FILTER_DATA macro and use it to
2830 * store private buffers and state information.
2831 *
2832 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2833 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2834 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2835 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2836 * private use must be set using malloc'd pointers.
2837 */
16d20bd9
AD
2838
2839SV *
864dbfa3 2840Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2841{
97aff369 2842 dVAR;
f4c556ac 2843 if (!funcp)
a0714e2c 2844 return NULL;
f4c556ac 2845
5486870f
DM
2846 if (!PL_parser)
2847 return NULL;
2848
3280af22
NIS
2849 if (!PL_rsfp_filters)
2850 PL_rsfp_filters = newAV();
16d20bd9 2851 if (!datasv)
561b68a9 2852 datasv = newSV(0);
862a34c6 2853 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2854 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2855 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2856 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2857 FPTR2DPTR(void *, IoANY(datasv)),
2858 SvPV_nolen(datasv)));
3280af22
NIS
2859 av_unshift(PL_rsfp_filters, 1);
2860 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2861 return(datasv);
2862}
4e553d73 2863
16d20bd9
AD
2864
2865/* Delete most recently added instance of this filter function. */
a0d0e21e 2866void
864dbfa3 2867Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2868{
97aff369 2869 dVAR;
e0c19803 2870 SV *datasv;
24801a4b 2871
7918f24d
NC
2872 PERL_ARGS_ASSERT_FILTER_DEL;
2873
33073adb 2874#ifdef DEBUGGING
55662e27
JH
2875 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2876 FPTR2DPTR(void*, funcp)));
33073adb 2877#endif
5486870f 2878 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2879 return;
2880 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2881 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2882 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2883 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2884 IoANY(datasv) = (void *)NULL;
3280af22 2885 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2886
16d20bd9
AD
2887 return;
2888 }
2889 /* we need to search for the correct entry and clear it */
cea2e8a9 2890 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2891}
2892
2893
1de9afcd
RGS
2894/* Invoke the idxth filter function for the current rsfp. */
2895/* maxlen 0 = read one text line */
16d20bd9 2896I32
864dbfa3 2897Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2898{
97aff369 2899 dVAR;
16d20bd9
AD
2900 filter_t funcp;
2901 SV *datasv = NULL;
f482118e
NC
2902 /* This API is bad. It should have been using unsigned int for maxlen.
2903 Not sure if we want to change the API, but if not we should sanity
2904 check the value here. */
39cd7a59
NC
2905 const unsigned int correct_length
2906 = maxlen < 0 ?
2907#ifdef PERL_MICRO
2908 0x7FFFFFFF
2909#else
2910 INT_MAX
2911#endif
2912 : maxlen;
e50aee73 2913
7918f24d
NC
2914 PERL_ARGS_ASSERT_FILTER_READ;
2915
5486870f 2916 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 2917 return -1;
1de9afcd 2918 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2919 /* Provide a default input filter to make life easy. */
2920 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2921 DEBUG_P(PerlIO_printf(Perl_debug_log,
2922 "filter_read %d: from rsfp\n", idx));
f482118e 2923 if (correct_length) {
16d20bd9
AD
2924 /* Want a block */
2925 int len ;
f54cb97a 2926 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2927
2928 /* ensure buf_sv is large enough */
f482118e
NC
2929 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2930 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2931 correct_length)) <= 0) {
3280af22 2932 if (PerlIO_error(PL_rsfp))
37120919
AD
2933 return -1; /* error */
2934 else
2935 return 0 ; /* end of file */
2936 }
16d20bd9
AD
2937 SvCUR_set(buf_sv, old_len + len) ;
2938 } else {
2939 /* Want a line */
3280af22
NIS
2940 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2941 if (PerlIO_error(PL_rsfp))
37120919
AD
2942 return -1; /* error */
2943 else
2944 return 0 ; /* end of file */
2945 }
16d20bd9
AD
2946 }
2947 return SvCUR(buf_sv);
2948 }
2949 /* Skip this filter slot if filter has been deleted */
1de9afcd 2950 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2951 DEBUG_P(PerlIO_printf(Perl_debug_log,
2952 "filter_read %d: skipped (filter deleted)\n",
2953 idx));
f482118e 2954 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2955 }
2956 /* Get function pointer hidden within datasv */
8141890a 2957 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2958 DEBUG_P(PerlIO_printf(Perl_debug_log,
2959 "filter_read %d: via function %p (%s)\n",
ca0270c4 2960 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2961 /* Call function. The function is expected to */
2962 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2963 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2964 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2965}
2966
76e3520e 2967STATIC char *
cea2e8a9 2968S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2969{
97aff369 2970 dVAR;
7918f24d
NC
2971
2972 PERL_ARGS_ASSERT_FILTER_GETS;
2973
c39cd008 2974#ifdef PERL_CR_FILTER
3280af22 2975 if (!PL_rsfp_filters) {
c39cd008 2976 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2977 }
2978#endif
3280af22 2979 if (PL_rsfp_filters) {
55497cff 2980 if (!append)
2981 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2982 if (FILTER_READ(0, sv, 0) > 0)
2983 return ( SvPVX(sv) ) ;
2984 else
bd61b366 2985 return NULL ;
16d20bd9 2986 }
9d116dd7 2987 else
fd049845 2988 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2989}
2990
01ec43d0 2991STATIC HV *
9bde8eb0 2992S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 2993{
97aff369 2994 dVAR;
def3634b
GS
2995 GV *gv;
2996
7918f24d
NC
2997 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
2998
01ec43d0 2999 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3000 return PL_curstash;
3001
3002 if (len > 2 &&
3003 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3004 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3005 {
3006 return GvHV(gv); /* Foo:: */
def3634b
GS
3007 }
3008
3009 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3010 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3011 if (gv && GvCV(gv)) {
3012 SV * const sv = cv_const_sv(GvCV(gv));
3013 if (sv)
9bde8eb0 3014 pkgname = SvPV_const(sv, len);
def3634b
GS
3015 }
3016
9bde8eb0 3017 return gv_stashpvn(pkgname, len, 0);
def3634b 3018}
a0d0e21e 3019
e3f73d4e
RGS
3020/*
3021 * S_readpipe_override
3022 * Check whether readpipe() is overriden, and generates the appropriate
3023 * optree, provided sublex_start() is called afterwards.
3024 */
3025STATIC void
1d51329b 3026S_readpipe_override(pTHX)
e3f73d4e
RGS
3027{
3028 GV **gvp;
3029 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3030 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3031 if ((gv_readpipe
3032 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3033 ||
3034 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3035 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3036 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3037 {
3038 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3039 append_elem(OP_LIST,
3040 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3041 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3042 }
e3f73d4e
RGS
3043}
3044
5db06880
NC
3045#ifdef PERL_MAD
3046 /*
3047 * Perl_madlex
3048 * The intent of this yylex wrapper is to minimize the changes to the
3049 * tokener when we aren't interested in collecting madprops. It remains
3050 * to be seen how successful this strategy will be...
3051 */
3052
3053int
3054Perl_madlex(pTHX)
3055{
3056 int optype;
3057 char *s = PL_bufptr;
3058
cd81e915
NC
3059 /* make sure PL_thiswhite is initialized */
3060 PL_thiswhite = 0;
3061 PL_thismad = 0;
5db06880 3062
cd81e915 3063 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3064 if (PL_pending_ident)
3065 return S_pending_ident(aTHX);
3066
3067 /* previous token ate up our whitespace? */
cd81e915
NC
3068 if (!PL_lasttoke && PL_nextwhite) {
3069 PL_thiswhite = PL_nextwhite;
3070 PL_nextwhite = 0;
5db06880
NC
3071 }
3072
3073 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3074 PL_realtokenstart = -1;
3075 PL_thistoken = 0;
5db06880
NC
3076 optype = yylex();
3077 s = PL_bufptr;
cd81e915 3078 assert(PL_curforce < 0);
5db06880 3079
cd81e915
NC
3080 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3081 if (!PL_thistoken) {
3082 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3083 PL_thistoken = newSVpvs("");
5db06880 3084 else {
c35e046a 3085 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3086 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3087 }
3088 }
cd81e915
NC
3089 if (PL_thismad) /* install head */
3090 CURMAD('X', PL_thistoken);
5db06880
NC
3091 }
3092
3093 /* last whitespace of a sublex? */
cd81e915
NC
3094 if (optype == ')' && PL_endwhite) {
3095 CURMAD('X', PL_endwhite);
5db06880
NC
3096 }
3097
cd81e915 3098 if (!PL_thismad) {
5db06880
NC
3099
3100 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3101 if (!PL_thiswhite && !PL_endwhite && !optype) {
3102 sv_free(PL_thistoken);
3103 PL_thistoken = 0;
5db06880
NC
3104 return 0;
3105 }
3106
3107 /* put off final whitespace till peg */
3108 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3109 PL_nextwhite = PL_thiswhite;
3110 PL_thiswhite = 0;
5db06880 3111 }
cd81e915
NC
3112 else if (PL_thisopen) {
3113 CURMAD('q', PL_thisopen);
3114 if (PL_thistoken)
3115 sv_free(PL_thistoken);
3116 PL_thistoken = 0;
5db06880
NC
3117 }
3118 else {
3119 /* Store actual token text as madprop X */
cd81e915 3120 CURMAD('X', PL_thistoken);
5db06880
NC
3121 }
3122
cd81e915 3123 if (PL_thiswhite) {
5db06880 3124 /* add preceding whitespace as madprop _ */
cd81e915 3125 CURMAD('_', PL_thiswhite);
5db06880
NC
3126 }
3127
cd81e915 3128 if (PL_thisstuff) {
5db06880 3129 /* add quoted material as madprop = */
cd81e915 3130 CURMAD('=', PL_thisstuff);
5db06880
NC
3131 }
3132
cd81e915 3133 if (PL_thisclose) {
5db06880 3134 /* add terminating quote as madprop Q */
cd81e915 3135 CURMAD('Q', PL_thisclose);
5db06880
NC
3136 }
3137 }
3138
3139 /* special processing based on optype */
3140
3141 switch (optype) {
3142
3143 /* opval doesn't need a TOKEN since it can already store mp */
3144 case WORD:
3145 case METHOD:
3146 case FUNCMETH:
3147 case THING:
3148 case PMFUNC:
3149 case PRIVATEREF:
3150 case FUNC0SUB:
3151 case UNIOPSUB:
3152 case LSTOPSUB:
6154021b
RGS
3153 if (pl_yylval.opval)
3154 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 3155 PL_thismad = 0;
5db06880
NC
3156 return optype;
3157
3158 /* fake EOF */
3159 case 0:
3160 optype = PEG;
cd81e915
NC
3161 if (PL_endwhite) {
3162 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3163 PL_endwhite = 0;
5db06880
NC
3164 }
3165 break;
3166
3167 case ']':
3168 case '}':
cd81e915 3169 if (PL_faketokens)
5db06880
NC
3170 break;
3171 /* remember any fake bracket that lexer is about to discard */
3172 if (PL_lex_brackets == 1 &&
3173 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3174 {
3175 s = PL_bufptr;
3176 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3177 s++;
3178 if (*s == '}') {
cd81e915
NC
3179 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3180 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3181 PL_thiswhite = 0;
5db06880
NC
3182 PL_bufptr = s - 1;
3183 break; /* don't bother looking for trailing comment */
3184 }
3185 else
3186 s = PL_bufptr;
3187 }
3188 if (optype == ']')
3189 break;
3190 /* FALLTHROUGH */
3191
3192 /* attach a trailing comment to its statement instead of next token */
3193 case ';':
cd81e915 3194 if (PL_faketokens)
5db06880
NC
3195 break;
3196 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3197 s = PL_bufptr;
3198 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3199 s++;
3200 if (*s == '\n' || *s == '#') {
3201 while (s < PL_bufend && *s != '\n')
3202 s++;
3203 if (s < PL_bufend)
3204 s++;
cd81e915
NC
3205 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3206 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3207 PL_thiswhite = 0;
5db06880
NC
3208 PL_bufptr = s;
3209 }
3210 }
3211 break;
3212
3213 /* pval */
3214 case LABEL:
3215 break;
3216
3217 /* ival */
3218 default:
3219 break;
3220
3221 }
3222
3223 /* Create new token struct. Note: opvals return early above. */
6154021b 3224 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 3225 PL_thismad = 0;
5db06880
NC
3226 return optype;
3227}
3228#endif
3229
468aa647 3230STATIC char *
cc6ed77d 3231S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3232 dVAR;
7918f24d
NC
3233
3234 PERL_ARGS_ASSERT_TOKENIZE_USE;
3235
468aa647
RGS
3236 if (PL_expect != XSTATE)
3237 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3238 is_use ? "use" : "no"));
29595ff2 3239 s = SKIPSPACE1(s);
468aa647
RGS
3240 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3241 s = force_version(s, TRUE);
29595ff2 3242 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3243 start_force(PL_curforce);
9ded7720 3244 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3245 force_next(WORD);
3246 }
3247 else if (*s == 'v') {
3248 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3249 s = force_version(s, FALSE);
3250 }
3251 }
3252 else {
3253 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3254 s = force_version(s, FALSE);
3255 }
6154021b 3256 pl_yylval.ival = is_use;
468aa647
RGS
3257 return s;
3258}
748a9306 3259#ifdef DEBUGGING
27da23d5 3260 static const char* const exp_name[] =
09bef843 3261 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3262 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3263 };
748a9306 3264#endif
463ee0b2 3265
02aa26ce
NT
3266/*
3267 yylex
3268
3269 Works out what to call the token just pulled out of the input
3270 stream. The yacc parser takes care of taking the ops we return and
3271 stitching them into a tree.
3272
3273 Returns:
3274 PRIVATEREF
3275
3276 Structure:
3277 if read an identifier
3278 if we're in a my declaration
3279 croak if they tried to say my($foo::bar)
3280 build the ops for a my() declaration
3281 if it's an access to a my() variable
3282 are we in a sort block?
3283 croak if my($a); $a <=> $b
3284 build ops for access to a my() variable
3285 if in a dq string, and they've said @foo and we can't find @foo
3286 croak
3287 build ops for a bareword
3288 if we already built the token before, use it.
3289*/
3290
20141f0e 3291
dba4d153
JH
3292#ifdef __SC__
3293#pragma segment Perl_yylex
3294#endif
dba4d153 3295int
dba4d153 3296Perl_yylex(pTHX)
20141f0e 3297{
97aff369 3298 dVAR;
3afc138a 3299 register char *s = PL_bufptr;
378cc40b 3300 register char *d;
463ee0b2 3301 STRLEN len;
aa7440fb 3302 bool bof = FALSE;
a687059c 3303
10edeb5d
JH
3304 /* orig_keyword, gvp, and gv are initialized here because
3305 * jump to the label just_a_word_zero can bypass their
3306 * initialization later. */
3307 I32 orig_keyword = 0;
3308 GV *gv = NULL;
3309 GV **gvp = NULL;
3310
bbf60fe6 3311 DEBUG_T( {
396482e1 3312 SV* tmp = newSVpvs("");
b6007c36
DM
3313 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3314 (IV)CopLINE(PL_curcop),
3315 lex_state_names[PL_lex_state],
3316 exp_name[PL_expect],
3317 pv_display(tmp, s, strlen(s), 0, 60));
3318 SvREFCNT_dec(tmp);
bbf60fe6 3319 } );
02aa26ce 3320 /* check if there's an identifier for us to look at */
ba979b31 3321 if (PL_pending_ident)
bbf60fe6 3322 return REPORT(S_pending_ident(aTHX));
bbce6d69 3323
02aa26ce
NT
3324 /* no identifier pending identification */
3325
3280af22 3326 switch (PL_lex_state) {
79072805
LW
3327#ifdef COMMENTARY
3328 case LEX_NORMAL: /* Some compilers will produce faster */
3329 case LEX_INTERPNORMAL: /* code if we comment these out. */
3330 break;
3331#endif
3332
09bef843 3333 /* when we've already built the next token, just pull it out of the queue */
79072805 3334 case LEX_KNOWNEXT:
5db06880
NC
3335#ifdef PERL_MAD
3336 PL_lasttoke--;
6154021b 3337 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 3338 if (PL_madskills) {
cd81e915 3339 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3340 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 3341 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 3342 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
3343 PL_thismad->mad_val = 0;
3344 mad_free(PL_thismad);
3345 PL_thismad = 0;
5db06880
NC
3346 }
3347 }
3348 if (!PL_lasttoke) {
3349 PL_lex_state = PL_lex_defer;
3350 PL_expect = PL_lex_expect;
3351 PL_lex_defer = LEX_NORMAL;
3352 if (!PL_nexttoke[PL_lasttoke].next_type)
3353 return yylex();
3354 }
3355#else
3280af22 3356 PL_nexttoke--;
6154021b 3357 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3358 if (!PL_nexttoke) {
3359 PL_lex_state = PL_lex_defer;
3360 PL_expect = PL_lex_expect;
3361 PL_lex_defer = LEX_NORMAL;
463ee0b2 3362 }
5db06880
NC
3363#endif
3364#ifdef PERL_MAD
3365 /* FIXME - can these be merged? */
3366 return(PL_nexttoke[PL_lasttoke].next_type);
3367#else
bbf60fe6 3368 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3369#endif
79072805 3370
02aa26ce 3371 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3372 when we get here, PL_bufptr is at the \
02aa26ce 3373 */
79072805
LW
3374 case LEX_INTERPCASEMOD:
3375#ifdef DEBUGGING
3280af22 3376 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3377 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3378#endif
02aa26ce 3379 /* handle \E or end of string */
3280af22 3380 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3381 /* if at a \E */
3280af22 3382 if (PL_lex_casemods) {
f54cb97a 3383 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3384 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3385
3792a11b
NC
3386 if (PL_bufptr != PL_bufend
3387 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3388 PL_bufptr += 2;
3389 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3390#ifdef PERL_MAD
3391 if (PL_madskills)
6b29d1f5 3392 PL_thistoken = newSVpvs("\\E");
5db06880 3393#endif
a0d0e21e 3394 }
bbf60fe6 3395 return REPORT(')');
79072805 3396 }
5db06880
NC
3397#ifdef PERL_MAD
3398 while (PL_bufptr != PL_bufend &&
3399 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3400 if (!PL_thiswhite)
6b29d1f5 3401 PL_thiswhite = newSVpvs("");
cd81e915 3402 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3403 PL_bufptr += 2;
3404 }
3405#else
3280af22
NIS
3406 if (PL_bufptr != PL_bufend)
3407 PL_bufptr += 2;
5db06880 3408#endif
3280af22 3409 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3410 return yylex();
79072805
LW
3411 }
3412 else {
607df283 3413 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3414 "### Saw case modifier\n"); });
3280af22 3415 s = PL_bufptr + 1;
6e909404 3416 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3417#ifdef PERL_MAD
cd81e915 3418 if (!PL_thiswhite)
6b29d1f5 3419 PL_thiswhite = newSVpvs("");
cd81e915 3420 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3421#endif
89122651 3422 PL_bufptr = s + 3;
6e909404
JH
3423 PL_lex_state = LEX_INTERPCONCAT;
3424 return yylex();
a0d0e21e 3425 }
6e909404 3426 else {
90771dc0 3427 I32 tmp;
5db06880
NC
3428 if (!PL_madskills) /* when just compiling don't need correct */
3429 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3430 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3431 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3432 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3433 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3434 return REPORT(')');
6e909404
JH
3435 }
3436 if (PL_lex_casemods > 10)
3437 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3438 PL_lex_casestack[PL_lex_casemods++] = *s;
3439 PL_lex_casestack[PL_lex_casemods] = '\0';
3440 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3441 start_force(PL_curforce);
9ded7720 3442 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3443 force_next('(');
cd81e915 3444 start_force(PL_curforce);
6e909404 3445 if (*s == 'l')
9ded7720 3446 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3447 else if (*s == 'u')
9ded7720 3448 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3449 else if (*s == 'L')
9ded7720 3450 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3451 else if (*s == 'U')
9ded7720 3452 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3453 else if (*s == 'Q')
9ded7720 3454 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3455 else
3456 Perl_croak(aTHX_ "panic: yylex");
5db06880 3457 if (PL_madskills) {
a5849ce5
NC
3458 SV* const tmpsv = newSVpvs("\\ ");
3459 /* replace the space with the character we want to escape
3460 */
3461 SvPVX(tmpsv)[1] = *s;
5db06880
NC
3462 curmad('_', tmpsv);
3463 }
6e909404 3464 PL_bufptr = s + 1;
a0d0e21e 3465 }
79072805 3466 force_next(FUNC);
3280af22
NIS
3467 if (PL_lex_starts) {
3468 s = PL_bufptr;
3469 PL_lex_starts = 0;
5db06880
NC
3470#ifdef PERL_MAD
3471 if (PL_madskills) {
cd81e915
NC
3472 if (PL_thistoken)
3473 sv_free(PL_thistoken);
6b29d1f5 3474 PL_thistoken = newSVpvs("");
5db06880
NC
3475 }
3476#endif
131b3ad0
DM
3477 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3478 if (PL_lex_casemods == 1 && PL_lex_inpat)
3479 OPERATOR(',');
3480 else
3481 Aop(OP_CONCAT);
79072805
LW
3482 }
3483 else
cea2e8a9 3484 return yylex();
79072805
LW
3485 }
3486
55497cff 3487 case LEX_INTERPPUSH:
bbf60fe6 3488 return REPORT(sublex_push());
55497cff 3489
79072805 3490 case LEX_INTERPSTART:
3280af22 3491 if (PL_bufptr == PL_bufend)
bbf60fe6 3492 return REPORT(sublex_done());
607df283 3493 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3494 "### Interpolated variable\n"); });
3280af22
NIS
3495 PL_expect = XTERM;
3496 PL_lex_dojoin = (*PL_bufptr == '@');
3497 PL_lex_state = LEX_INTERPNORMAL;
3498 if (PL_lex_dojoin) {
cd81e915 3499 start_force(PL_curforce);
9ded7720 3500 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3501 force_next(',');
cd81e915 3502 start_force(PL_curforce);
a0d0e21e 3503 force_ident("\"", '$');
cd81e915 3504 start_force(PL_curforce);
9ded7720 3505 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3506 force_next('$');
cd81e915 3507 start_force(PL_curforce);
9ded7720 3508 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3509 force_next('(');
cd81e915 3510 start_force(PL_curforce);
9ded7720 3511 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3512 force_next(FUNC);
3513 }
3280af22
NIS
3514 if (PL_lex_starts++) {
3515 s = PL_bufptr;
5db06880
NC
3516#ifdef PERL_MAD
3517 if (PL_madskills) {
cd81e915
NC
3518 if (PL_thistoken)
3519 sv_free(PL_thistoken);
6b29d1f5 3520 PL_thistoken = newSVpvs("");
5db06880
NC
3521 }
3522#endif
131b3ad0
DM
3523 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3524 if (!PL_lex_casemods && PL_lex_inpat)
3525 OPERATOR(',');
3526 else
3527 Aop(OP_CONCAT);
79072805 3528 }
cea2e8a9 3529 return yylex();
79072805
LW
3530
3531 ca