This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [PATCH] xsubpp prototypes warnings
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
be3c0a43 3 * Copyright (c) 1991-2002, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
acfe0abc 30static void restore_rsfp(pTHX_ void *f);
6e3aabd6 31#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
32static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
39e02b42
JH
39#ifdef USE_UTF8_SCRIPTS
40# define UTF (!IN_BYTES)
2b9d42f0 41#else
39e02b42
JH
42# ifdef EBCDIC /* For now 'use utf8' does not affect tokenizer on EBCDIC */
43# define UTF (PL_linestr && DO_UTF8(PL_linestr))
44# else
45# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
46# endif
2b9d42f0 47#endif
a0ed51b3 48
61f0cdd9 49/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
50 * 1999-02-27 mjd-perl-patch@plover.com */
51#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
52
bf4acbe4
GS
53/* On MacOS, respect nonbreaking spaces */
54#ifdef MACOS_TRADITIONAL
55#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
56#else
57#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
58#endif
59
ffb4593c
NT
60/* LEX_* are values for PL_lex_state, the state of the lexer.
61 * They are arranged oddly so that the guard on the switch statement
79072805
LW
62 * can get by with a single comparison (if the compiler is smart enough).
63 */
64
fb73857a
PP
65/* #define LEX_NOTPARSING 11 is done in perl.h. */
66
55497cff
PP
67#define LEX_NORMAL 10
68#define LEX_INTERPNORMAL 9
69#define LEX_INTERPCASEMOD 8
70#define LEX_INTERPPUSH 7
71#define LEX_INTERPSTART 6
72#define LEX_INTERPEND 5
73#define LEX_INTERPENDMAYBE 4
74#define LEX_INTERPCONCAT 3
75#define LEX_INTERPCONST 2
76#define LEX_FORMLINE 1
77#define LEX_KNOWNEXT 0
79072805 78
79072805
LW
79#ifdef ff_next
80#undef ff_next
d48672a2
LW
81#endif
82
a1a0e61e 83#ifdef USE_PURE_BISON
dba4d153
JH
84# ifndef YYMAXLEVEL
85# define YYMAXLEVEL 100
86# endif
20141f0e
RI
87YYSTYPE* yylval_pointer[YYMAXLEVEL];
88int* yychar_pointer[YYMAXLEVEL];
6f202aea 89int yyactlevel = -1;
22c35a8c
GS
90# undef yylval
91# undef yychar
20141f0e
RI
92# define yylval (*yylval_pointer[yyactlevel])
93# define yychar (*yychar_pointer[yyactlevel])
94# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 95# undef yylex
dba4d153 96# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
97#endif
98
79072805 99#include "keywords.h"
fe14fcc3 100
ffb4593c
NT
101/* CLINE is a macro that ensures PL_copline has a sane value */
102
ae986130
LW
103#ifdef CLINE
104#undef CLINE
105#endif
57843af0 106#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 107
ffb4593c
NT
108/*
109 * Convenience functions to return different tokens and prime the
9cbb5ea2 110 * lexer for the next token. They all take an argument.
ffb4593c
NT
111 *
112 * TOKEN : generic token (used for '(', DOLSHARP, etc)
113 * OPERATOR : generic operator
114 * AOPERATOR : assignment operator
115 * PREBLOCK : beginning the block after an if, while, foreach, ...
116 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
117 * PREREF : *EXPR where EXPR is not a simple identifier
118 * TERM : expression term
119 * LOOPX : loop exiting command (goto, last, dump, etc)
120 * FTST : file test operator
121 * FUN0 : zero-argument function
2d2e263d 122 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
123 * BOop : bitwise or or xor
124 * BAop : bitwise and
125 * SHop : shift operator
126 * PWop : power operator
9cbb5ea2 127 * PMop : pattern-matching operator
ffb4593c
NT
128 * Aop : addition-level operator
129 * Mop : multiplication-level operator
130 * Eop : equality-testing operator
e5edeb50 131 * Rop : relational operator <= != gt
ffb4593c
NT
132 *
133 * Also see LOP and lop() below.
134 */
135
075953c3
JH
136/* Note that REPORT() and REPORT2() will be expressions that supply
137 * their own trailing comma, not suitable for statements as such. */
998054bd 138#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
139# define REPORT(x,retval) tokereport(x,s,(int)retval),
140# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 141#else
075953c3
JH
142# define REPORT(x,retval)
143# define REPORT2(x,retval)
998054bd
SC
144#endif
145
075953c3
JH
146#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
147#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
148#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
149#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
150#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
151#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
152#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
153#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
154#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
155#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
156#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
157#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
158#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
159#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
160#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
161#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
162#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
163#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
164#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
165#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 166
a687059c
LW
167/* This bit of chicanery makes a unary function followed by
168 * a parenthesis into a function with one argument, highest precedence.
169 */
2f3197b3 170#define UNI(f) return(yylval.ival = f, \
075953c3 171 REPORT("uni",f) \
3280af22
NIS
172 PL_expect = XTERM, \
173 PL_bufptr = s, \
174 PL_last_uni = PL_oldbufptr, \
175 PL_last_lop_op = f, \
a687059c
LW
176 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
177
79072805 178#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 179 REPORT("uni",f) \
3280af22
NIS
180 PL_bufptr = s, \
181 PL_last_uni = PL_oldbufptr, \
79072805
LW
182 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
183
9f68db38 184/* grandfather return to old style */
3280af22 185#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 186
8fa7f367
JH
187#ifdef DEBUGGING
188
2d00ba3b 189STATIC void
61b2116b 190S_tokereport(pTHX_ char *thing, char* s, I32 rv)
9041c2e3 191{
998054bd 192 DEBUG_T({
9c5ffd7c 193 SV* report = newSVpv(thing, 0);
29b291f7
RB
194 Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
195 (IV)rv);
998054bd
SC
196
197 if (s - PL_bufptr > 0)
198 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
199 else {
200 if (PL_oldbufptr && *PL_oldbufptr)
201 sv_catpv(report, PL_tokenbuf);
202 }
203 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
5f80b19c 204 });
998054bd
SC
205}
206
8fa7f367
JH
207#endif
208
ffb4593c
NT
209/*
210 * S_ao
211 *
212 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
213 * into an OP_ANDASSIGN or OP_ORASSIGN
214 */
215
76e3520e 216STATIC int
cea2e8a9 217S_ao(pTHX_ int toketype)
a0d0e21e 218{
3280af22
NIS
219 if (*PL_bufptr == '=') {
220 PL_bufptr++;
a0d0e21e
LW
221 if (toketype == ANDAND)
222 yylval.ival = OP_ANDASSIGN;
223 else if (toketype == OROR)
224 yylval.ival = OP_ORASSIGN;
225 toketype = ASSIGNOP;
226 }
227 return toketype;
228}
229
ffb4593c
NT
230/*
231 * S_no_op
232 * When Perl expects an operator and finds something else, no_op
233 * prints the warning. It always prints "<something> found where
234 * operator expected. It prints "Missing semicolon on previous line?"
235 * if the surprise occurs at the start of the line. "do you need to
236 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
237 * where the compiler doesn't know if foo is a method call or a function.
238 * It prints "Missing operator before end of line" if there's nothing
239 * after the missing operator, or "... before <...>" if there is something
240 * after the missing operator.
241 */
242
76e3520e 243STATIC void
cea2e8a9 244S_no_op(pTHX_ char *what, char *s)
463ee0b2 245{
3280af22
NIS
246 char *oldbp = PL_bufptr;
247 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 248
1189a94a
GS
249 if (!s)
250 s = oldbp;
07c798fb 251 else
1189a94a 252 PL_bufptr = s;
cea2e8a9 253 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 254 if (is_first)
cea2e8a9 255 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 256 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 257 char *t;
7e2040f0 258 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 259 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 260 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 261 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 262 }
07c798fb
HS
263 else {
264 assert(s >= oldbp);
cea2e8a9 265 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 266 }
3280af22 267 PL_bufptr = oldbp;
8990e307
LW
268}
269
ffb4593c
NT
270/*
271 * S_missingterm
272 * Complain about missing quote/regexp/heredoc terminator.
273 * If it's called with (char *)NULL then it cauterizes the line buffer.
274 * If we're in a delimited string and the delimiter is a control
275 * character, it's reformatted into a two-char sequence like ^C.
276 * This is fatal.
277 */
278
76e3520e 279STATIC void
cea2e8a9 280S_missingterm(pTHX_ char *s)
8990e307
LW
281{
282 char tmpbuf[3];
283 char q;
284 if (s) {
285 char *nl = strrchr(s,'\n');
d2719217 286 if (nl)
8990e307
LW
287 *nl = '\0';
288 }
9d116dd7
JH
289 else if (
290#ifdef EBCDIC
291 iscntrl(PL_multi_close)
292#else
293 PL_multi_close < 32 || PL_multi_close == 127
294#endif
295 ) {
8990e307 296 *tmpbuf = '^';
3280af22 297 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
298 s = "\\n";
299 tmpbuf[2] = '\0';
300 s = tmpbuf;
301 }
302 else {
3280af22 303 *tmpbuf = PL_multi_close;
8990e307
LW
304 tmpbuf[1] = '\0';
305 s = tmpbuf;
306 }
307 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 308 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 309}
79072805 310
ffb4593c
NT
311/*
312 * Perl_deprecate
ffb4593c
NT
313 */
314
79072805 315void
864dbfa3 316Perl_deprecate(pTHX_ char *s)
a0d0e21e 317{
599cee73 318 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 319 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
320}
321
12bcd1a6
PM
322void
323Perl_deprecate_old(pTHX_ char *s)
324{
325 /* This function should NOT be called for any new deprecated warnings */
326 /* Use Perl_deprecate instead */
327 /* */
328 /* It is here to maintain backward compatibility with the pre-5.8 */
329 /* warnings category hierarchy. The "deprecated" category used to */
330 /* live under the "syntax" category. It is now a top-level category */
331 /* in its own right. */
332
333 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
334 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
335 "Use of %s is deprecated", s);
336}
337
ffb4593c
NT
338/*
339 * depcom
9cbb5ea2 340 * Deprecate a comma-less variable list.
ffb4593c
NT
341 */
342
76e3520e 343STATIC void
cea2e8a9 344S_depcom(pTHX)
a0d0e21e 345{
12bcd1a6 346 deprecate_old("comma-less variable list");
a0d0e21e
LW
347}
348
ffb4593c 349/*
9cbb5ea2
GS
350 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
351 * utf16-to-utf8-reversed.
ffb4593c
NT
352 */
353
c39cd008
GS
354#ifdef PERL_CR_FILTER
355static void
356strip_return(SV *sv)
357{
358 register char *s = SvPVX(sv);
359 register char *e = s + SvCUR(sv);
360 /* outer loop optimized to do nothing if there are no CR-LFs */
361 while (s < e) {
362 if (*s++ == '\r' && *s == '\n') {
363 /* hit a CR-LF, need to copy the rest */
364 register char *d = s - 1;
365 *d++ = *s++;
366 while (s < e) {
367 if (*s == '\r' && s[1] == '\n')
368 s++;
369 *d++ = *s++;
370 }
371 SvCUR(sv) -= s - d;
372 return;
373 }
374 }
375}
a868473f 376
76e3520e 377STATIC I32
c39cd008 378S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 379{
c39cd008
GS
380 I32 count = FILTER_READ(idx+1, sv, maxlen);
381 if (count > 0 && !maxlen)
382 strip_return(sv);
383 return count;
a868473f
NIS
384}
385#endif
386
ffb4593c
NT
387/*
388 * Perl_lex_start
9cbb5ea2
GS
389 * Initialize variables. Uses the Perl save_stack to save its state (for
390 * recursive calls to the parser).
ffb4593c
NT
391 */
392
a0d0e21e 393void
864dbfa3 394Perl_lex_start(pTHX_ SV *line)
79072805 395{
8990e307
LW
396 char *s;
397 STRLEN len;
398
3280af22
NIS
399 SAVEI32(PL_lex_dojoin);
400 SAVEI32(PL_lex_brackets);
3280af22
NIS
401 SAVEI32(PL_lex_casemods);
402 SAVEI32(PL_lex_starts);
403 SAVEI32(PL_lex_state);
7766f137 404 SAVEVPTR(PL_lex_inpat);
3280af22 405 SAVEI32(PL_lex_inwhat);
18b09519
GS
406 if (PL_lex_state == LEX_KNOWNEXT) {
407 I32 toke = PL_nexttoke;
408 while (--toke >= 0) {
409 SAVEI32(PL_nexttype[toke]);
410 SAVEVPTR(PL_nextval[toke]);
411 }
412 SAVEI32(PL_nexttoke);
18b09519 413 }
57843af0 414 SAVECOPLINE(PL_curcop);
3280af22
NIS
415 SAVEPPTR(PL_bufptr);
416 SAVEPPTR(PL_bufend);
417 SAVEPPTR(PL_oldbufptr);
418 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
419 SAVEPPTR(PL_last_lop);
420 SAVEPPTR(PL_last_uni);
3280af22
NIS
421 SAVEPPTR(PL_linestart);
422 SAVESPTR(PL_linestr);
423 SAVEPPTR(PL_lex_brackstack);
424 SAVEPPTR(PL_lex_casestack);
c76ac1ee 425 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
426 SAVESPTR(PL_lex_stuff);
427 SAVEI32(PL_lex_defer);
09bef843 428 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 429 SAVESPTR(PL_lex_repl);
bebdddfc
GS
430 SAVEINT(PL_expect);
431 SAVEINT(PL_lex_expect);
3280af22
NIS
432
433 PL_lex_state = LEX_NORMAL;
434 PL_lex_defer = 0;
435 PL_expect = XSTATE;
436 PL_lex_brackets = 0;
3280af22
NIS
437 New(899, PL_lex_brackstack, 120, char);
438 New(899, PL_lex_casestack, 12, char);
439 SAVEFREEPV(PL_lex_brackstack);
440 SAVEFREEPV(PL_lex_casestack);
441 PL_lex_casemods = 0;
442 *PL_lex_casestack = '\0';
443 PL_lex_dojoin = 0;
444 PL_lex_starts = 0;
445 PL_lex_stuff = Nullsv;
446 PL_lex_repl = Nullsv;
447 PL_lex_inpat = 0;
76be56bc 448 PL_nexttoke = 0;
3280af22 449 PL_lex_inwhat = 0;
09bef843 450 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
451 PL_linestr = line;
452 if (SvREADONLY(PL_linestr))
453 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
454 s = SvPV(PL_linestr, len);
8990e307 455 if (len && s[len-1] != ';') {
3280af22
NIS
456 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
457 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
458 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 459 }
3280af22
NIS
460 SvTEMP_off(PL_linestr);
461 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
462 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 463 PL_last_lop = PL_last_uni = Nullch;
3280af22 464 PL_rsfp = 0;
79072805 465}
a687059c 466
ffb4593c
NT
467/*
468 * Perl_lex_end
9cbb5ea2
GS
469 * Finalizer for lexing operations. Must be called when the parser is
470 * done with the lexer.
ffb4593c
NT
471 */
472
463ee0b2 473void
864dbfa3 474Perl_lex_end(pTHX)
463ee0b2 475{
3280af22 476 PL_doextract = FALSE;
463ee0b2
LW
477}
478
ffb4593c
NT
479/*
480 * S_incline
481 * This subroutine has nothing to do with tilting, whether at windmills
482 * or pinball tables. Its name is short for "increment line". It
57843af0 483 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 484 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
485 * # line 500 "foo.pm"
486 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
487 */
488
76e3520e 489STATIC void
cea2e8a9 490S_incline(pTHX_ char *s)
463ee0b2
LW
491{
492 char *t;
493 char *n;
73659bf1 494 char *e;
463ee0b2 495 char ch;
463ee0b2 496
57843af0 497 CopLINE_inc(PL_curcop);
463ee0b2
LW
498 if (*s++ != '#')
499 return;
bf4acbe4 500 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
501 if (strnEQ(s, "line", 4))
502 s += 4;
503 else
504 return;
084592ab 505 if (SPACE_OR_TAB(*s))
73659bf1 506 s++;
4e553d73 507 else
73659bf1 508 return;
bf4acbe4 509 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
510 if (!isDIGIT(*s))
511 return;
512 n = s;
513 while (isDIGIT(*s))
514 s++;
bf4acbe4 515 while (SPACE_OR_TAB(*s))
463ee0b2 516 s++;
73659bf1 517 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 518 s++;
73659bf1
GS
519 e = t + 1;
520 }
463ee0b2 521 else {
463ee0b2 522 for (t = s; !isSPACE(*t); t++) ;
73659bf1 523 e = t;
463ee0b2 524 }
bf4acbe4 525 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
526 e++;
527 if (*e != '\n' && *e != '\0')
528 return; /* false alarm */
529
463ee0b2
LW
530 ch = *t;
531 *t = '\0';
f4dd75d9 532 if (t - s > 0) {
05ec9bb3 533 CopFILE_free(PL_curcop);
57843af0 534 CopFILE_set(PL_curcop, s);
f4dd75d9 535 }
463ee0b2 536 *t = ch;
57843af0 537 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
538}
539
ffb4593c
NT
540/*
541 * S_skipspace
542 * Called to gobble the appropriate amount and type of whitespace.
543 * Skips comments as well.
544 */
545
76e3520e 546STATIC char *
cea2e8a9 547S_skipspace(pTHX_ register char *s)
a687059c 548{
3280af22 549 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 550 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
551 s++;
552 return s;
553 }
554 for (;;) {
fd049845 555 STRLEN prevlen;
09bef843 556 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 557 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
558 while (s < PL_bufend && isSPACE(*s)) {
559 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
560 incline(s);
561 }
ffb4593c
NT
562
563 /* comment */
3280af22
NIS
564 if (s < PL_bufend && *s == '#') {
565 while (s < PL_bufend && *s != '\n')
463ee0b2 566 s++;
60e6418e 567 if (s < PL_bufend) {
463ee0b2 568 s++;
60e6418e
GS
569 if (PL_in_eval && !PL_rsfp) {
570 incline(s);
571 continue;
572 }
573 }
463ee0b2 574 }
ffb4593c
NT
575
576 /* only continue to recharge the buffer if we're at the end
577 * of the buffer, we're not reading from a source filter, and
578 * we're in normal lexing mode
579 */
09bef843
SB
580 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
581 PL_lex_state == LEX_FORMLINE)
463ee0b2 582 return s;
ffb4593c
NT
583
584 /* try to recharge the buffer */
9cbb5ea2
GS
585 if ((s = filter_gets(PL_linestr, PL_rsfp,
586 (prevlen = SvCUR(PL_linestr)))) == Nullch)
587 {
588 /* end of file. Add on the -p or -n magic */
3280af22
NIS
589 if (PL_minus_n || PL_minus_p) {
590 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
591 ";}continue{print or die qq(-p destination: $!\\n)" :
592 "");
3280af22
NIS
593 sv_catpv(PL_linestr,";}");
594 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
595 }
596 else
3280af22 597 sv_setpv(PL_linestr,";");
ffb4593c
NT
598
599 /* reset variables for next time we lex */
9cbb5ea2
GS
600 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
601 = SvPVX(PL_linestr);
3280af22 602 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 603 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
604
605 /* Close the filehandle. Could be from -P preprocessor,
606 * STDIN, or a regular file. If we were reading code from
607 * STDIN (because the commandline held no -e or filename)
608 * then we don't close it, we reset it so the code can
609 * read from STDIN too.
610 */
611
3280af22
NIS
612 if (PL_preprocess && !PL_in_eval)
613 (void)PerlProc_pclose(PL_rsfp);
614 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
615 PerlIO_clearerr(PL_rsfp);
8990e307 616 else
3280af22
NIS
617 (void)PerlIO_close(PL_rsfp);
618 PL_rsfp = Nullfp;
463ee0b2
LW
619 return s;
620 }
ffb4593c
NT
621
622 /* not at end of file, so we only read another line */
09bef843
SB
623 /* make corresponding updates to old pointers, for yyerror() */
624 oldprevlen = PL_oldbufptr - PL_bufend;
625 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
626 if (PL_last_uni)
627 oldunilen = PL_last_uni - PL_bufend;
628 if (PL_last_lop)
629 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
630 PL_linestart = PL_bufptr = s + prevlen;
631 PL_bufend = s + SvCUR(PL_linestr);
632 s = PL_bufptr;
09bef843
SB
633 PL_oldbufptr = s + oldprevlen;
634 PL_oldoldbufptr = s + oldoldprevlen;
635 if (PL_last_uni)
636 PL_last_uni = s + oldunilen;
637 if (PL_last_lop)
638 PL_last_lop = s + oldloplen;
a0d0e21e 639 incline(s);
ffb4593c
NT
640
641 /* debugger active and we're not compiling the debugger code,
642 * so store the line into the debugger's array of lines
643 */
3280af22 644 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
645 SV *sv = NEWSV(85,0);
646
647 sv_upgrade(sv, SVt_PVMG);
3280af22 648 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
649 (void)SvIOK_on(sv);
650 SvIVX(sv) = 0;
57843af0 651 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 652 }
463ee0b2 653 }
a687059c 654}
378cc40b 655
ffb4593c
NT
656/*
657 * S_check_uni
658 * Check the unary operators to ensure there's no ambiguity in how they're
659 * used. An ambiguous piece of code would be:
660 * rand + 5
661 * This doesn't mean rand() + 5. Because rand() is a unary operator,
662 * the +5 is its argument.
663 */
664
76e3520e 665STATIC void
cea2e8a9 666S_check_uni(pTHX)
ba106d47 667{
2f3197b3 668 char *s;
a0d0e21e 669 char *t;
2f3197b3 670
3280af22 671 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 672 return;
3280af22
NIS
673 while (isSPACE(*PL_last_uni))
674 PL_last_uni++;
7e2040f0 675 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 676 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 677 return;
0453d815 678 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 679 char ch = *s;
0453d815 680 *s = '\0';
4e553d73
NIS
681 Perl_warner(aTHX_ WARN_AMBIGUOUS,
682 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
683 PL_last_uni);
684 *s = ch;
685 }
2f3197b3
LW
686}
687
ffb4593c
NT
688/* workaround to replace the UNI() macro with a function. Only the
689 * hints/uts.sh file mentions this. Other comments elsewhere in the
690 * source indicate Microport Unix might need it too.
691 */
692
ffed7fef
LW
693#ifdef CRIPPLED_CC
694
695#undef UNI
ffed7fef 696#define UNI(f) return uni(f,s)
ffed7fef 697
76e3520e 698STATIC int
cea2e8a9 699S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
700{
701 yylval.ival = f;
3280af22
NIS
702 PL_expect = XTERM;
703 PL_bufptr = s;
8f872242
NIS
704 PL_last_uni = PL_oldbufptr;
705 PL_last_lop_op = f;
ffed7fef
LW
706 if (*s == '(')
707 return FUNC1;
708 s = skipspace(s);
709 if (*s == '(')
710 return FUNC1;
711 else
712 return UNIOP;
713}
714
a0d0e21e
LW
715#endif /* CRIPPLED_CC */
716
ffb4593c
NT
717/*
718 * LOP : macro to build a list operator. Its behaviour has been replaced
719 * with a subroutine, S_lop() for which LOP is just another name.
720 */
721
a0d0e21e
LW
722#define LOP(f,x) return lop(f,x,s)
723
ffb4593c
NT
724/*
725 * S_lop
726 * Build a list operator (or something that might be one). The rules:
727 * - if we have a next token, then it's a list operator [why?]
728 * - if the next thing is an opening paren, then it's a function
729 * - else it's a list operator
730 */
731
76e3520e 732STATIC I32
a0be28da 733S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 734{
79072805 735 yylval.ival = f;
35c8bce7 736 CLINE;
075953c3 737 REPORT("lop", f)
3280af22
NIS
738 PL_expect = x;
739 PL_bufptr = s;
740 PL_last_lop = PL_oldbufptr;
741 PL_last_lop_op = f;
742 if (PL_nexttoke)
a0d0e21e 743 return LSTOP;
79072805
LW
744 if (*s == '(')
745 return FUNC;
746 s = skipspace(s);
747 if (*s == '(')
748 return FUNC;
749 else
750 return LSTOP;
751}
752
ffb4593c
NT
753/*
754 * S_force_next
9cbb5ea2 755 * When the lexer realizes it knows the next token (for instance,
ffb4593c 756 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
757 * to know what token to return the next time the lexer is called. Caller
758 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
759 * handles the token correctly.
ffb4593c
NT
760 */
761
4e553d73 762STATIC void
cea2e8a9 763S_force_next(pTHX_ I32 type)
79072805 764{
3280af22
NIS
765 PL_nexttype[PL_nexttoke] = type;
766 PL_nexttoke++;
767 if (PL_lex_state != LEX_KNOWNEXT) {
768 PL_lex_defer = PL_lex_state;
769 PL_lex_expect = PL_expect;
770 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
771 }
772}
773
ffb4593c
NT
774/*
775 * S_force_word
776 * When the lexer knows the next thing is a word (for instance, it has
777 * just seen -> and it knows that the next char is a word char, then
778 * it calls S_force_word to stick the next word into the PL_next lookahead.
779 *
780 * Arguments:
b1b65b59 781 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
782 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
783 * int check_keyword : if true, Perl checks to make sure the word isn't
784 * a keyword (do this if the word is a label, e.g. goto FOO)
785 * int allow_pack : if true, : characters will also be allowed (require,
786 * use, etc. do this)
9cbb5ea2 787 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
788 */
789
76e3520e 790STATIC char *
cea2e8a9 791S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 792{
463ee0b2
LW
793 register char *s;
794 STRLEN len;
4e553d73 795
463ee0b2
LW
796 start = skipspace(start);
797 s = start;
7e2040f0 798 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 799 (allow_pack && *s == ':') ||
15f0808c 800 (allow_initial_tick && *s == '\'') )
a0d0e21e 801 {
3280af22
NIS
802 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
803 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
804 return start;
805 if (token == METHOD) {
806 s = skipspace(s);
807 if (*s == '(')
3280af22 808 PL_expect = XTERM;
463ee0b2 809 else {
3280af22 810 PL_expect = XOPERATOR;
463ee0b2 811 }
79072805 812 }
3280af22
NIS
813 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
814 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
815 force_next(token);
816 }
817 return s;
818}
819
ffb4593c
NT
820/*
821 * S_force_ident
9cbb5ea2 822 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
823 * text only contains the "foo" portion. The first argument is a pointer
824 * to the "foo", and the second argument is the type symbol to prefix.
825 * Forces the next token to be a "WORD".
9cbb5ea2 826 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
827 */
828
76e3520e 829STATIC void
cea2e8a9 830S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
831{
832 if (s && *s) {
11343788 833 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 834 PL_nextval[PL_nexttoke].opval = o;
79072805 835 force_next(WORD);
748a9306 836 if (kind) {
11343788 837 o->op_private = OPpCONST_ENTERED;
55497cff
PP
838 /* XXX see note in pp_entereval() for why we forgo typo
839 warnings if the symbol must be introduced in an eval.
840 GSAR 96-10-12 */
3280af22 841 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
842 kind == '$' ? SVt_PV :
843 kind == '@' ? SVt_PVAV :
844 kind == '%' ? SVt_PVHV :
845 SVt_PVGV
846 );
748a9306 847 }
79072805
LW
848 }
849}
850
1571675a
GS
851NV
852Perl_str_to_version(pTHX_ SV *sv)
853{
854 NV retval = 0.0;
855 NV nshift = 1.0;
856 STRLEN len;
857 char *start = SvPVx(sv,len);
3aa33fe5 858 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
859 char *end = start + len;
860 while (start < end) {
ba210ebe 861 STRLEN skip;
1571675a
GS
862 UV n;
863 if (utf)
9041c2e3 864 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
865 else {
866 n = *(U8*)start;
867 skip = 1;
868 }
869 retval += ((NV)n)/nshift;
870 start += skip;
871 nshift *= 1000;
872 }
873 return retval;
874}
875
4e553d73 876/*
ffb4593c
NT
877 * S_force_version
878 * Forces the next token to be a version number.
e759cc13
RGS
879 * If the next token appears to be an invalid version number, (e.g. "v2b"),
880 * and if "guessing" is TRUE, then no new token is created (and the caller
881 * must use an alternative parsing method).
ffb4593c
NT
882 */
883
76e3520e 884STATIC char *
e759cc13 885S_force_version(pTHX_ char *s, int guessing)
89bfa8cd
PP
886{
887 OP *version = Nullop;
44dcb63b 888 char *d;
89bfa8cd
PP
889
890 s = skipspace(s);
891
44dcb63b 892 d = s;
dd629d5b 893 if (*d == 'v')
44dcb63b 894 d++;
44dcb63b 895 if (isDIGIT(*d)) {
e759cc13
RGS
896 while (isDIGIT(*d) || *d == '_' || *d == '.')
897 d++;
9f3d182e 898 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 899 SV *ver;
b73d6f50 900 s = scan_num(s, &yylval);
89bfa8cd 901 version = yylval.opval;
dd629d5b
GS
902 ver = cSVOPx(version)->op_sv;
903 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 904 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
905 SvNVX(ver) = str_to_version(ver);
906 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 907 }
89bfa8cd 908 }
e759cc13
RGS
909 else if (guessing)
910 return s;
89bfa8cd
PP
911 }
912
913 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 914 PL_nextval[PL_nexttoke].opval = version;
4e553d73 915 force_next(WORD);
89bfa8cd 916
e759cc13 917 return s;
89bfa8cd
PP
918}
919
ffb4593c
NT
920/*
921 * S_tokeq
922 * Tokenize a quoted string passed in as an SV. It finds the next
923 * chunk, up to end of string or a backslash. It may make a new
924 * SV containing that chunk (if HINT_NEW_STRING is on). It also
925 * turns \\ into \.
926 */
927
76e3520e 928STATIC SV *
cea2e8a9 929S_tokeq(pTHX_ SV *sv)
79072805
LW
930{
931 register char *s;
932 register char *send;
933 register char *d;
b3ac6de7
IZ
934 STRLEN len = 0;
935 SV *pv = sv;
79072805
LW
936
937 if (!SvLEN(sv))
b3ac6de7 938 goto finish;
79072805 939
a0d0e21e 940 s = SvPV_force(sv, len);
21a311ee 941 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 942 goto finish;
463ee0b2 943 send = s + len;
79072805
LW
944 while (s < send && *s != '\\')
945 s++;
946 if (s == send)
b3ac6de7 947 goto finish;
79072805 948 d = s;
be4731d2 949 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 950 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
951 if (SvUTF8(sv))
952 SvUTF8_on(pv);
953 }
79072805
LW
954 while (s < send) {
955 if (*s == '\\') {
a0d0e21e 956 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
957 s++; /* all that, just for this */
958 }
959 *d++ = *s++;
960 }
961 *d = '\0';
463ee0b2 962 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 963 finish:
3280af22 964 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 965 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
966 return sv;
967}
968
ffb4593c
NT
969/*
970 * Now come three functions related to double-quote context,
971 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
972 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
973 * interact with PL_lex_state, and create fake ( ... ) argument lists
974 * to handle functions and concatenation.
975 * They assume that whoever calls them will be setting up a fake
976 * join call, because each subthing puts a ',' after it. This lets
977 * "lower \luPpEr"
978 * become
979 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
980 *
981 * (I'm not sure whether the spurious commas at the end of lcfirst's
982 * arguments and join's arguments are created or not).
983 */
984
985/*
986 * S_sublex_start
987 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
988 *
989 * Pattern matching will set PL_lex_op to the pattern-matching op to
990 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
991 *
992 * OP_CONST and OP_READLINE are easy--just make the new op and return.
993 *
994 * Everything else becomes a FUNC.
995 *
996 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
997 * had an OP_CONST or OP_READLINE). This just sets us up for a
998 * call to S_sublex_push().
999 */
1000
76e3520e 1001STATIC I32
cea2e8a9 1002S_sublex_start(pTHX)
79072805
LW
1003{
1004 register I32 op_type = yylval.ival;
79072805
LW
1005
1006 if (op_type == OP_NULL) {
3280af22
NIS
1007 yylval.opval = PL_lex_op;
1008 PL_lex_op = Nullop;
79072805
LW
1009 return THING;
1010 }
1011 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1012 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1013
1014 if (SvTYPE(sv) == SVt_PVIV) {
1015 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1016 STRLEN len;
1017 char *p;
1018 SV *nsv;
1019
1020 p = SvPV(sv, len);
79cb57f6 1021 nsv = newSVpvn(p, len);
01ec43d0
GS
1022 if (SvUTF8(sv))
1023 SvUTF8_on(nsv);
b3ac6de7
IZ
1024 SvREFCNT_dec(sv);
1025 sv = nsv;
4e553d73 1026 }
b3ac6de7 1027 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1028 PL_lex_stuff = Nullsv;
79072805
LW
1029 return THING;
1030 }
1031
3280af22
NIS
1032 PL_sublex_info.super_state = PL_lex_state;
1033 PL_sublex_info.sub_inwhat = op_type;
1034 PL_sublex_info.sub_op = PL_lex_op;
1035 PL_lex_state = LEX_INTERPPUSH;
55497cff 1036
3280af22
NIS
1037 PL_expect = XTERM;
1038 if (PL_lex_op) {
1039 yylval.opval = PL_lex_op;
1040 PL_lex_op = Nullop;
55497cff
PP
1041 return PMFUNC;
1042 }
1043 else
1044 return FUNC;
1045}
1046
ffb4593c
NT
1047/*
1048 * S_sublex_push
1049 * Create a new scope to save the lexing state. The scope will be
1050 * ended in S_sublex_done. Returns a '(', starting the function arguments
1051 * to the uc, lc, etc. found before.
1052 * Sets PL_lex_state to LEX_INTERPCONCAT.
1053 */
1054
76e3520e 1055STATIC I32
cea2e8a9 1056S_sublex_push(pTHX)
55497cff 1057{
f46d017c 1058 ENTER;
55497cff 1059
3280af22
NIS
1060 PL_lex_state = PL_sublex_info.super_state;
1061 SAVEI32(PL_lex_dojoin);
1062 SAVEI32(PL_lex_brackets);
3280af22
NIS
1063 SAVEI32(PL_lex_casemods);
1064 SAVEI32(PL_lex_starts);
1065 SAVEI32(PL_lex_state);
7766f137 1066 SAVEVPTR(PL_lex_inpat);
3280af22 1067 SAVEI32(PL_lex_inwhat);
57843af0 1068 SAVECOPLINE(PL_curcop);
3280af22 1069 SAVEPPTR(PL_bufptr);
8452ff4b 1070 SAVEPPTR(PL_bufend);
3280af22
NIS
1071 SAVEPPTR(PL_oldbufptr);
1072 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1073 SAVEPPTR(PL_last_lop);
1074 SAVEPPTR(PL_last_uni);
3280af22
NIS
1075 SAVEPPTR(PL_linestart);
1076 SAVESPTR(PL_linestr);
1077 SAVEPPTR(PL_lex_brackstack);
1078 SAVEPPTR(PL_lex_casestack);
1079
1080 PL_linestr = PL_lex_stuff;
1081 PL_lex_stuff = Nullsv;
1082
9cbb5ea2
GS
1083 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1084 = SvPVX(PL_linestr);
3280af22 1085 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1086 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1087 SAVEFREESV(PL_linestr);
1088
1089 PL_lex_dojoin = FALSE;
1090 PL_lex_brackets = 0;
3280af22
NIS
1091 New(899, PL_lex_brackstack, 120, char);
1092 New(899, PL_lex_casestack, 12, char);
1093 SAVEFREEPV(PL_lex_brackstack);
1094 SAVEFREEPV(PL_lex_casestack);
1095 PL_lex_casemods = 0;
1096 *PL_lex_casestack = '\0';
1097 PL_lex_starts = 0;
1098 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1099 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1100
1101 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1102 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1103 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1104 else
3280af22 1105 PL_lex_inpat = Nullop;
79072805 1106
55497cff 1107 return '(';
79072805
LW
1108}
1109
ffb4593c
NT
1110/*
1111 * S_sublex_done
1112 * Restores lexer state after a S_sublex_push.
1113 */
1114
76e3520e 1115STATIC I32
cea2e8a9 1116S_sublex_done(pTHX)
79072805 1117{
3280af22 1118 if (!PL_lex_starts++) {
9aa983d2
JH
1119 SV *sv = newSVpvn("",0);
1120 if (SvUTF8(PL_linestr))
1121 SvUTF8_on(sv);
3280af22 1122 PL_expect = XOPERATOR;
9aa983d2 1123 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1124 return THING;
1125 }
1126
3280af22
NIS
1127 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1128 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1129 return yylex();
79072805
LW
1130 }
1131
ffb4593c 1132 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1133 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1134 PL_linestr = PL_lex_repl;
1135 PL_lex_inpat = 0;
1136 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1137 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1138 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1139 SAVEFREESV(PL_linestr);
1140 PL_lex_dojoin = FALSE;
1141 PL_lex_brackets = 0;
3280af22
NIS
1142 PL_lex_casemods = 0;
1143 *PL_lex_casestack = '\0';
1144 PL_lex_starts = 0;
25da4f38 1145 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1146 PL_lex_state = LEX_INTERPNORMAL;
1147 PL_lex_starts++;
e9fa98b2
HS
1148 /* we don't clear PL_lex_repl here, so that we can check later
1149 whether this is an evalled subst; that means we rely on the
1150 logic to ensure sublex_done() is called again only via the
1151 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1152 }
e9fa98b2 1153 else {
3280af22 1154 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1155 PL_lex_repl = Nullsv;
1156 }
79072805 1157 return ',';
ffed7fef
LW
1158 }
1159 else {
f46d017c 1160 LEAVE;
3280af22
NIS
1161 PL_bufend = SvPVX(PL_linestr);
1162 PL_bufend += SvCUR(PL_linestr);
1163 PL_expect = XOPERATOR;
09bef843 1164 PL_sublex_info.sub_inwhat = 0;
79072805 1165 return ')';
ffed7fef
LW
1166 }
1167}
1168
02aa26ce
NT
1169/*
1170 scan_const
1171
1172 Extracts a pattern, double-quoted string, or transliteration. This
1173 is terrifying code.
1174
3280af22
NIS
1175 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1176 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1177 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1178
9b599b2a
GS
1179 Returns a pointer to the character scanned up to. Iff this is
1180 advanced from the start pointer supplied (ie if anything was
1181 successfully parsed), will leave an OP for the substring scanned
1182 in yylval. Caller must intuit reason for not parsing further
1183 by looking at the next characters herself.
1184
02aa26ce
NT
1185 In patterns:
1186 backslashes:
1187 double-quoted style: \r and \n
1188 regexp special ones: \D \s
1189 constants: \x3
1190 backrefs: \1 (deprecated in substitution replacements)
1191 case and quoting: \U \Q \E
1192 stops on @ and $, but not for $ as tail anchor
1193
1194 In transliterations:
1195 characters are VERY literal, except for - not at the start or end
1196 of the string, which indicates a range. scan_const expands the
1197 range to the full set of intermediate characters.
1198
1199 In double-quoted strings:
1200 backslashes:
1201 double-quoted style: \r and \n
1202 constants: \x3
1203 backrefs: \1 (deprecated)
1204 case and quoting: \U \Q \E
1205 stops on @ and $
1206
1207 scan_const does *not* construct ops to handle interpolated strings.
1208 It stops processing as soon as it finds an embedded $ or @ variable
1209 and leaves it to the caller to work out what's going on.
1210
1211 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1212
1213 $ in pattern could be $foo or could be tail anchor. Assumption:
1214 it's a tail anchor if $ is the last thing in the string, or if it's
1215 followed by one of ")| \n\t"
1216
1217 \1 (backreferences) are turned into $1
1218
1219 The structure of the code is
1220 while (there's a character to process) {
1221 handle transliteration ranges
1222 skip regexp comments
1223 skip # initiated comments in //x patterns
1224 check for embedded @foo
1225 check for embedded scalars
1226 if (backslash) {
1227 leave intact backslashes from leave (below)
1228 deprecate \1 in strings and sub replacements
1229 handle string-changing backslashes \l \U \Q \E, etc.
1230 switch (what was escaped) {
1231 handle - in a transliteration (becomes a literal -)
1232 handle \132 octal characters
1233 handle 0x15 hex characters
1234 handle \cV (control V)
1235 handle printf backslashes (\f, \r, \n, etc)
1236 } (end switch)
1237 } (end if backslash)
1238 } (end while character to read)
4e553d73 1239
02aa26ce
NT
1240*/
1241
76e3520e 1242STATIC char *
cea2e8a9 1243S_scan_const(pTHX_ char *start)
79072805 1244{
3280af22 1245 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1246 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1247 register char *s = start; /* start of the constant */
1248 register char *d = SvPVX(sv); /* destination for copies */
1249 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1250 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1251 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1252 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1253 UV uv;
1254
dff6d3cd 1255 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1256 PL_lex_inpat
4a2d328f 1257 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1258 : "";
79072805 1259
2b9d42f0
NIS
1260 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1261 /* If we are doing a trans and we know we want UTF8 set expectation */
1262 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1263 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1264 }
1265
1266
79072805 1267 while (s < send || dorange) {
02aa26ce 1268 /* get transliterations out of the way (they're most literal) */
3280af22 1269 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1270 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1271 if (dorange) {
1ba5c669
JH
1272 I32 i; /* current expanded character */
1273 I32 min; /* first character in range */
1274 I32 max; /* last character in range */
02aa26ce 1275
2b9d42f0 1276 if (has_utf8) {
8973db79
JH
1277 char *c = (char*)utf8_hop((U8*)d, -1);
1278 char *e = d++;
1279 while (e-- > c)
1280 *(e + 1) = *e;
25716404 1281 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1282 /* mark the range as done, and continue */
1283 dorange = FALSE;
1284 didrange = TRUE;
1285 continue;
1286 }
2b9d42f0 1287
02aa26ce 1288 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1289 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1290 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1291 d -= 2; /* eat the first char and the - */
1292
8ada0baa
JH
1293 min = (U8)*d; /* first char in range */
1294 max = (U8)d[1]; /* last char in range */
1295
c2e66d9e 1296 if (min > max) {
01ec43d0 1297 Perl_croak(aTHX_
1ba5c669
JH
1298 "Invalid [] range \"%c-%c\" in transliteration operator",
1299 (char)min, (char)max);
c2e66d9e
GS
1300 }
1301
c7f1f016 1302#ifdef EBCDIC
8ada0baa
JH
1303 if ((isLOWER(min) && isLOWER(max)) ||
1304 (isUPPER(min) && isUPPER(max))) {
1305 if (isLOWER(min)) {
1306 for (i = min; i <= max; i++)
1307 if (isLOWER(i))
db42d148 1308 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1309 } else {
1310 for (i = min; i <= max; i++)
1311 if (isUPPER(i))
db42d148 1312 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1313 }
1314 }
1315 else
1316#endif
1317 for (i = min; i <= max; i++)
1318 *d++ = i;
02aa26ce
NT
1319
1320 /* mark the range as done, and continue */
79072805 1321 dorange = FALSE;
01ec43d0 1322 didrange = TRUE;
79072805 1323 continue;
4e553d73 1324 }
02aa26ce
NT
1325
1326 /* range begins (ignore - as first or last char) */
79072805 1327 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1328 if (didrange) {
1fafa243 1329 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1330 }
2b9d42f0 1331 if (has_utf8) {
25716404 1332 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1333 s++;
1334 continue;
1335 }
79072805
LW
1336 dorange = TRUE;
1337 s++;
01ec43d0
GS
1338 }
1339 else {
1340 didrange = FALSE;
1341 }
79072805 1342 }
02aa26ce
NT
1343
1344 /* if we get here, we're not doing a transliteration */
1345
0f5d15d6
IZ
1346 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1347 except for the last char, which will be done separately. */
3280af22 1348 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1349 if (s[2] == '#') {
1350 while (s < send && *s != ')')
db42d148 1351 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1352 }
1353 else if (s[2] == '{' /* This should match regcomp.c */
1354 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1355 {
cc6b7395 1356 I32 count = 1;
0f5d15d6 1357 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1358 char c;
1359
d9f97599
GS
1360 while (count && (c = *regparse)) {
1361 if (c == '\\' && regparse[1])
1362 regparse++;
4e553d73 1363 else if (c == '{')
cc6b7395 1364 count++;
4e553d73 1365 else if (c == '}')
cc6b7395 1366 count--;
d9f97599 1367 regparse++;
cc6b7395 1368 }
5bdf89e7
IZ
1369 if (*regparse != ')') {
1370 regparse--; /* Leave one char for continuation. */
cc6b7395 1371 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1372 }
0f5d15d6 1373 while (s < regparse)
db42d148 1374 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1375 }
748a9306 1376 }
02aa26ce
NT
1377
1378 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1379 else if (*s == '#' && PL_lex_inpat &&
1380 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1381 while (s+1 < send && *s != '\n')
db42d148 1382 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1383 }
02aa26ce 1384
5d1d4326
JH
1385 /* check for embedded arrays
1386 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1387 */
7e2040f0 1388 else if (*s == '@' && s[1]
5d1d4326 1389 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1390 break;
02aa26ce
NT
1391
1392 /* check for embedded scalars. only stop if we're sure it's a
1393 variable.
1394 */
79072805 1395 else if (*s == '$') {
3280af22 1396 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1397 break;
6002328a 1398 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1399 break; /* in regexp, $ might be tail anchor */
1400 }
02aa26ce 1401
2b9d42f0
NIS
1402 /* End of else if chain - OP_TRANS rejoin rest */
1403
02aa26ce 1404 /* backslashes */
79072805
LW
1405 if (*s == '\\' && s+1 < send) {
1406 s++;
02aa26ce
NT
1407
1408 /* some backslashes we leave behind */
c9f97d15 1409 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1410 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1411 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1412 continue;
1413 }
02aa26ce
NT
1414
1415 /* deprecate \1 in strings and substitution replacements */
3280af22 1416 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1417 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1418 {
599cee73 1419 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1420 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1421 *--s = '$';
1422 break;
1423 }
02aa26ce
NT
1424
1425 /* string-change backslash escapes */
3280af22 1426 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1427 --s;
1428 break;
1429 }
02aa26ce
NT
1430
1431 /* if we get here, it's either a quoted -, or a digit */
79072805 1432 switch (*s) {
02aa26ce
NT
1433
1434 /* quoted - in transliterations */
79072805 1435 case '-':
3280af22 1436 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1437 *d++ = *s++;
1438 continue;
1439 }
1440 /* FALL THROUGH */
1441 default:
11b8faa4 1442 {
707afd92
MS
1443 if (ckWARN(WARN_MISC) &&
1444 isALNUM(*s) &&
1445 *s != '_')
4e553d73 1446 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1447 "Unrecognized escape \\%c passed through",
1448 *s);
1449 /* default action is to copy the quoted character */
f9a63242 1450 goto default_action;
11b8faa4 1451 }
02aa26ce
NT
1452
1453 /* \132 indicates an octal constant */
79072805
LW
1454 case '0': case '1': case '2': case '3':
1455 case '4': case '5': case '6': case '7':
ba210ebe 1456 {
53305cf1
NC
1457 I32 flags = 0;
1458 STRLEN len = 3;
1459 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1460 s += len;
1461 }
012bcf8d 1462 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1463
1464 /* \x24 indicates a hex constant */
79072805 1465 case 'x':
a0ed51b3
LW
1466 ++s;
1467 if (*s == '{') {
1468 char* e = strchr(s, '}');
a4c04bdc
NC
1469 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1470 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1471 STRLEN len;
355860ce 1472
53305cf1 1473 ++s;
adaeee49 1474 if (!e) {
a0ed51b3 1475 yyerror("Missing right brace on \\x{}");
355860ce 1476 continue;
ba210ebe 1477 }
53305cf1
NC
1478 len = e - s;
1479 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1480 s = e + 1;
a0ed51b3
LW
1481 }
1482 else {
ba210ebe 1483 {
53305cf1 1484 STRLEN len = 2;
a4c04bdc 1485 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1486 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1487 s += len;
1488 }
012bcf8d
GS
1489 }
1490
1491 NUM_ESCAPE_INSERT:
1492 /* Insert oct or hex escaped character.
301d3d20 1493 * There will always enough room in sv since such
db42d148 1494 * escapes will be longer than any UTF-8 sequence
301d3d20 1495 * they can end up as. */
ba7cea30 1496
c7f1f016
NIS
1497 /* We need to map to chars to ASCII before doing the tests
1498 to cover EBCDIC
1499 */
c4d5f83a 1500 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1501 if (!has_utf8 && uv > 255) {
301d3d20
JH
1502 /* Might need to recode whatever we have
1503 * accumulated so far if it contains any
1504 * hibit chars.
1505 *
1506 * (Can't we keep track of that and avoid
1507 * this rescan? --jhi)
012bcf8d 1508 */
c7f1f016 1509 int hicount = 0;
63cd0674
NIS
1510 U8 *c;
1511 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1512 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1513 hicount++;
db42d148 1514 }
012bcf8d 1515 }
63cd0674 1516 if (hicount) {
db42d148
NIS
1517 STRLEN offset = d - SvPVX(sv);
1518 U8 *src, *dst;
1519 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1520 src = (U8 *)d - 1;
1521 dst = src+hicount;
1522 d += hicount;
1523 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1524 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1525 U8 ch = NATIVE_TO_ASCII(*src);
db42d148
NIS
1526 *dst-- = UTF8_EIGHT_BIT_LO(ch);
1527 *dst-- = UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1528 }
1529 else {
63cd0674 1530 *dst-- = *src;
012bcf8d 1531 }
c7f1f016 1532 src--;
012bcf8d
GS
1533 }
1534 }
1535 }
1536
9aa983d2 1537 if (has_utf8 || uv > 255) {
9041c2e3 1538 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1539 has_utf8 = TRUE;
f9a63242
JH
1540 if (PL_lex_inwhat == OP_TRANS &&
1541 PL_sublex_info.sub_op) {
1542 PL_sublex_info.sub_op->op_private |=
1543 (PL_lex_repl ? OPpTRANS_FROM_UTF
1544 : OPpTRANS_TO_UTF);
f9a63242 1545 }
012bcf8d 1546 }
a0ed51b3 1547 else {
012bcf8d 1548 *d++ = (char)uv;
a0ed51b3 1549 }
012bcf8d
GS
1550 }
1551 else {
c4d5f83a 1552 *d++ = (char) uv;
a0ed51b3 1553 }
79072805 1554 continue;
02aa26ce 1555
b239daa5 1556 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1557 case 'N':
55eda711 1558 ++s;
423cee85
JH
1559 if (*s == '{') {
1560 char* e = strchr(s, '}');
155aba94 1561 SV *res;
423cee85
JH
1562 STRLEN len;
1563 char *str;
4e553d73 1564
423cee85 1565 if (!e) {
5777a3f7 1566 yyerror("Missing right brace on \\N{}");
423cee85
JH
1567 e = s - 1;
1568 goto cont_scan;
1569 }
55eda711
JH
1570 res = newSVpvn(s + 1, e - s - 1);
1571 res = new_constant( Nullch, 0, "charnames",
1572 res, Nullsv, "\\N{...}" );
f9a63242
JH
1573 if (has_utf8)
1574 sv_utf8_upgrade(res);
423cee85 1575 str = SvPV(res,len);
1c47067b
JH
1576#ifdef EBCDIC_NEVER_MIND
1577 /* charnames uses pack U and that has been
1578 * recently changed to do the below uni->native
1579 * mapping, so this would be redundant (and wrong,
1580 * the code point would be doubly converted).
1581 * But leave this in just in case the pack U change
1582 * gets revoked, but the semantics is still
1583 * desireable for charnames. --jhi */
cddc7ef4
JH
1584 {
1585 UV uv = utf8_to_uvchr((U8*)str, 0);
1586
1587 if (uv < 0x100) {
1588 U8 tmpbuf[UTF8_MAXLEN+1], *d;
1589
1590 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1591 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1592 str = SvPV(res, len);
1593 }
1594 }
1595#endif
89491803 1596 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1597 char *ostart = SvPVX(sv);
1598 SvCUR_set(sv, d - ostart);
1599 SvPOK_on(sv);
e4f3eed8 1600 *d = '\0';
f08d6ad9 1601 sv_utf8_upgrade(sv);
d2f449dd
SB
1602 /* this just broke our allocation above... */
1603 SvGROW(sv, send - start);
f08d6ad9 1604 d = SvPVX(sv) + SvCUR(sv);
89491803 1605 has_utf8 = TRUE;
f08d6ad9 1606 }
b239daa5 1607 if (len > e - s + 4) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1608 char *odest = SvPVX(sv);
1609
8973db79 1610 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1611 d = SvPVX(sv) + (d - odest);
1612 }
1613 Copy(str, d, len, char);
1614 d += len;
1615 SvREFCNT_dec(res);
1616 cont_scan:
1617 s = e + 1;
1618 }
1619 else
5777a3f7 1620 yyerror("Missing braces on \\N{}");
423cee85
JH
1621 continue;
1622
02aa26ce 1623 /* \c is a control character */
79072805
LW
1624 case 'c':
1625 s++;
ba210ebe
JH
1626 {
1627 U8 c = *s++;
c7f1f016
NIS
1628#ifdef EBCDIC
1629 if (isLOWER(c))
1630 c = toUPPER(c);
1631#endif
db42d148 1632 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1633 }
79072805 1634 continue;
02aa26ce
NT
1635
1636 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1637 case 'b':
db42d148 1638 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1639 break;
1640 case 'n':
db42d148 1641 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1642 break;
1643 case 'r':
db42d148 1644 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1645 break;
1646 case 'f':
db42d148 1647 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1648 break;
1649 case 't':
db42d148 1650 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1651 break;
34a3fe2a 1652 case 'e':
db42d148 1653 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1654 break;
1655 case 'a':
db42d148 1656 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1657 break;
02aa26ce
NT
1658 } /* end switch */
1659
79072805
LW
1660 s++;
1661 continue;
02aa26ce
NT
1662 } /* end if (backslash) */
1663
f9a63242 1664 default_action:
2b9d42f0
NIS
1665 /* If we started with encoded form, or already know we want it
1666 and then encode the next character */
1667 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1668 STRLEN len = 1;
1669 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1670 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1671 s += len;
1672 if (need > len) {
1673 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1674 STRLEN off = d - SvPVX(sv);
1675 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1676 }
1677 d = (char*)uvchr_to_utf8((U8*)d, uv);
1678 has_utf8 = TRUE;
1679 }
1680 else {
1681 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1682 }
02aa26ce
NT
1683 } /* while loop to process each character */
1684
1685 /* terminate the string and set up the sv */
79072805 1686 *d = '\0';
463ee0b2 1687 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1688 if (SvCUR(sv) >= SvLEN(sv))
585602fa 1689 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1690
79072805 1691 SvPOK_on(sv);
9f4817db 1692 if (PL_encoding && !has_utf8) {
799ef3cb 1693 sv_recode_to_utf8(sv, PL_encoding);
9f4817db
JH
1694 has_utf8 = TRUE;
1695 }
2b9d42f0 1696 if (has_utf8) {
7e2040f0 1697 SvUTF8_on(sv);
2b9d42f0
NIS
1698 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1699 PL_sublex_info.sub_op->op_private |=
1700 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1701 }
1702 }
79072805 1703
02aa26ce 1704 /* shrink the sv if we allocated more than we used */
79072805
LW
1705 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1706 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1707 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1708 }
02aa26ce 1709
9b599b2a 1710 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1711 if (s > PL_bufptr) {
1712 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1713 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1714 sv, Nullsv,
4e553d73 1715 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1716 ? "tr"
3280af22 1717 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1718 ? "s"
1719 : "qq")));
79072805 1720 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1721 } else
8990e307 1722 SvREFCNT_dec(sv);
79072805
LW
1723 return s;
1724}
1725
ffb4593c
NT
1726/* S_intuit_more
1727 * Returns TRUE if there's more to the expression (e.g., a subscript),
1728 * FALSE otherwise.
ffb4593c
NT
1729 *
1730 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1731 *
1732 * ->[ and ->{ return TRUE
1733 * { and [ outside a pattern are always subscripts, so return TRUE
1734 * if we're outside a pattern and it's not { or [, then return FALSE
1735 * if we're in a pattern and the first char is a {
1736 * {4,5} (any digits around the comma) returns FALSE
1737 * if we're in a pattern and the first char is a [
1738 * [] returns FALSE
1739 * [SOMETHING] has a funky algorithm to decide whether it's a
1740 * character class or not. It has to deal with things like
1741 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1742 * anything else returns TRUE
1743 */
1744
9cbb5ea2
GS
1745/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1746
76e3520e 1747STATIC int
cea2e8a9 1748S_intuit_more(pTHX_ register char *s)
79072805 1749{
3280af22 1750 if (PL_lex_brackets)
79072805
LW
1751 return TRUE;
1752 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1753 return TRUE;
1754 if (*s != '{' && *s != '[')
1755 return FALSE;
3280af22 1756 if (!PL_lex_inpat)
79072805
LW
1757 return TRUE;
1758
1759 /* In a pattern, so maybe we have {n,m}. */
1760 if (*s == '{') {
1761 s++;
1762 if (!isDIGIT(*s))
1763 return TRUE;
1764 while (isDIGIT(*s))
1765 s++;
1766 if (*s == ',')
1767 s++;
1768 while (isDIGIT(*s))
1769 s++;
1770 if (*s == '}')
1771 return FALSE;
1772 return TRUE;
1773
1774 }
1775
1776 /* On the other hand, maybe we have a character class */
1777
1778 s++;
1779 if (*s == ']' || *s == '^')
1780 return FALSE;
1781 else {
ffb4593c 1782 /* this is terrifying, and it works */
79072805
LW
1783 int weight = 2; /* let's weigh the evidence */
1784 char seen[256];
f27ffc4a 1785 unsigned char un_char = 255, last_un_char;
93a17b20 1786 char *send = strchr(s,']');
3280af22 1787 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1788
1789 if (!send) /* has to be an expression */
1790 return TRUE;
1791
1792 Zero(seen,256,char);
1793 if (*s == '$')
1794 weight -= 3;
1795 else if (isDIGIT(*s)) {
1796 if (s[1] != ']') {
1797 if (isDIGIT(s[1]) && s[2] == ']')
1798 weight -= 10;
1799 }
1800 else
1801 weight -= 100;
1802 }
1803 for (; s < send; s++) {
1804 last_un_char = un_char;
1805 un_char = (unsigned char)*s;
1806 switch (*s) {
1807 case '@':
1808 case '&':
1809 case '$':
1810 weight -= seen[un_char] * 10;
7e2040f0 1811 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1812 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1813 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1814 weight -= 100;
1815 else
1816 weight -= 10;
1817 }
1818 else if (*s == '$' && s[1] &&
93a17b20
LW
1819 strchr("[#!%*<>()-=",s[1])) {
1820 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1821 weight -= 10;
1822 else
1823 weight -= 1;
1824 }
1825 break;
1826 case '\\':
1827 un_char = 254;
1828 if (s[1]) {
93a17b20 1829 if (strchr("wds]",s[1]))
79072805
LW
1830 weight += 100;
1831 else if (seen['\''] || seen['"'])
1832 weight += 1;
93a17b20 1833 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1834 weight += 40;
1835 else if (isDIGIT(s[1])) {
1836 weight += 40;
1837 while (s[1] && isDIGIT(s[1]))
1838 s++;
1839 }
1840 }
1841 else
1842 weight += 100;
1843 break;
1844 case '-':
1845 if (s[1] == '\\')
1846 weight += 50;
93a17b20 1847 if (strchr("aA01! ",last_un_char))
79072805 1848 weight += 30;
93a17b20 1849 if (strchr("zZ79~",s[1]))
79072805 1850 weight += 30;
f27ffc4a
GS
1851 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1852 weight -= 5; /* cope with negative subscript */
79072805
LW
1853 break;
1854 default:
93a17b20 1855 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1856 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1857 char *d = tmpbuf;
1858 while (isALPHA(*s))
1859 *d++ = *s++;
1860 *d = '\0';
1861 if (keyword(tmpbuf, d - tmpbuf))
1862 weight -= 150;
1863 }
1864 if (un_char == last_un_char + 1)
1865 weight += 5;
1866 weight -= seen[un_char];
1867 break;
1868 }
1869 seen[un_char]++;
1870 }
1871 if (weight >= 0) /* probably a character class */
1872 return FALSE;
1873 }
1874
1875 return TRUE;
1876}
ffed7fef 1877
ffb4593c
NT
1878/*
1879 * S_intuit_method
1880 *
1881 * Does all the checking to disambiguate
1882 * foo bar
1883 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1884 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1885 *
1886 * First argument is the stuff after the first token, e.g. "bar".
1887 *
1888 * Not a method if bar is a filehandle.
1889 * Not a method if foo is a subroutine prototyped to take a filehandle.
1890 * Not a method if it's really "Foo $bar"
1891 * Method if it's "foo $bar"
1892 * Not a method if it's really "print foo $bar"
1893 * Method if it's really "foo package::" (interpreted as package->foo)
1894 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1895 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1896 * =>
1897 */
1898
76e3520e 1899STATIC int
cea2e8a9 1900S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1901{
1902 char *s = start + (*start == '$');
3280af22 1903 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1904 STRLEN len;
1905 GV* indirgv;
1906
1907 if (gv) {
b6c543e3 1908 CV *cv;
a0d0e21e
LW
1909 if (GvIO(gv))
1910 return 0;
b6c543e3
IZ
1911 if ((cv = GvCVu(gv))) {
1912 char *proto = SvPVX(cv);
1913 if (proto) {
1914 if (*proto == ';')
1915 proto++;
1916 if (*proto == '*')
1917 return 0;
1918 }
1919 } else
a0d0e21e
LW
1920 gv = 0;
1921 }
8903cb82 1922 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1923 /* start is the beginning of the possible filehandle/object,
1924 * and s is the end of it
1925 * tmpbuf is a copy of it
1926 */
1927
a0d0e21e 1928 if (*start == '$') {
3280af22 1929 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1930 return 0;
1931 s = skipspace(s);
3280af22
NIS
1932 PL_bufptr = start;
1933 PL_expect = XREF;
a0d0e21e
LW
1934 return *s == '(' ? FUNCMETH : METHOD;
1935 }
1936 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1937 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1938 len -= 2;
1939 tmpbuf[len] = '\0';
1940 goto bare_package;
1941 }
1942 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1943 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1944 return 0;
1945 /* filehandle or package name makes it a method */
89bfa8cd 1946 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1947 s = skipspace(s);
3280af22 1948 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1949 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1950 bare_package:
3280af22 1951 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1952 newSVpvn(tmpbuf,len));
3280af22
NIS
1953 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1954 PL_expect = XTERM;
a0d0e21e 1955 force_next(WORD);
3280af22 1956 PL_bufptr = s;
a0d0e21e
LW
1957 return *s == '(' ? FUNCMETH : METHOD;
1958 }
1959 }
1960 return 0;
1961}
1962
ffb4593c
NT
1963/*
1964 * S_incl_perldb
1965 * Return a string of Perl code to load the debugger. If PERL5DB
1966 * is set, it will return the contents of that, otherwise a
1967 * compile-time require of perl5db.pl.
1968 */
1969
76e3520e 1970STATIC char*
cea2e8a9 1971S_incl_perldb(pTHX)
a0d0e21e 1972{
3280af22 1973 if (PL_perldb) {
76e3520e 1974 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1975
1976 if (pdb)
1977 return pdb;
91487cfc 1978 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1979 return "BEGIN { require 'perl5db.pl' }";
1980 }
1981 return "";
1982}
1983
1984
16d20bd9 1985/* Encoded script support. filter_add() effectively inserts a
4e553d73 1986 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1987 * Note that the filter function only applies to the current source file
1988 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1989 *
1990 * The datasv parameter (which may be NULL) can be used to pass
1991 * private data to this instance of the filter. The filter function
1992 * can recover the SV using the FILTER_DATA macro and use it to
1993 * store private buffers and state information.
1994 *
1995 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1996 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1997 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1998 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1999 * private use must be set using malloc'd pointers.
2000 */
16d20bd9
AD
2001
2002SV *
864dbfa3 2003Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2004{
f4c556ac
GS
2005 if (!funcp)
2006 return Nullsv;
2007
3280af22
NIS
2008 if (!PL_rsfp_filters)
2009 PL_rsfp_filters = newAV();
16d20bd9 2010 if (!datasv)
8c52afec 2011 datasv = NEWSV(255,0);
16d20bd9 2012 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 2013 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 2014 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 2015 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2016 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 2017 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
2018 av_unshift(PL_rsfp_filters, 1);
2019 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2020 return(datasv);
2021}
4e553d73 2022
16d20bd9
AD
2023
2024/* Delete most recently added instance of this filter function. */
a0d0e21e 2025void
864dbfa3 2026Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2027{
e0c19803 2028 SV *datasv;
fe5a182c 2029 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2030 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2031 return;
2032 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2033 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2034 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2035 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2036 IoANY(datasv) = (void *)NULL;
3280af22 2037 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2038
16d20bd9
AD
2039 return;
2040 }
2041 /* we need to search for the correct entry and clear it */
cea2e8a9 2042 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2043}
2044
2045
2046/* Invoke the n'th filter function for the current rsfp. */
2047I32
864dbfa3 2048Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
2049
2050
8ac85365 2051 /* 0 = read one text line */
a0d0e21e 2052{
16d20bd9
AD
2053 filter_t funcp;
2054 SV *datasv = NULL;
e50aee73 2055
3280af22 2056 if (!PL_rsfp_filters)
16d20bd9 2057 return -1;
3280af22 2058 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
2059 /* Provide a default input filter to make life easy. */
2060 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2061 DEBUG_P(PerlIO_printf(Perl_debug_log,
2062 "filter_read %d: from rsfp\n", idx));
4e553d73 2063 if (maxlen) {
16d20bd9
AD
2064 /* Want a block */
2065 int len ;
2066 int old_len = SvCUR(buf_sv) ;
2067
2068 /* ensure buf_sv is large enough */
2069 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
2070 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2071 if (PerlIO_error(PL_rsfp))
37120919
AD
2072 return -1; /* error */
2073 else
2074 return 0 ; /* end of file */
2075 }
16d20bd9
AD
2076 SvCUR_set(buf_sv, old_len + len) ;
2077 } else {
2078 /* Want a line */
3280af22
NIS
2079 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2080 if (PerlIO_error(PL_rsfp))
37120919
AD
2081 return -1; /* error */
2082 else
2083 return 0 ; /* end of file */
2084 }
16d20bd9
AD
2085 }
2086 return SvCUR(buf_sv);
2087 }
2088 /* Skip this filter slot if filter has been deleted */
3280af22 2089 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2090 DEBUG_P(PerlIO_printf(Perl_debug_log,
2091 "filter_read %d: skipped (filter deleted)\n",
2092 idx));
16d20bd9
AD
2093 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2094 }
2095 /* Get function pointer hidden within datasv */
4755096e 2096 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2097 DEBUG_P(PerlIO_printf(Perl_debug_log,
2098 "filter_read %d: via function %p (%s)\n",
fe5a182c 2099 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2100 /* Call function. The function is expected to */
2101 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2102 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2103 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2104}
2105
76e3520e 2106STATIC char *
cea2e8a9 2107S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2108{
c39cd008 2109#ifdef PERL_CR_FILTER
3280af22 2110 if (!PL_rsfp_filters) {
c39cd008 2111 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2112 }
2113#endif
3280af22 2114 if (PL_rsfp_filters) {
16d20bd9 2115
55497cff
PP
2116 if (!append)
2117 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2118 if (FILTER_READ(0, sv, 0) > 0)
2119 return ( SvPVX(sv) ) ;
2120 else
2121 return Nullch ;
2122 }
9d116dd7 2123 else
fd049845 2124 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2125}
2126
01ec43d0
GS
2127STATIC HV *
2128S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2129{
2130 GV *gv;
2131
01ec43d0 2132 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2133 return PL_curstash;
2134
2135 if (len > 2 &&
2136 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2137 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2138 {
2139 return GvHV(gv); /* Foo:: */
def3634b
GS
2140 }
2141
2142 /* use constant CLASS => 'MyClass' */
2143 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2144 SV *sv;
2145 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2146 pkgname = SvPV_nolen(sv);
2147 }
2148 }
2149
2150 return gv_stashpv(pkgname, FALSE);
2151}
a0d0e21e 2152
748a9306
LW
2153#ifdef DEBUGGING
2154 static char* exp_name[] =
09bef843
SB
2155 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2156 "ATTRTERM", "TERMBLOCK"
2157 };
748a9306 2158#endif
463ee0b2 2159
02aa26ce
NT
2160/*
2161 yylex
2162
2163 Works out what to call the token just pulled out of the input
2164 stream. The yacc parser takes care of taking the ops we return and
2165 stitching them into a tree.
2166
2167 Returns:
2168 PRIVATEREF
2169
2170 Structure:
2171 if read an identifier
2172 if we're in a my declaration
2173 croak if they tried to say my($foo::bar)
2174 build the ops for a my() declaration
2175 if it's an access to a my() variable
2176 are we in a sort block?
2177 croak if my($a); $a <=> $b
2178 build ops for access to a my() variable
2179 if in a dq string, and they've said @foo and we can't find @foo
2180 croak
2181 build ops for a bareword
2182 if we already built the token before, use it.
2183*/
2184
dba4d153 2185#ifdef USE_PURE_BISON
864dbfa3 2186int
dba4d153 2187Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2188{
20141f0e
RI
2189 int r;
2190
6f202aea 2191 yyactlevel++;
20141f0e
RI
2192 yylval_pointer[yyactlevel] = lvalp;
2193 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2194 if (yyactlevel >= YYMAXLEVEL)
2195 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2196
dba4d153 2197 r = Perl_yylex(aTHX);
20141f0e 2198
d8ae6756
RI
2199 if (yyactlevel > 0)
2200 yyactlevel--;
20141f0e
RI
2201
2202 return r;
2203}
dba4d153 2204#endif
20141f0e 2205
dba4d153
JH
2206#ifdef __SC__
2207#pragma segment Perl_yylex
2208#endif
dba4d153 2209int
dba4d153 2210Perl_yylex(pTHX)
20141f0e 2211{
79072805 2212 register char *s;
378cc40b 2213 register char *d;
79072805 2214 register I32 tmp;
463ee0b2 2215 STRLEN len;
161b471a
NIS
2216 GV *gv = Nullgv;
2217 GV **gvp = 0;
aa7440fb 2218 bool bof = FALSE;
a687059c 2219
02aa26ce 2220 /* check if there's an identifier for us to look at */
ba979b31 2221 if (PL_pending_ident)
e930465f 2222 return S_pending_ident(aTHX);
bbce6d69 2223
02aa26ce
NT
2224 /* no identifier pending identification */
2225
3280af22 2226 switch (PL_lex_state) {
79072805
LW
2227#ifdef COMMENTARY
2228 case LEX_NORMAL: /* Some compilers will produce faster */
2229 case LEX_INTERPNORMAL: /* code if we comment these out. */
2230 break;
2231#endif
2232
09bef843 2233 /* when we've already built the next token, just pull it out of the queue */
79072805 2234 case LEX_KNOWNEXT:
3280af22
NIS
2235 PL_nexttoke--;
2236 yylval = PL_nextval[PL_nexttoke];
2237 if (!PL_nexttoke) {
2238 PL_lex_state = PL_lex_defer;
2239 PL_expect = PL_lex_expect;
2240 PL_lex_defer = LEX_NORMAL;
463ee0b2 2241 }
607df283 2242 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2243 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2244 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2245
3280af22 2246 return(PL_nexttype[PL_nexttoke]);
79072805 2247
02aa26ce 2248 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2249 when we get here, PL_bufptr is at the \
02aa26ce 2250 */
79072805
LW
2251 case LEX_INTERPCASEMOD:
2252#ifdef DEBUGGING
3280af22 2253 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2254 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2255#endif
02aa26ce 2256 /* handle \E or end of string */
3280af22 2257 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2258 char oldmod;
02aa26ce
NT
2259
2260 /* if at a \E */
3280af22
NIS
2261 if (PL_lex_casemods) {
2262 oldmod = PL_lex_casestack[--PL_lex_casemods];
2263 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2264
3280af22
NIS
2265 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2266 PL_bufptr += 2;
2267 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2268 }
79072805
LW
2269 return ')';
2270 }
3280af22
NIS
2271 if (PL_bufptr != PL_bufend)
2272 PL_bufptr += 2;
2273 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2274 return yylex();
79072805
LW
2275 }
2276 else {
607df283 2277 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2278 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2279 s = PL_bufptr + 1;
79072805
LW
2280 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2281 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2282 if (strchr("LU", *s) &&
3280af22 2283 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2284 {
3280af22 2285 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2286 return ')';
2287 }
3280af22
NIS
2288 if (PL_lex_casemods > 10) {
2289 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2290 if (newlb != PL_lex_casestack) {
a0d0e21e 2291 SAVEFREEPV(newlb);
3280af22 2292 PL_lex_casestack = newlb;
a0d0e21e
LW
2293 }
2294 }
3280af22
NIS
2295 PL_lex_casestack[PL_lex_casemods++] = *s;
2296 PL_lex_casestack[PL_lex_casemods] = '\0';
2297 PL_lex_state = LEX_INTERPCONCAT;
2298 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2299 force_next('(');
2300 if (*s == 'l')
3280af22 2301 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2302 else if (*s == 'u')
3280af22 2303 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2304 else if (*s == 'L')
3280af22 2305 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2306 else if (*s == 'U')
3280af22 2307 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2308 else if (*s == 'Q')
3280af22 2309 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2310 else
cea2e8a9 2311 Perl_croak(aTHX_ "panic: yylex");
3280af22 2312 PL_bufptr = s + 1;
79072805 2313 force_next(FUNC);
3280af22
NIS
2314 if (PL_lex_starts) {
2315 s = PL_bufptr;
2316 PL_lex_starts = 0;
79072805
LW
2317 Aop(OP_CONCAT);
2318 }
2319 else
cea2e8a9 2320 return yylex();
79072805
LW
2321 }
2322
55497cff
PP
2323 case LEX_INTERPPUSH:
2324 return sublex_push();
2325
79072805 2326 case LEX_INTERPSTART:
3280af22 2327 if (PL_bufptr == PL_bufend)
79072805 2328 return sublex_done();
607df283 2329 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2330 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2331 PL_expect = XTERM;
2332 PL_lex_dojoin = (*PL_bufptr == '@');
2333 PL_lex_state = LEX_INTERPNORMAL;
2334 if (PL_lex_dojoin) {
2335 PL_nextval[PL_nexttoke].ival = 0;
79072805 2336 force_next(',');
4d1ff10f 2337#ifdef USE_5005THREADS
533c011a
NIS
2338 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2339 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2340 force_next(PRIVATEREF);
2341#else
a0d0e21e 2342 force_ident("\"", '$');
4d1ff10f 2343#endif /* USE_5005THREADS */
3280af22 2344 PL_nextval[PL_nexttoke].ival = 0;
79072805 2345 force_next('$');
3280af22 2346 PL_nextval[PL_nexttoke].ival = 0;
79072805 2347 force_next('(');
3280af22 2348 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2349 force_next(FUNC);
2350 }
3280af22
NIS
2351 if (PL_lex_starts++) {
2352 s = PL_bufptr;
79072805
LW
2353 Aop(OP_CONCAT);
2354 }
cea2e8a9 2355 return yylex();
79072805
LW
2356
2357 case LEX_INTERPENDMAYBE:
3280af22
NIS
2358 if (intuit_more(PL_bufptr)) {
2359 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2360 break;
2361 }
2362 /* FALL THROUGH */
2363
2364 case LEX_INTERPEND:
3280af22
NIS
2365 if (PL_lex_dojoin) {
2366 PL_lex_dojoin = FALSE;
2367 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2368 return ')';
2369 }
43a16006 2370 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2371 && SvEVALED(PL_lex_repl))
43a16006 2372 {
e9fa98b2 2373 if (PL_bufptr != PL_bufend)
cea2e8a9 2374 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2375 PL_lex_repl = Nullsv;
2376 }
79072805
LW
2377 /* FALLTHROUGH */
2378 case LEX_INTERPCONCAT:
2379#ifdef DEBUGGING
3280af22 2380 if (PL_lex_brackets)
cea2e8a9 2381 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2382#endif
3280af22 2383 if (PL_bufptr == PL_bufend)
79072805
LW
2384 return sublex_done();
2385
3280af22
NIS
2386 if (SvIVX(PL_linestr) == '\'') {
2387 SV *sv = newSVsv(PL_linestr);
2388 if (!PL_lex_inpat)
76e3520e 2389 sv = tokeq(sv);
3280af22 2390 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2391 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2392 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2393 s = PL_bufend;
79072805
LW
2394 }
2395 else {
3280af22 2396 s = scan_const(PL_bufptr);
79072805 2397 if (*s == '\\')
3280af22 2398 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2399 else
3280af22 2400 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2401 }
2402
3280af22
NIS
2403 if (s != PL_bufptr) {
2404 PL_nextval[PL_nexttoke] = yylval;
2405 PL_expect = XTERM;
79072805 2406 force_next(THING);
3280af22 2407 if (PL_lex_starts++)
79072805
LW
2408 Aop(OP_CONCAT);
2409 else {
3280af22 2410 PL_bufptr = s;
cea2e8a9 2411 return yylex();
79072805
LW
2412 }
2413 }
2414
cea2e8a9 2415 return yylex();
a0d0e21e 2416 case LEX_FORMLINE:
3280af22
NIS
2417 PL_lex_state = LEX_NORMAL;
2418 s = scan_formline(PL_bufptr);
2419 if (!PL_lex_formbrack)
a0d0e21e
LW
2420 goto rightbracket;
2421 OPERATOR(';');
79072805
LW
2422 }
2423
3280af22
NIS
2424 s = PL_bufptr;
2425 PL_oldoldbufptr = PL_oldbufptr;
2426 PL_oldbufptr = s;
607df283 2427 DEBUG_T( {
bf49b057
GS
2428 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2429 exp_name[PL_expect], s);
5f80b19c 2430 } );
463ee0b2
LW
2431
2432 retry:
378cc40b
LW
2433 switch (*s) {
2434 default:
7e2040f0 2435 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2436 goto keylookup;
cea2e8a9 2437 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2438 case 4:
2439 case 26:
2440 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2441 case 0:
3280af22
NIS
2442 if (!PL_rsfp) {
2443 PL_last_uni = 0;
2444 PL_last_lop = 0;
2445 if (PL_lex_brackets)
d98d5fff 2446 yyerror("Missing right curly or square bracket");
4e553d73 2447 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2448 "### Tokener got EOF\n");
5f80b19c 2449 } );
79072805 2450 TOKEN(0);
463ee0b2 2451 }
3280af22 2452 if (s++ < PL_bufend)
a687059c 2453 goto retry; /* ignore stray nulls */
3280af22
NIS
2454 PL_last_uni = 0;
2455 PL_last_lop = 0;
2456 if (!PL_in_eval && !PL_preambled) {
2457 PL_preambled = TRUE;
2458 sv_setpv(PL_linestr,incl_perldb());
2459 if (SvCUR(PL_linestr))
2460 sv_catpv(PL_linestr,";");
2461 if (PL_preambleav){
2462 while(AvFILLp(PL_preambleav) >= 0) {
2463 SV *tmpsv = av_shift(PL_preambleav);
2464 sv_catsv(PL_linestr, tmpsv);
2465 sv_catpv(PL_linestr, ";");
91b7def8
PP
2466 sv_free(tmpsv);
2467 }
3280af22
NIS
2468 sv_free((SV*)PL_preambleav);
2469 PL_preambleav = NULL;
91b7def8 2470 }
3280af22
NIS
2471 if (PL_minus_n || PL_minus_p) {
2472 sv_catpv(PL_linestr, "LINE: while (<>) {");
2473 if (PL_minus_l)
2474 sv_catpv(PL_linestr,"chomp;");
2475 if (PL_minus_a) {
3280af22
NIS
2476 if (PL_minus_F) {
2477 if (strchr("/'\"", *PL_splitstr)
2478 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2479 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121
PP
2480 else {
2481 char delim;
2482 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2483 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2484 delim = *s;
75c72d73 2485 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
46fc3d4c 2486 "q" + (delim == '\''), delim);
3280af22 2487 for (s = PL_splitstr; *s; s++) {
54310121 2488 if (*s == '\\')
3280af22
NIS
2489 sv_catpvn(PL_linestr, "\\", 1);
2490 sv_catpvn(PL_linestr, s, 1);
54310121 2491 }
cea2e8a9 2492 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2493 }
2304df62
AD
2494 }
2495 else
75c72d73 2496 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2497 }
79072805 2498 }
3280af22
NIS
2499 sv_catpv(PL_linestr, "\n");
2500 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2501 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2502 PL_last_lop = PL_last_uni = Nullch;
3280af22 2503 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2504 SV *sv = NEWSV(85,0);
2505
2506 sv_upgrade(sv, SVt_PVMG);
3280af22 2507 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2508 (void)SvIOK_on(sv);
2509 SvIVX(sv) = 0;
57843af0 2510 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2511 }
79072805 2512 goto retry;
a687059c 2513 }
e929a76b 2514 do {
aa7440fb 2515 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2516 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2517 fake_eof:
2518 if (PL_rsfp) {
2519 if (PL_preprocess && !PL_in_eval)
2520 (void)PerlProc_pclose(PL_rsfp);
2521 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2522 PerlIO_clearerr(PL_rsfp);
2523 else
2524 (void)PerlIO_close(PL_rsfp);
2525 PL_rsfp = Nullfp;
2526 PL_doextract = FALSE;
2527 }
2528 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2529 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2530 sv_catpv(PL_linestr,";}");
2531 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2532 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2533 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2534 PL_minus_n = PL_minus_p = 0;
2535 goto retry;
2536 }
2537 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2538 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2539 sv_setpv(PL_linestr,"");
2540 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2541 }
2542 /* if it looks like the start of a BOM, check if it in fact is */
2543 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2544#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2545# ifdef __GNU_LIBRARY__
2546# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2547# define FTELL_FOR_PIPE_IS_BROKEN
2548# endif
e3f494f1
JH
2549# else
2550# ifdef __GLIBC__
2551# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2552# define FTELL_FOR_PIPE_IS_BROKEN
2553# endif
2554# endif
226017aa
DD
2555# endif
2556#endif
2557#ifdef FTELL_FOR_PIPE_IS_BROKEN
2558 /* This loses the possibility to detect the bof
2559 * situation on perl -P when the libc5 is being used.
2560 * Workaround? Maybe attach some extra state to PL_rsfp?
2561 */
2562 if (!PL_preprocess)
7e28d3af 2563 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2564#else
7e28d3af 2565 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2566#endif
7e28d3af 2567 if (bof) {
3280af22 2568 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2569 s = swallow_bom((U8*)s);
e929a76b 2570 }
378cc40b 2571 }
3280af22 2572 if (PL_doextract) {
a0d0e21e 2573 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2574 PL_doextract = FALSE;
a0d0e21e
LW
2575
2576 /* Incest with pod. */
2577 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2578 sv_setpv(PL_linestr, "");
2579 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2580 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2581 PL_last_lop = PL_last_uni = Nullch;
3280af22 2582 PL_doextract = FALSE;
a0d0e21e 2583 }
4e553d73 2584 }
463ee0b2 2585 incline(s);
3280af22
NIS
2586 } while (PL_doextract);
2587 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2588 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2589 SV *sv = NEWSV(85,0);
a687059c 2590
93a17b20 2591 sv_upgrade(sv, SVt_PVMG);
3280af22 2592 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2593 (void)SvIOK_on(sv);
2594 SvIVX(sv) = 0;
57843af0 2595 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2596 }
3280af22 2597 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2598 PL_last_lop = PL_last_uni = Nullch;
57843af0 2599 if (CopLINE(PL_curcop) == 1) {
3280af22 2600 while (s < PL_bufend && isSPACE(*s))
79072805 2601 s++;
a0d0e21e 2602 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2603 s++;
44a8e56a 2604 d = Nullch;
3280af22 2605 if (!PL_in_eval) {
44a8e56a
PP
2606 if (*s == '#' && *(s+1) == '!')
2607 d = s + 2;
2608#ifdef ALTERNATE_SHEBANG
2609 else {
2610 static char as[] = ALTERNATE_SHEBANG;
2611 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2612 d = s + (sizeof(as) - 1);
2613 }
2614#endif /* ALTERNATE_SHEBANG */
2615 }
2616 if (d) {
b8378b72 2617 char *ipath;
774d564b 2618 char *ipathend;
b8378b72 2619
774d564b 2620 while (isSPACE(*d))
b8378b72
CS
2621 d++;
2622 ipath = d;
774d564b
PP
2623 while (*d && !isSPACE(*d))
2624 d++;
2625 ipathend = d;
2626
2627#ifdef ARG_ZERO_IS_SCRIPT
2628 if (ipathend > ipath) {
2629 /*
2630 * HP-UX (at least) sets argv[0] to the script name,
2631 * which makes $^X incorrect. And Digital UNIX and Linux,
2632 * at least, set argv[0] to the basename of the Perl
2633 * interpreter. So, having found "#!", we'll set it right.
2634 */
ee2f7564 2635 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2636 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2637 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2638 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2639 SvSETMAGIC(x);
2640 }
774d564b 2641 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2642 }
774d564b 2643#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2644
2645 /*
2646 * Look for options.
2647 */
748a9306 2648 d = instr(s,"perl -");
84e30d1a 2649 if (!d) {
748a9306 2650 d = instr(s,"perl");
84e30d1a
GS
2651#if defined(DOSISH)
2652 /* avoid getting into infinite loops when shebang
2653 * line contains "Perl" rather than "perl" */
2654 if (!d) {
2655 for (d = ipathend-4; d >= ipath; --d) {
2656 if ((*d == 'p' || *d == 'P')
2657 && !ibcmp(d, "perl", 4))
2658 {
2659 break;
2660 }
2661 }
2662 if (d < ipath)
2663 d = Nullch;
2664 }
2665#endif
2666 }
44a8e56a
PP
2667#ifdef ALTERNATE_SHEBANG
2668 /*
2669 * If the ALTERNATE_SHEBANG on this system starts with a
2670 * character that can be part of a Perl expression, then if
2671 * we see it but not "perl", we're probably looking at the
2672 * start of Perl code, not a request to hand off to some
2673 * other interpreter. Similarly, if "perl" is there, but
2674 * not in the first 'word' of the line, we assume the line
2675 * contains the start of the Perl program.
44a8e56a
PP
2676 */
2677 if (d && *s != '#') {
774d564b 2678 char *c = ipath;
44a8e56a
PP
2679 while (*c && !strchr("; \t\r\n\f\v#", *c))
2680 c++;
2681 if (c < d)
2682 d = Nullch; /* "perl" not in first word; ignore */
2683 else
2684 *s = '#'; /* Don't try to parse shebang line */
2685 }
774d564b 2686#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2687#ifndef MACOS_TRADITIONAL
748a9306 2688 if (!d &&
44a8e56a 2689 *s == '#' &&
774d564b 2690 ipathend > ipath &&
3280af22 2691 !PL_minus_c &&
748a9306 2692 !instr(s,"indir") &&
3280af22 2693 instr(PL_origargv[0],"perl"))
748a9306 2694 {
9f68db38 2695 char **newargv;
9f68db38 2696
774d564b
PP
2697 *ipathend = '\0';
2698 s = ipathend + 1;
3280af22 2699 while (s < PL_bufend && isSPACE(*s))
9f68db38 2700 s++;
3280af22
NIS
2701 if (s < PL_bufend) {
2702 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2703 newargv[1] = s;
3280af22 2704 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2705 s++;
2706 *s = '\0';
3280af22 2707 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2708 }
2709 else
3280af22 2710 newargv = PL_origargv;
774d564b 2711 newargv[0] = ipath;
b4748376 2712 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2713 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2714 }
bf4acbe4 2715#endif
748a9306 2716 if (d) {
3280af22
NIS
2717 U32 oldpdb = PL_perldb;
2718 bool oldn = PL_minus_n;
2719 bool oldp = PL_minus_p;
748a9306
LW
2720
2721 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2722 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2723
2724 if (*d++ == '-') {
a11ec5a9 2725 bool switches_done = PL_doswitches;
8cc95fdb
PP
2726 do {
2727 if (*d == 'M' || *d == 'm') {
2728 char *m = d;
2729 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2730 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2731 (int)(d - m), m);
2732 }
2733 d = moreswitches(d);
2734 } while (d);
155aba94
GS
2735 if ((PERLDB_LINE && !oldpdb) ||
2736 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2737 /* if we have already added "LINE: while (<>) {",
2738 we must not do it again */
748a9306 2739 {
3280af22
NIS
2740 sv_setpv(PL_linestr, "");
2741 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2742 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2743 PL_last_lop = PL_last_uni = Nullch;
3280af22 2744 PL_preambled = FALSE;
84902520 2745 if (PERLDB_LINE)
3280af22 2746 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2747 goto retry;
2748 }
a11ec5a9
RGS
2749 if (PL_doswitches && !switches_done) {
2750 int argc = PL_origargc;
2751 char **argv = PL_origargv;
2752 do {
2753 argc--,argv++;
2754 } while (argc && argv[0][0] == '-' && argv[0][1]);
2755 init_argv_symbols(argc,argv);
2756 }
a0d0e21e 2757 }
79072805 2758 }
9f68db38 2759 }
79072805 2760 }
3280af22
NIS
2761 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2762 PL_bufptr = s;
2763 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2764 return yylex();
ae986130 2765 }
378cc40b 2766 goto retry;
4fdae800 2767 case '\r':
6a27c188 2768#ifdef PERL_STRICT_CR
cea2e8a9 2769 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2770 Perl_croak(aTHX_
cc507455 2771 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2772#endif
4fdae800 2773 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2774#ifdef MACOS_TRADITIONAL
2775 case '\312':
2776#endif
378cc40b
LW
2777 s++;
2778 goto retry;
378cc40b 2779 case '#':
e929a76b 2780 case '\n':
3280af22 2781 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2782 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2783 /* handle eval qq[#line 1 "foo"\n ...] */
2784 CopLINE_dec(PL_curcop);
2785 incline(s);
2786 }
3280af22 2787 d = PL_bufend;
a687059c 2788 while (s < d && *s != '\n')
378cc40b 2789 s++;
0f85fab0 2790 if (s < d)
378cc40b 2791 s++;
78c267c1 2792 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2793 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2794 incline(s);
3280af22
NIS
2795 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2796 PL_bufptr = s;
2797 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2798 return yylex();
a687059c 2799 }
378cc40b 2800 }
a687059c 2801 else {
378cc40b 2802 *s = '\0';
3280af22 2803 PL_bufend = s;
a687059c 2804 }
378cc40b
LW
2805 goto retry;
2806 case '-':
79072805 2807 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2808 I32 ftst = 0;
2809
378cc40b 2810 s++;
3280af22 2811 PL_bufptr = s;
748a9306
LW
2812 tmp = *s++;
2813
bf4acbe4 2814 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2815 s++;
2816
2817 if (strnEQ(s,"=>",2)) {
3280af22 2818 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2819 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2820 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2821 } );
748a9306
LW
2822 OPERATOR('-'); /* unary minus */
2823 }
3280af22 2824 PL_last_uni = PL_oldbufptr;
748a9306 2825 switch (tmp) {
e5edeb50
JH
2826 case 'r': ftst = OP_FTEREAD; break;
2827 case 'w': ftst = OP_FTEWRITE; break;
2828 case 'x': ftst = OP_FTEEXEC; break;
2829 case 'o': ftst = OP_FTEOWNED; break;
2830 case 'R': ftst = OP_FTRREAD; break;
2831 case 'W': ftst = OP_FTRWRITE; break;
2832 case 'X': ftst = OP_FTREXEC; break;
2833 case 'O': ftst = OP_FTROWNED; break;
2834 case 'e': ftst = OP_FTIS; break;
2835 case 'z': ftst = OP_FTZERO; break;
2836 case 's': ftst = OP_FTSIZE; break;
2837 case 'f': ftst = OP_FTFILE; break;
2838 case 'd': ftst = OP_FTDIR; break;
2839 case 'l': ftst = OP_FTLINK; break;
2840 case 'p': ftst = OP_FTPIPE; break;
2841 case 'S': ftst = OP_FTSOCK; break;
2842 case 'u': ftst = OP_FTSUID; break;
2843 case 'g': ftst = OP_FTSGID; break;
2844 case 'k': ftst = OP_FTSVTX; break;
2845 case 'b': ftst = OP_FTBLK; break;
2846 case 'c': ftst = OP_FTCHR; break;
2847 case 't': ftst = OP_FTTTY; break;
2848 case 'T': ftst = OP_FTTEXT; break;
2849 case 'B': ftst = OP_FTBINARY; break;
2850 case 'M': case 'A': case 'C':
2851 gv_fetchpv("\024",TRUE, SVt_PV);
2852 switch (tmp) {
2853 case 'M': ftst = OP_FTMTIME; break;
2854 case 'A': ftst = OP_FTATIME; break;
2855 case 'C': ftst = OP_FTCTIME; break;
2856 default: break;
2857 }
2858 break;
378cc40b 2859 default:
378cc40b
LW
2860 break;
2861 }
e5edeb50
JH
2862 if (ftst) {
2863 PL_last_lop_op = ftst;
4e553d73 2864 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2865 "### Saw file test %c\n", (int)ftst);
5f80b19c 2866 } );
e5edeb50
JH
2867 FTST(ftst);
2868 }
2869 else {
2870 /* Assume it was a minus followed by a one-letter named
2871 * subroutine call (or a -bareword), then. */
95c31fe3 2872 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2873 "### %c looked like a file test but was not\n",
2874 (int)ftst);
5f80b19c 2875 } );
e5edeb50
JH
2876 s -= 2;
2877 }
378cc40b 2878 }
a687059c
LW
2879 tmp = *s++;
2880 if (*s == tmp) {
2881 s++;
3280af22 2882 if (PL_expect == XOPERATOR)
79072805
LW
2883 TERM(POSTDEC);
2884 else
2885 OPERATOR(PREDEC);
2886 }
2887 else if (*s == '>') {
2888 s++;
2889 s = skipspace(s);
7e2040f0 2890 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2891 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2892 TOKEN(ARROW);
79072805 2893 }
748a9306
LW
2894 else if (*s == '$')
2895 OPERATOR(ARROW);
463ee0b2 2896 else
748a9306 2897 TERM(ARROW);
a687059c 2898 }
3280af22 2899 if (PL_expect == XOPERATOR)
79072805
LW
2900 Aop(OP_SUBTRACT);
2901 else {
3280af22 2902 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2903 check_uni();
79072805 2904 OPERATOR('-'); /* unary minus */
2f3197b3 2905 }
79072805 2906
378cc40b 2907 case '+':
a687059c
LW
2908 tmp = *s++;
2909 if (*s == tmp) {
378cc40b 2910 s++;
3280af22 2911 if (PL_expect == XOPERATOR)
79072805
LW
2912 TERM(POSTINC);
2913 else
2914 OPERATOR(PREINC);
378cc40b 2915 }
3280af22 2916 if (PL_expect == XOPERATOR)
79072805
LW
2917 Aop(OP_ADD);
2918 else {
3280af22 2919 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2920 check_uni();
a687059c 2921 OPERATOR('+');
2f3197b3 2922 }
a687059c 2923
378cc40b 2924 case '*':
3280af22
NIS
2925 if (PL_expect != XOPERATOR) {
2926 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2927 PL_expect = XOPERATOR;
2928 force_ident(PL_tokenbuf, '*');
2929 if (!*PL_tokenbuf)
a0d0e21e 2930 PREREF('*');
79072805 2931 TERM('*');
a687059c 2932 }
79072805
LW
2933 s++;
2934 if (*s == '*') {
a687059c 2935 s++;
79072805 2936 PWop(OP_POW);
a687059c 2937 }
79072805
LW
2938 Mop(OP_MULTIPLY);
2939
378cc40b 2940 case '%':
3280af22 2941 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2942 ++s;
2943 Mop(OP_MODULO);
a687059c 2944 }
3280af22
NIS
2945 PL_tokenbuf[0] = '%';
2946 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2947 if (!PL_tokenbuf[1]) {
2948 if (s == PL_bufend)
bbce6d69
PP
2949 yyerror("Final % should be \\% or %name");
2950 PREREF('%');
a687059c 2951 }
3280af22 2952 PL_pending_ident = '%';
bbce6d69 2953 TERM('%');
a687059c 2954
378cc40b 2955 case '^':
79072805 2956 s++;
a0d0e21e 2957 BOop(OP_BIT_XOR);
79072805 2958 case '[':
3280af22 2959 PL_lex_brackets++;
79072805 2960 /* FALL THROUGH */
378cc40b 2961 case '~':
378cc40b 2962 case ',':
378cc40b
LW
2963 tmp = *s++;
2964 OPERATOR(tmp);
a0d0e21e
LW
2965 case ':':
2966 if (s[1] == ':') {
2967 len = 0;
2968 goto just_a_word;
2969 }
2970 s++;
09bef843
SB
2971 switch (PL_expect) {
2972 OP *attrs;
2973 case XOPERATOR:
2974 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2975 break;
2976 PL_bufptr = s; /* update in case we back off */
2977 goto grabattrs;
2978 case XATTRBLOCK:
2979 PL_expect = XBLOCK;
2980 goto grabattrs;
2981 case XATTRTERM:
2982 PL_expect = XTERMBLOCK;
2983 grabattrs:
2984 s = skipspace(s);
2985 attrs = Nullop;
7e2040f0 2986 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2987 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2988 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2989 if (tmp < 0) tmp = -tmp;
2990 switch (tmp) {
2991 case KEY_or:
2992 case KEY_and:
2993 case KEY_for:
2994 case KEY_unless:
2995 case KEY_if:
2996 case KEY_while:
2997 case KEY_until:
2998 goto got_attrs;
2999 default:
3000 break;
3001 }
3002 }
09bef843
SB
3003 if (*d == '(') {
3004 d = scan_str(d,TRUE,TRUE);
3005 if (!d) {
09bef843
SB
3006 /* MUST advance bufptr here to avoid bogus
3007 "at end of line" context messages from yyerror().
3008 */
3009 PL_bufptr = s + len;
3010 yyerror("Unterminated attribute parameter in attribute list");
3011 if (attrs)
3012 op_free(attrs);
3013 return 0; /* EOF indicator */
3014 }
3015 }
3016 if (PL_lex_stuff) {
3017 SV *sv = newSVpvn(s, len);
3018 sv_catsv(sv, PL_lex_stuff);
3019 attrs = append_elem(OP_LIST, attrs,
3020 newSVOP(OP_CONST, 0, sv));
3021 SvREFCNT_dec(PL_lex_stuff);
3022 PL_lex_stuff = Nullsv;
3023 }
3024 else {
78f9721b
SM
3025 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3026 CvLVALUE_on(PL_compcv);
3027 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3028 CvLOCKED_on(PL_compcv);
3029 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3030 CvMETHOD_on(PL_compcv);
87ecf892 3031#ifdef USE_ITHREADS
c8a3bf85 3032 else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "unique", len))
7fb37951 3033 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
87ecf892 3034#endif
78f9721b
SM
3035 /* After we've set the flags, it could be argued that
3036 we don't need to do the attributes.pm-based setting
3037 process, and shouldn't bother appending recognized
3038 flags. To experiment with that, uncomment the
3039 following "else": */
0256094b 3040 else
78f9721b
SM
3041 attrs = append_elem(OP_LIST, attrs,
3042 newSVOP(OP_CONST, 0,
3043 newSVpvn(s, len)));
09bef843
SB
3044 }
3045 s = skipspace(d);
0120eecf 3046 if (*s == ':' && s[1] != ':')
09bef843 3047 s = skipspace(s+1);
0120eecf
GS
3048 else if (s == d)
3049 break; /* require real whitespace or :'s */
09bef843 3050 }
f9829d6b
GS
3051 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3052 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3053 char q = ((*s == '\'') ? '"' : '\'');
3054 /* If here for an expression, and parsed no attrs, back off. */
3055 if (tmp == '=' && !attrs) {
3056 s = PL_bufptr;
3057 break;
3058 }
3059 /* MUST advance bufptr here to avoid bogus "at end of line"
3060 context messages from yyerror().
3061 */
3062 PL_bufptr = s;
3063 if (!*s)
3064 yyerror("Unterminated attribute list");
3065 else
3066 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3067 q, *s, q));
3068 if (attrs)
3069 op_free(attrs);
3070 OPERATOR(':');
3071 }
f9829d6b 3072 got_attrs:
09bef843
SB
3073 if (attrs) {
3074 PL_nextval[PL_nexttoke].opval = attrs;
3075 force_next(THING);
3076 }
3077 TOKEN(COLONATTR);
3078 }
a0d0e21e 3079 OPERATOR(':');
8990e307
LW
3080 case '(':
3081 s++;
3280af22
NIS
3082 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3083 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3084 else
3280af22 3085 PL_expect = XTERM;
a0d0e21e 3086 TOKEN('(');
378cc40b 3087 case ';':
f4dd75d9 3088 CLINE;
378cc40b
LW
3089 tmp = *s++;
3090 OPERATOR(tmp);
3091 case ')':
378cc40b 3092 tmp = *s++;
16d20bd9
AD
3093 s = skipspace(s);
3094 if (*s == '{')
3095 PREBLOCK(tmp);
378cc40b 3096 TERM(tmp);
79072805
LW
3097 case ']':
3098 s++;
3280af22 3099 if (PL_lex_brackets <= 0)
d98d5fff 3100 yyerror("Unmatched right square bracket");
463ee0b2 3101 else
3280af22
NIS
3102 --PL_lex_brackets;
3103 if (PL_lex_state == LEX_INTERPNORMAL) {
3104 if (PL_lex_brackets == 0) {
a0d0e21e 3105 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3106 PL_lex_state = LEX_INTERPEND;
79072805
LW
3107 }
3108 }
4633a7c4 3109 TERM(']');
79072805
LW
3110 case '{':
3111 leftbracket:
79072805 3112 s++;
3280af22
NIS
3113 if (PL_lex_brackets > 100) {
3114 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3115 if (newlb != PL_lex_brackstack) {
8990e307 3116 SAVEFREEPV(newlb);
3280af22 3117 PL_lex_brackstack = newlb;
8990e307
LW
3118 }
3119 }
3280af22 3120 switch (PL_expect) {
a0d0e21e 3121 case XTERM:
3280af22 3122 if (PL_lex_formbrack) {
a0d0e21e
LW
3123 s--;
3124 PRETERMBLOCK(DO);
3125 }
3280af22
NIS
3126 if (PL_oldoldbufptr == PL_last_lop)
3127 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3128 else
3280af22 3129 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3130 OPERATOR(HASHBRACK);
a0d0e21e 3131 case XOPERATOR:
bf4acbe4 3132 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3133 s++;
44a8e56a 3134 d = s;
3280af22
NIS
3135 PL_tokenbuf[0] = '\0';
3136 if (d < PL_bufend && *d == '-') {
3137 PL_tokenbuf[0] = '-';
44a8e56a 3138 d++;
bf4acbe4 3139 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3140 d++;
3141 }
7e2040f0 3142 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3143 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3144 FALSE, &len);
bf4acbe4 3145 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3146 d++;
3147 if (*d == '}') {
3280af22 3148 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3149 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3150 if (minus)
3151 force_next('-');
748a9306
LW
3152 }
3153 }
3154 /* FALL THROUGH */
09bef843 3155 case XATTRBLOCK:
748a9306 3156 case XBLOCK:
3280af22
NIS
3157 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3158 PL_expect = XSTATE;
a0d0e21e 3159 break;
09bef843 3160 case XATTRTERM:
a0d0e21e 3161 case XTERMBLOCK:
3280af22
NIS
3162 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3163 PL_expect = XSTATE;
a0d0e21e
LW
3164 break;
3165 default: {
3166 char *t;
3280af22
NIS
3167 if (PL_oldoldbufptr == PL_last_lop)
3168 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3169 else
3280af22 3170 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3171 s = skipspace(s);
8452ff4b
SB
3172 if (*s == '}') {
3173 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3174 PL_expect = XTERM;
3175 /* This hack is to get the ${} in the message. */
3176 PL_bufptr = s+1;
3177 yyerror("syntax error");
3178 break;
3179 }
a0d0e21e 3180 OPERATOR(HASHBRACK);
8452ff4b 3181 }
b8a4b1be
GS
3182 /* This hack serves to disambiguate a pair of curlies
3183 * as being a block or an anon hash. Normally, expectation
3184 * determines that, but in cases where we're not in a
3185 * position to expect anything in particular (like inside
3186 * eval"") we have to resolve the ambiguity. This code
3187 * covers the case where the first term in the curlies is a
3188 * quoted string. Most other cases need to be explicitly
3189 * disambiguated by prepending a `+' before the opening
3190 * curly in order to force resolution as an anon hash.
3191 *
3192 * XXX should probably propagate the outer expectation
3193 * into eval"" to rely less on this hack, but that could
3194 * potentially break current behavior of eval"".
3195 * GSAR 97-07-21
3196 */
3197 t = s;
3198 if (*s == '\'' || *s == '"' || *s == '`') {
3199 /* common case: get past first string, handling escapes */
3280af22 3200 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3201 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3202 t++;
3203 t++;
a0d0e21e 3204 }
b8a4b1be 3205 else if (*s == 'q') {
3280af22 3206 if (++t < PL_bufend
b8a4b1be 3207 && (!isALNUM(*t)
3280af22 3208 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3209 && !isALNUM(*t))))
3210 {
b8a4b1be
GS
3211 char *tmps;
3212 char open, close, term;
3213 I32 brackets = 1;
3214
3280af22 3215 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3216 t++;
3217 term = *t;
3218 open = term;
3219 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3220 term = tmps[5];
3221 close = term;
3222 if (open == close)
3280af22
NIS
3223 for (t++; t < PL_bufend; t++) {
3224 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3225 t++;
6d07e5e9 3226 else if (*t == open)
b8a4b1be
GS
3227 break;
3228 }
3229 else
3280af22
NIS
3230 for (t++; t < PL_bufend; t++) {
3231 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3232 t++;
6d07e5e9 3233 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3234 break;
3235 else if (*t == open)
3236 brackets++;
3237 }
3238 }
3239 t++;
a0d0e21e 3240 }
7e2040f0 3241 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3242 t += UTF8SKIP(t);
7e2040f0 3243 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3244 t += UTF8SKIP(t);
a0d0e21e 3245 }
3280af22 3246 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3247 t++;
b8a4b1be
GS
3248 /* if comma follows first term, call it an anon hash */
3249 /* XXX it could be a comma expression with loop modifiers */
3280af22 3250 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3251 || (*t == '=' && t[1] == '>')))
a0d0e21e 3252 OPERATOR(HASHBRACK);
3280af22 3253 if (PL_expect == XREF)
4e4e412b 3254 PL_expect = XTERM;
a0d0e21e 3255 else {
3280af22
NIS
3256 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3257 PL_expect = XSTATE;
a0d0e21e 3258 }
8990e307 3259 }
a0d0e21e 3260 break;
463ee0b2 3261 }
57843af0 3262 yylval.ival = CopLINE(PL_curcop);
79072805 3263 if (isSPACE(*s) || *s == '#')
3280af22 3264 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3265 TOKEN('{');
378cc40b 3266 case '}':
79072805
LW
3267 rightbracket:
3268 s++;
3280af22 3269 if (PL_lex_brackets <= 0)
d98d5fff 3270 yyerror("Unmatched right curly bracket");
463ee0b2 3271 else
3280af22 3272 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3273 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3274 PL_lex_formbrack = 0;
3275 if (PL_lex_state == LEX_INTERPNORMAL) {
3276 if (PL_lex_brackets == 0) {
9059aa12
LW
3277 if (PL_expect & XFAKEBRACK) {
3278 PL_expect &= XENUMMASK;
3280af22
NIS
3279 PL_lex_state = LEX_INTERPEND;
3280 PL_bufptr = s;
cea2e8a9 3281 return yylex(); /* ignore fake brackets */
79072805 3282 }
fa83b5b6 3283 if (*s == '-' && s[1] == '>')
3280af22 3284 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3285 else if (*s != '[' && *s != '{')
3280af22 3286 PL_lex_state = LEX_INTERPEND;
79072805
LW
3287 }
3288 }
9059aa12
LW
3289 if (PL_expect & XFAKEBRACK) {
3290 PL_expect &= XENUMMASK;
3280af22 3291 PL_bufptr = s;
cea2e8a9 3292 return yylex(); /* ignore fake brackets */
748a9306 3293 }
79072805
LW
3294 force_next('}');
3295 TOKEN(';');
378cc40b
LW
3296 case '&':
3297 s++;
3298 tmp = *s++;
3299 if (tmp == '&')
a0d0e21e 3300 AOPERATOR(ANDAND);
378cc40b 3301 s--;
3280af22 3302 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3303 if (ckWARN(WARN_SEMICOLON)
3304 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3305 {
57843af0 3306 CopLINE_dec(PL_curcop);
cea2e8a9 3307 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3308 CopLINE_inc(PL_curcop);
463ee0b2 3309 }
79072805 3310 BAop(OP_BIT_AND);
463ee0b2 3311 }
79072805 3312
3280af22
NIS
3313 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3314 if (*PL_tokenbuf) {
3315 PL_expect = XOPERATOR;
3316 force_ident(PL_tokenbuf, '&');
463ee0b2 3317 }
79072805
LW
3318 else
3319 PREREF('&');
c07a80fd 3320 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3321 TERM('&');
3322
378cc40b
LW
3323 case '|':
3324 s++;
3325 tmp = *s++;
3326 if (tmp == '|')
a0d0e21e 3327 AOPERATOR(OROR);
378cc40b 3328 s--;
79072805 3329 BOop(OP_BIT_OR);
378cc40b
LW
3330 case '=':
3331 s++;
3332 tmp = *s++;
3333 if (tmp == '=')
79072805
LW
3334 Eop(OP_EQ);
3335 if (tmp == '>')
3336 OPERATOR(',');
378cc40b 3337 if (tmp == '~')
79072805 3338 PMop(OP_MATCH);
599cee73 3339 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3340 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3341 s--;
3280af22
NIS
3342 if (PL_expect == XSTATE && isALPHA(tmp) &&
3343 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3344 {
3280af22
NIS
3345 if (PL_in_eval && !PL_rsfp) {
3346 d = PL_bufend;
a5f75d66
AD
3347 while (s < d) {
3348 if (*s++ == '\n') {
3349 incline(s);
3350 if (strnEQ(s,"=cut",4)) {
3351 s = strchr(s,'\n');
3352 if (s)
3353 s++;
3354 else
3355 s = d;
3356 incline(s);
3357 goto retry;
3358 }
3359 }
3360 }
3361 goto retry;
3362 }
3280af22
NIS
3363 s = PL_bufend;
3364 PL_doextract = TRUE;
a0d0e21e
LW
3365 goto retry;
3366 }
3280af22 3367 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3368 char *t;
51882d45 3369#ifdef PERL_STRICT_CR
bf4acbe4 3370 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3371#else
bf4acbe4 3372 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3373#endif
a0d0e21e
LW
3374 if (*t == '\n' || *t == '#') {
3375 s--;
3280af22 3376 PL_expect = XBLOCK;
a0d0e21e
LW
3377 goto leftbracket;
3378 }
79072805 3379 }
a0d0e21e
LW
3380 yylval.ival = 0;
3381 OPERATOR(ASSIGNOP);
378cc40b
LW
3382 case '!':
3383 s++;
3384 tmp = *s++;
3385 if (tmp == '=')
79072805 3386 Eop(OP_NE);
378cc40b 3387 if (tmp == '~')
79072805 3388 PMop(OP_NOT);
378cc40b
LW
3389 s--;
3390 OPERATOR('!');
3391 case '<':
3280af22 3392 if (PL_expect != XOPERATOR) {
93a17b20 3393 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3394 check_uni();
79072805
LW
3395 if (s[1] == '<')
3396 s = scan_heredoc(s);
3397 else
3398 s = scan_inputsymbol(s);
3399 TERM(sublex_start());
378cc40b
LW
3400 }
3401 s++;
3402 tmp = *s++;
3403 if (tmp == '<')
79072805 3404 SHop(OP_LEFT_SHIFT);
395c3793
LW
3405 if (tmp == '=') {
3406 tmp = *s++;
3407 if (tmp == '>')
79072805 3408 Eop(OP_NCMP);
395c3793 3409 s--;
79072805 3410 Rop(OP_LE);
395c3793 3411 }
378cc40b 3412 s--;
79072805 3413 Rop(OP_LT);
378cc40b
LW
3414 case '>':
3415 s++;
3416 tmp = *s++;
3417 if (tmp == '>')
79072805 3418 SHop(OP_RIGHT_SHIFT);
378cc40b 3419 if (tmp == '=')
79072805 3420 Rop(OP_GE);
378cc40b 3421 s--;
79072805 3422 Rop(OP_GT);
378cc40b
LW
3423
3424 case '$':
bbce6d69
PP
3425 CLINE;
3426
3280af22
NIS
3427 if (PL_expect == XOPERATOR) {
3428 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3429 PL_expect = XTERM;
a0d0e21e 3430 depcom();
bbce6d69 3431 return ','; /* grandfather non-comma-format format */
a0d0e21e 3432 }
8990e307 3433 }
a0d0e21e 3434
7e2040f0 3435 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3436 PL_tokenbuf[0] = '@';
376b8730
SM
3437 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3438 sizeof PL_tokenbuf - 1, FALSE);
3439 if (PL_expect == XOPERATOR)
3440 no_op("Array length", s);
3280af22 3441 if (!PL_tokenbuf[1])
a0d0e21e 3442 PREREF(DOLSHARP);
3280af22
NIS
3443 PL_expect = XOPERATOR;
3444 PL_pending_ident = '#';
463ee0b2 3445 TOKEN(DOLSHARP);
79072805 3446 }
bbce6d69 3447
3280af22 3448 PL_tokenbuf[0] = '$';
376b8730
SM
3449 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3450 sizeof PL_tokenbuf - 1, FALSE);
3451 if (PL_expect == XOPERATOR)
3452 no_op("Scalar", s);
3280af22
NIS
3453 if (!PL_tokenbuf[1]) {
3454 if (s == PL_bufend)
bbce6d69
PP
3455 yyerror("Final $ should be \\$ or $name");
3456 PREREF('$');
8990e307 3457 }
a0d0e21e 3458
bbce6d69 3459 /* This kludge not intended to be bulletproof. */
3280af22 3460 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3461 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3462 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3463 yylval.opval->op_private = OPpCONST_ARYBASE;
3464 TERM(THING);
3465 }
3466
ff68c719 3467 d = s;
69d2bceb 3468 tmp = (I32)*s;
3280af22 3469 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3470 s = skipspace(s);
3471
3280af22 3472 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3473 char *t;
3474 if (*s == '[') {
3280af22 3475 PL_tokenbuf[0] = '@';
599cee73 3476 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3477 for(t = s + 1;
7e2040f0 3478 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3479 t++) ;
a0d0e21e 3480 if (*t++ == ',') {
3280af22
NIS
3481 PL_bufptr = skipspace(PL_bufptr);
3482 while (t < PL_bufend && *t != ']')
bbce6d69 3483 t++;
cea2e8a9 3484 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3485 "Multidimensional syntax %.*s not supported",
3486 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3487 }
3488 }
bbce6d69
PP
3489 }
3490 else if (*s == '{') {
3280af22 3491 PL_tokenbuf[0] = '%';
599cee73 3492 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3493 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3494 {
3280af22 3495 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3496 STRLEN len;
3497 for (t++; isSPACE(*t); t++) ;
7e2040f0 3498 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3499 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3500 for (; isSPACE(*t); t++) ;
864dbfa3 3501 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3502 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3503 "You need to quote \"%s\"", tmpbuf);
748a9306 3504 }
93a17b20
LW
3505 }
3506 }
2f3197b3 3507 }
bbce6d69 3508
3280af22 3509 PL_expect = XOPERATOR;
69d2bceb 3510 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3511 bool islop = (PL_last_lop == PL_oldoldbufptr);
3512 if (!islop || PL_last_lop_op == OP_GREPSTART)
3513 PL_expect = XOPERATOR;
bbce6d69 3514 else if (strchr("$@\"'`q", *s))
3280af22 3515 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0