This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Turn off gcc-2.95's strict-alias optimization.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4eb8286e 3 * Copyright (c) 1991-1999, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543
GS
30static void restore_rsfp(pTHXo_ void *f);
31static void restore_expect(pTHXo_ void *e);
32static void restore_lex_expect(pTHXo_ void *e);
33
a0ed51b3 34#define UTF (PL_hints & HINT_UTF8)
834a4ddd
LW
35/*
36 * Note: we try to be careful never to call the isXXX_utf8() functions
37 * unless we're pretty sure we've seen the beginning of a UTF-8 character
38 * (that is, the two high bits are set). Otherwise we risk loading in the
39 * heavy-duty SWASHINIT and SWASHGET routines unnecessarily.
40 */
41#define isIDFIRST_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
42 ? isIDFIRST(*(p)) \
43 : isIDFIRST_utf8((U8*)p))
44#define isALNUM_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
45 ? isALNUM(*(p)) \
46 : isALNUM_utf8((U8*)p))
a0ed51b3 47
2b92dfce
GS
48/* In variables name $^X, these are the legal values for X.
49 * 1999-02-27 mjd-perl-patch@plover.com */
50#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51
ffb4593c
NT
52/* LEX_* are values for PL_lex_state, the state of the lexer.
53 * They are arranged oddly so that the guard on the switch statement
79072805
LW
54 * can get by with a single comparison (if the compiler is smart enough).
55 */
56
fb73857a 57/* #define LEX_NOTPARSING 11 is done in perl.h. */
58
55497cff 59#define LEX_NORMAL 10
60#define LEX_INTERPNORMAL 9
61#define LEX_INTERPCASEMOD 8
62#define LEX_INTERPPUSH 7
63#define LEX_INTERPSTART 6
64#define LEX_INTERPEND 5
65#define LEX_INTERPENDMAYBE 4
66#define LEX_INTERPCONCAT 3
67#define LEX_INTERPCONST 2
68#define LEX_FORMLINE 1
69#define LEX_KNOWNEXT 0
79072805 70
395c3793
LW
71#ifdef I_FCNTL
72#include <fcntl.h>
73#endif
fe14fcc3
LW
74#ifdef I_SYS_FILE
75#include <sys/file.h>
76#endif
395c3793 77
a790bc05 78/* XXX If this causes problems, set i_unistd=undef in the hint file. */
79#ifdef I_UNISTD
80# include <unistd.h> /* Needed for execv() */
81#endif
82
83
79072805
LW
84#ifdef ff_next
85#undef ff_next
d48672a2
LW
86#endif
87
a1a0e61e
TD
88#ifdef USE_PURE_BISON
89YYSTYPE* yylval_pointer = NULL;
90int* yychar_pointer = NULL;
22c35a8c
GS
91# undef yylval
92# undef yychar
e4bfbdd4
JH
93# define yylval (*yylval_pointer)
94# define yychar (*yychar_pointer)
95# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
96# undef yylex
97# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
98#endif
99
79072805 100#include "keywords.h"
fe14fcc3 101
ffb4593c
NT
102/* CLINE is a macro that ensures PL_copline has a sane value */
103
ae986130
LW
104#ifdef CLINE
105#undef CLINE
106#endif
3280af22
NIS
107#define CLINE (PL_copline = (PL_curcop->cop_line < PL_copline ? PL_curcop->cop_line : PL_copline))
108
ffb4593c
NT
109/*
110 * Convenience functions to return different tokens and prime the
9cbb5ea2 111 * lexer for the next token. They all take an argument.
ffb4593c
NT
112 *
113 * TOKEN : generic token (used for '(', DOLSHARP, etc)
114 * OPERATOR : generic operator
115 * AOPERATOR : assignment operator
116 * PREBLOCK : beginning the block after an if, while, foreach, ...
117 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
118 * PREREF : *EXPR where EXPR is not a simple identifier
119 * TERM : expression term
120 * LOOPX : loop exiting command (goto, last, dump, etc)
121 * FTST : file test operator
122 * FUN0 : zero-argument function
123 * FUN1 : not used
124 * BOop : bitwise or or xor
125 * BAop : bitwise and
126 * SHop : shift operator
127 * PWop : power operator
9cbb5ea2 128 * PMop : pattern-matching operator
ffb4593c
NT
129 * Aop : addition-level operator
130 * Mop : multiplication-level operator
131 * Eop : equality-testing operator
132 * Rop : relational operator <= != gt
133 *
134 * Also see LOP and lop() below.
135 */
136
3280af22
NIS
137#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
138#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
139#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
140#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
141#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
142#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
143#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
144#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
145#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
146#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
147#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
148#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
149#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
150#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
151#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
152#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
153#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
154#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
155#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
156#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 157
a687059c
LW
158/* This bit of chicanery makes a unary function followed by
159 * a parenthesis into a function with one argument, highest precedence.
160 */
2f3197b3 161#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
162 PL_expect = XTERM, \
163 PL_bufptr = s, \
164 PL_last_uni = PL_oldbufptr, \
165 PL_last_lop_op = f, \
a687059c
LW
166 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
167
79072805 168#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
169 PL_bufptr = s, \
170 PL_last_uni = PL_oldbufptr, \
79072805
LW
171 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
172
9f68db38 173/* grandfather return to old style */
3280af22 174#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 175
ffb4593c
NT
176/*
177 * S_ao
178 *
179 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
180 * into an OP_ANDASSIGN or OP_ORASSIGN
181 */
182
76e3520e 183STATIC int
cea2e8a9 184S_ao(pTHX_ int toketype)
a0d0e21e 185{
3280af22
NIS
186 if (*PL_bufptr == '=') {
187 PL_bufptr++;
a0d0e21e
LW
188 if (toketype == ANDAND)
189 yylval.ival = OP_ANDASSIGN;
190 else if (toketype == OROR)
191 yylval.ival = OP_ORASSIGN;
192 toketype = ASSIGNOP;
193 }
194 return toketype;
195}
196
ffb4593c
NT
197/*
198 * S_no_op
199 * When Perl expects an operator and finds something else, no_op
200 * prints the warning. It always prints "<something> found where
201 * operator expected. It prints "Missing semicolon on previous line?"
202 * if the surprise occurs at the start of the line. "do you need to
203 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
204 * where the compiler doesn't know if foo is a method call or a function.
205 * It prints "Missing operator before end of line" if there's nothing
206 * after the missing operator, or "... before <...>" if there is something
207 * after the missing operator.
208 */
209
76e3520e 210STATIC void
cea2e8a9 211S_no_op(pTHX_ char *what, char *s)
463ee0b2 212{
3280af22
NIS
213 char *oldbp = PL_bufptr;
214 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 215
376b8730 216 assert(s >= oldbp);
3280af22 217 PL_bufptr = s;
cea2e8a9 218 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 219 if (is_first)
cea2e8a9 220 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
834a4ddd 221 else if (PL_oldoldbufptr && isIDFIRST_lazy(PL_oldoldbufptr)) {
748a9306 222 char *t;
834a4ddd 223 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy(t) || *t == ':'); t++) ;
3280af22 224 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 225 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 226 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306
LW
227 }
228 else
cea2e8a9 229 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
3280af22 230 PL_bufptr = oldbp;
8990e307
LW
231}
232
ffb4593c
NT
233/*
234 * S_missingterm
235 * Complain about missing quote/regexp/heredoc terminator.
236 * If it's called with (char *)NULL then it cauterizes the line buffer.
237 * If we're in a delimited string and the delimiter is a control
238 * character, it's reformatted into a two-char sequence like ^C.
239 * This is fatal.
240 */
241
76e3520e 242STATIC void
cea2e8a9 243S_missingterm(pTHX_ char *s)
8990e307
LW
244{
245 char tmpbuf[3];
246 char q;
247 if (s) {
248 char *nl = strrchr(s,'\n');
d2719217 249 if (nl)
8990e307
LW
250 *nl = '\0';
251 }
9d116dd7
JH
252 else if (
253#ifdef EBCDIC
254 iscntrl(PL_multi_close)
255#else
256 PL_multi_close < 32 || PL_multi_close == 127
257#endif
258 ) {
8990e307 259 *tmpbuf = '^';
3280af22 260 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
261 s = "\\n";
262 tmpbuf[2] = '\0';
263 s = tmpbuf;
264 }
265 else {
3280af22 266 *tmpbuf = PL_multi_close;
8990e307
LW
267 tmpbuf[1] = '\0';
268 s = tmpbuf;
269 }
270 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 271 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 272}
79072805 273
ffb4593c
NT
274/*
275 * Perl_deprecate
ffb4593c
NT
276 */
277
79072805 278void
864dbfa3 279Perl_deprecate(pTHX_ char *s)
a0d0e21e 280{
d008e5eb 281 dTHR;
599cee73 282 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 283 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
284}
285
ffb4593c
NT
286/*
287 * depcom
9cbb5ea2 288 * Deprecate a comma-less variable list.
ffb4593c
NT
289 */
290
76e3520e 291STATIC void
cea2e8a9 292S_depcom(pTHX)
a0d0e21e
LW
293{
294 deprecate("comma-less variable list");
295}
296
ffb4593c 297/*
9cbb5ea2
GS
298 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
299 * utf16-to-utf8-reversed.
ffb4593c
NT
300 */
301
a868473f
NIS
302#ifdef WIN32
303
76e3520e 304STATIC I32
cea2e8a9 305S_win32_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f
NIS
306{
307 I32 count = FILTER_READ(idx+1, sv, maxlen);
308 if (count > 0 && !maxlen)
309 win32_strip_return(sv);
310 return count;
311}
312#endif
313
a0ed51b3 314STATIC I32
cea2e8a9 315S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
316{
317 I32 count = FILTER_READ(idx+1, sv, maxlen);
318 if (count) {
dfe13c55
GS
319 U8* tmps;
320 U8* tend;
321 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 322 tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 323 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
324
325 }
326 return count;
327}
328
329STATIC I32
cea2e8a9 330S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
331{
332 I32 count = FILTER_READ(idx+1, sv, maxlen);
333 if (count) {
dfe13c55
GS
334 U8* tmps;
335 U8* tend;
336 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 337 tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 338 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
339
340 }
341 return count;
342}
a868473f 343
ffb4593c
NT
344/*
345 * Perl_lex_start
9cbb5ea2
GS
346 * Initialize variables. Uses the Perl save_stack to save its state (for
347 * recursive calls to the parser).
ffb4593c
NT
348 */
349
a0d0e21e 350void
864dbfa3 351Perl_lex_start(pTHX_ SV *line)
79072805 352{
0f15f207 353 dTHR;
8990e307
LW
354 char *s;
355 STRLEN len;
356
3280af22
NIS
357 SAVEI32(PL_lex_dojoin);
358 SAVEI32(PL_lex_brackets);
359 SAVEI32(PL_lex_fakebrack);
360 SAVEI32(PL_lex_casemods);
361 SAVEI32(PL_lex_starts);
362 SAVEI32(PL_lex_state);
363 SAVESPTR(PL_lex_inpat);
364 SAVEI32(PL_lex_inwhat);
365 SAVEI16(PL_curcop->cop_line);
366 SAVEPPTR(PL_bufptr);
367 SAVEPPTR(PL_bufend);
368 SAVEPPTR(PL_oldbufptr);
369 SAVEPPTR(PL_oldoldbufptr);
370 SAVEPPTR(PL_linestart);
371 SAVESPTR(PL_linestr);
372 SAVEPPTR(PL_lex_brackstack);
373 SAVEPPTR(PL_lex_casestack);
51371543 374 SAVEDESTRUCTOR(restore_rsfp, PL_rsfp);
3280af22
NIS
375 SAVESPTR(PL_lex_stuff);
376 SAVEI32(PL_lex_defer);
377 SAVESPTR(PL_lex_repl);
51371543
GS
378 SAVEDESTRUCTOR(restore_expect, PL_tokenbuf + PL_expect); /* encode as pointer */
379 SAVEDESTRUCTOR(restore_lex_expect, PL_tokenbuf + PL_expect);
3280af22
NIS
380
381 PL_lex_state = LEX_NORMAL;
382 PL_lex_defer = 0;
383 PL_expect = XSTATE;
384 PL_lex_brackets = 0;
385 PL_lex_fakebrack = 0;
386 New(899, PL_lex_brackstack, 120, char);
387 New(899, PL_lex_casestack, 12, char);
388 SAVEFREEPV(PL_lex_brackstack);
389 SAVEFREEPV(PL_lex_casestack);
390 PL_lex_casemods = 0;
391 *PL_lex_casestack = '\0';
392 PL_lex_dojoin = 0;
393 PL_lex_starts = 0;
394 PL_lex_stuff = Nullsv;
395 PL_lex_repl = Nullsv;
396 PL_lex_inpat = 0;
397 PL_lex_inwhat = 0;
398 PL_linestr = line;
399 if (SvREADONLY(PL_linestr))
400 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
401 s = SvPV(PL_linestr, len);
8990e307 402 if (len && s[len-1] != ';') {
3280af22
NIS
403 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
404 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
405 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 406 }
3280af22
NIS
407 SvTEMP_off(PL_linestr);
408 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
409 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
410 SvREFCNT_dec(PL_rs);
79cb57f6 411 PL_rs = newSVpvn("\n", 1);
3280af22 412 PL_rsfp = 0;
79072805 413}
a687059c 414
ffb4593c
NT
415/*
416 * Perl_lex_end
9cbb5ea2
GS
417 * Finalizer for lexing operations. Must be called when the parser is
418 * done with the lexer.
ffb4593c
NT
419 */
420
463ee0b2 421void
864dbfa3 422Perl_lex_end(pTHX)
463ee0b2 423{
3280af22 424 PL_doextract = FALSE;
463ee0b2
LW
425}
426
ffb4593c
NT
427/*
428 * S_incline
429 * This subroutine has nothing to do with tilting, whether at windmills
430 * or pinball tables. Its name is short for "increment line". It
431 * increments the current line number in PL_curcop->cop_line and checks
432 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
433 * # line 500 "foo.pm"
434 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
435 */
436
76e3520e 437STATIC void
cea2e8a9 438S_incline(pTHX_ char *s)
463ee0b2 439{
0f15f207 440 dTHR;
463ee0b2
LW
441 char *t;
442 char *n;
443 char ch;
444 int sawline = 0;
445
3280af22 446 PL_curcop->cop_line++;
463ee0b2
LW
447 if (*s++ != '#')
448 return;
449 while (*s == ' ' || *s == '\t') s++;
450 if (strnEQ(s, "line ", 5)) {
451 s += 5;
452 sawline = 1;
453 }
454 if (!isDIGIT(*s))
455 return;
456 n = s;
457 while (isDIGIT(*s))
458 s++;
459 while (*s == ' ' || *s == '\t')
460 s++;
461 if (*s == '"' && (t = strchr(s+1, '"')))
462 s++;
463 else {
464 if (!sawline)
465 return; /* false alarm */
466 for (t = s; !isSPACE(*t); t++) ;
467 }
468 ch = *t;
469 *t = '\0';
470 if (t - s > 0)
3280af22 471 PL_curcop->cop_filegv = gv_fetchfile(s);
463ee0b2 472 else
3280af22 473 PL_curcop->cop_filegv = gv_fetchfile(PL_origfilename);
463ee0b2 474 *t = ch;
3280af22 475 PL_curcop->cop_line = atoi(n)-1;
463ee0b2
LW
476}
477
ffb4593c
NT
478/*
479 * S_skipspace
480 * Called to gobble the appropriate amount and type of whitespace.
481 * Skips comments as well.
482 */
483
76e3520e 484STATIC char *
cea2e8a9 485S_skipspace(pTHX_ register char *s)
a687059c 486{
11343788 487 dTHR;
3280af22
NIS
488 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
489 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
463ee0b2
LW
490 s++;
491 return s;
492 }
493 for (;;) {
fd049845 494 STRLEN prevlen;
60e6418e
GS
495 while (s < PL_bufend && isSPACE(*s)) {
496 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
497 incline(s);
498 }
ffb4593c
NT
499
500 /* comment */
3280af22
NIS
501 if (s < PL_bufend && *s == '#') {
502 while (s < PL_bufend && *s != '\n')
463ee0b2 503 s++;
60e6418e 504 if (s < PL_bufend) {
463ee0b2 505 s++;
60e6418e
GS
506 if (PL_in_eval && !PL_rsfp) {
507 incline(s);
508 continue;
509 }
510 }
463ee0b2 511 }
ffb4593c
NT
512
513 /* only continue to recharge the buffer if we're at the end
514 * of the buffer, we're not reading from a source filter, and
515 * we're in normal lexing mode
516 */
3280af22 517 if (s < PL_bufend || !PL_rsfp || PL_lex_state != LEX_NORMAL)
463ee0b2 518 return s;
ffb4593c
NT
519
520 /* try to recharge the buffer */
9cbb5ea2
GS
521 if ((s = filter_gets(PL_linestr, PL_rsfp,
522 (prevlen = SvCUR(PL_linestr)))) == Nullch)
523 {
524 /* end of file. Add on the -p or -n magic */
3280af22
NIS
525 if (PL_minus_n || PL_minus_p) {
526 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
527 ";}continue{print or die qq(-p destination: $!\\n)" :
528 "");
3280af22
NIS
529 sv_catpv(PL_linestr,";}");
530 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
531 }
532 else
3280af22 533 sv_setpv(PL_linestr,";");
ffb4593c
NT
534
535 /* reset variables for next time we lex */
9cbb5ea2
GS
536 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
537 = SvPVX(PL_linestr);
3280af22 538 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
539
540 /* Close the filehandle. Could be from -P preprocessor,
541 * STDIN, or a regular file. If we were reading code from
542 * STDIN (because the commandline held no -e or filename)
543 * then we don't close it, we reset it so the code can
544 * read from STDIN too.
545 */
546
3280af22
NIS
547 if (PL_preprocess && !PL_in_eval)
548 (void)PerlProc_pclose(PL_rsfp);
549 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
550 PerlIO_clearerr(PL_rsfp);
8990e307 551 else
3280af22
NIS
552 (void)PerlIO_close(PL_rsfp);
553 PL_rsfp = Nullfp;
463ee0b2
LW
554 return s;
555 }
ffb4593c
NT
556
557 /* not at end of file, so we only read another line */
3280af22
NIS
558 PL_linestart = PL_bufptr = s + prevlen;
559 PL_bufend = s + SvCUR(PL_linestr);
560 s = PL_bufptr;
a0d0e21e 561 incline(s);
ffb4593c
NT
562
563 /* debugger active and we're not compiling the debugger code,
564 * so store the line into the debugger's array of lines
565 */
3280af22 566 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
567 SV *sv = NEWSV(85,0);
568
569 sv_upgrade(sv, SVt_PVMG);
3280af22
NIS
570 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
571 av_store(GvAV(PL_curcop->cop_filegv),(I32)PL_curcop->cop_line,sv);
8990e307 572 }
463ee0b2 573 }
a687059c 574}
378cc40b 575
ffb4593c
NT
576/*
577 * S_check_uni
578 * Check the unary operators to ensure there's no ambiguity in how they're
579 * used. An ambiguous piece of code would be:
580 * rand + 5
581 * This doesn't mean rand() + 5. Because rand() is a unary operator,
582 * the +5 is its argument.
583 */
584
76e3520e 585STATIC void
cea2e8a9 586S_check_uni(pTHX)
ba106d47 587{
2f3197b3 588 char *s;
a0d0e21e 589 char *t;
0453d815 590 dTHR;
2f3197b3 591
3280af22 592 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 593 return;
3280af22
NIS
594 while (isSPACE(*PL_last_uni))
595 PL_last_uni++;
834a4ddd 596 for (s = PL_last_uni; isALNUM_lazy(s) || *s == '-'; s++) ;
3280af22 597 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 598 return;
0453d815 599 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 600 char ch = *s;
0453d815
PM
601 *s = '\0';
602 Perl_warner(aTHX_ WARN_AMBIGUOUS,
603 "Warning: Use of \"%s\" without parens is ambiguous",
604 PL_last_uni);
605 *s = ch;
606 }
2f3197b3
LW
607}
608
ffb4593c
NT
609/* workaround to replace the UNI() macro with a function. Only the
610 * hints/uts.sh file mentions this. Other comments elsewhere in the
611 * source indicate Microport Unix might need it too.
612 */
613
ffed7fef
LW
614#ifdef CRIPPLED_CC
615
616#undef UNI
ffed7fef 617#define UNI(f) return uni(f,s)
ffed7fef 618
76e3520e 619STATIC int
cea2e8a9 620S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
621{
622 yylval.ival = f;
3280af22
NIS
623 PL_expect = XTERM;
624 PL_bufptr = s;
8f872242
NIS
625 PL_last_uni = PL_oldbufptr;
626 PL_last_lop_op = f;
ffed7fef
LW
627 if (*s == '(')
628 return FUNC1;
629 s = skipspace(s);
630 if (*s == '(')
631 return FUNC1;
632 else
633 return UNIOP;
634}
635
a0d0e21e
LW
636#endif /* CRIPPLED_CC */
637
ffb4593c
NT
638/*
639 * LOP : macro to build a list operator. Its behaviour has been replaced
640 * with a subroutine, S_lop() for which LOP is just another name.
641 */
642
a0d0e21e
LW
643#define LOP(f,x) return lop(f,x,s)
644
ffb4593c
NT
645/*
646 * S_lop
647 * Build a list operator (or something that might be one). The rules:
648 * - if we have a next token, then it's a list operator [why?]
649 * - if the next thing is an opening paren, then it's a function
650 * - else it's a list operator
651 */
652
76e3520e 653STATIC I32
cea2e8a9 654S_lop(pTHX_ I32 f, expectation x, char *s)
ffed7fef 655{
0f15f207 656 dTHR;
79072805 657 yylval.ival = f;
35c8bce7 658 CLINE;
3280af22
NIS
659 PL_expect = x;
660 PL_bufptr = s;
661 PL_last_lop = PL_oldbufptr;
662 PL_last_lop_op = f;
663 if (PL_nexttoke)
a0d0e21e 664 return LSTOP;
79072805
LW
665 if (*s == '(')
666 return FUNC;
667 s = skipspace(s);
668 if (*s == '(')
669 return FUNC;
670 else
671 return LSTOP;
672}
673
ffb4593c
NT
674/*
675 * S_force_next
9cbb5ea2 676 * When the lexer realizes it knows the next token (for instance,
ffb4593c 677 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
678 * to know what token to return the next time the lexer is called. Caller
679 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
680 * handles the token correctly.
ffb4593c
NT
681 */
682
76e3520e 683STATIC void
cea2e8a9 684S_force_next(pTHX_ I32 type)
79072805 685{
3280af22
NIS
686 PL_nexttype[PL_nexttoke] = type;
687 PL_nexttoke++;
688 if (PL_lex_state != LEX_KNOWNEXT) {
689 PL_lex_defer = PL_lex_state;
690 PL_lex_expect = PL_expect;
691 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
692 }
693}
694
ffb4593c
NT
695/*
696 * S_force_word
697 * When the lexer knows the next thing is a word (for instance, it has
698 * just seen -> and it knows that the next char is a word char, then
699 * it calls S_force_word to stick the next word into the PL_next lookahead.
700 *
701 * Arguments:
702 * char *start : start of the buffer
703 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
704 * int check_keyword : if true, Perl checks to make sure the word isn't
705 * a keyword (do this if the word is a label, e.g. goto FOO)
706 * int allow_pack : if true, : characters will also be allowed (require,
707 * use, etc. do this)
9cbb5ea2 708 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
709 */
710
76e3520e 711STATIC char *
cea2e8a9 712S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 713{
463ee0b2
LW
714 register char *s;
715 STRLEN len;
716
717 start = skipspace(start);
718 s = start;
834a4ddd 719 if (isIDFIRST_lazy(s) ||
a0d0e21e 720 (allow_pack && *s == ':') ||
15f0808c 721 (allow_initial_tick && *s == '\'') )
a0d0e21e 722 {
3280af22
NIS
723 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
724 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
725 return start;
726 if (token == METHOD) {
727 s = skipspace(s);
728 if (*s == '(')
3280af22 729 PL_expect = XTERM;
463ee0b2 730 else {
3280af22 731 PL_expect = XOPERATOR;
463ee0b2 732 }
79072805 733 }
3280af22
NIS
734 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
735 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
736 force_next(token);
737 }
738 return s;
739}
740
ffb4593c
NT
741/*
742 * S_force_ident
9cbb5ea2 743 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
744 * text only contains the "foo" portion. The first argument is a pointer
745 * to the "foo", and the second argument is the type symbol to prefix.
746 * Forces the next token to be a "WORD".
9cbb5ea2 747 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
748 */
749
76e3520e 750STATIC void
cea2e8a9 751S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
752{
753 if (s && *s) {
11343788 754 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 755 PL_nextval[PL_nexttoke].opval = o;
79072805 756 force_next(WORD);
748a9306 757 if (kind) {
e858de61 758 dTHR; /* just for in_eval */
11343788 759 o->op_private = OPpCONST_ENTERED;
55497cff 760 /* XXX see note in pp_entereval() for why we forgo typo
761 warnings if the symbol must be introduced in an eval.
762 GSAR 96-10-12 */
3280af22 763 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
764 kind == '$' ? SVt_PV :
765 kind == '@' ? SVt_PVAV :
766 kind == '%' ? SVt_PVHV :
767 SVt_PVGV
768 );
748a9306 769 }
79072805
LW
770 }
771}
772
ffb4593c
NT
773/*
774 * S_force_version
775 * Forces the next token to be a version number.
776 */
777
76e3520e 778STATIC char *
cea2e8a9 779S_force_version(pTHX_ char *s)
89bfa8cd 780{
781 OP *version = Nullop;
782
783 s = skipspace(s);
784
785 /* default VERSION number -- GBARR */
786
787 if(isDIGIT(*s)) {
788 char *d;
789 int c;
55497cff 790 for( d=s, c = 1; isDIGIT(*d) || *d == '_' || (*d == '.' && c--); d++);
89bfa8cd 791 if((*d == ';' || isSPACE(*d)) && *(skipspace(d)) != ',') {
792 s = scan_num(s);
793 /* real VERSION number -- GBARR */
794 version = yylval.opval;
795 }
796 }
797
798 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 799 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd 800 force_next(WORD);
801
802 return (s);
803}
804
ffb4593c
NT
805/*
806 * S_tokeq
807 * Tokenize a quoted string passed in as an SV. It finds the next
808 * chunk, up to end of string or a backslash. It may make a new
809 * SV containing that chunk (if HINT_NEW_STRING is on). It also
810 * turns \\ into \.
811 */
812
76e3520e 813STATIC SV *
cea2e8a9 814S_tokeq(pTHX_ SV *sv)
79072805
LW
815{
816 register char *s;
817 register char *send;
818 register char *d;
b3ac6de7
IZ
819 STRLEN len = 0;
820 SV *pv = sv;
79072805
LW
821
822 if (!SvLEN(sv))
b3ac6de7 823 goto finish;
79072805 824
a0d0e21e 825 s = SvPV_force(sv, len);
748a9306 826 if (SvIVX(sv) == -1)
b3ac6de7 827 goto finish;
463ee0b2 828 send = s + len;
79072805
LW
829 while (s < send && *s != '\\')
830 s++;
831 if (s == send)
b3ac6de7 832 goto finish;
79072805 833 d = s;
3280af22 834 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 835 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
836 while (s < send) {
837 if (*s == '\\') {
a0d0e21e 838 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
839 s++; /* all that, just for this */
840 }
841 *d++ = *s++;
842 }
843 *d = '\0';
463ee0b2 844 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 845 finish:
3280af22 846 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 847 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
848 return sv;
849}
850
ffb4593c
NT
851/*
852 * Now come three functions related to double-quote context,
853 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
854 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
855 * interact with PL_lex_state, and create fake ( ... ) argument lists
856 * to handle functions and concatenation.
857 * They assume that whoever calls them will be setting up a fake
858 * join call, because each subthing puts a ',' after it. This lets
859 * "lower \luPpEr"
860 * become
861 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
862 *
863 * (I'm not sure whether the spurious commas at the end of lcfirst's
864 * arguments and join's arguments are created or not).
865 */
866
867/*
868 * S_sublex_start
869 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
870 *
871 * Pattern matching will set PL_lex_op to the pattern-matching op to
872 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
873 *
874 * OP_CONST and OP_READLINE are easy--just make the new op and return.
875 *
876 * Everything else becomes a FUNC.
877 *
878 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
879 * had an OP_CONST or OP_READLINE). This just sets us up for a
880 * call to S_sublex_push().
881 */
882
76e3520e 883STATIC I32
cea2e8a9 884S_sublex_start(pTHX)
79072805
LW
885{
886 register I32 op_type = yylval.ival;
79072805
LW
887
888 if (op_type == OP_NULL) {
3280af22
NIS
889 yylval.opval = PL_lex_op;
890 PL_lex_op = Nullop;
79072805
LW
891 return THING;
892 }
893 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 894 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
895
896 if (SvTYPE(sv) == SVt_PVIV) {
897 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
898 STRLEN len;
899 char *p;
900 SV *nsv;
901
902 p = SvPV(sv, len);
79cb57f6 903 nsv = newSVpvn(p, len);
b3ac6de7
IZ
904 SvREFCNT_dec(sv);
905 sv = nsv;
906 }
907 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 908 PL_lex_stuff = Nullsv;
79072805
LW
909 return THING;
910 }
911
3280af22
NIS
912 PL_sublex_info.super_state = PL_lex_state;
913 PL_sublex_info.sub_inwhat = op_type;
914 PL_sublex_info.sub_op = PL_lex_op;
915 PL_lex_state = LEX_INTERPPUSH;
55497cff 916
3280af22
NIS
917 PL_expect = XTERM;
918 if (PL_lex_op) {
919 yylval.opval = PL_lex_op;
920 PL_lex_op = Nullop;
55497cff 921 return PMFUNC;
922 }
923 else
924 return FUNC;
925}
926
ffb4593c
NT
927/*
928 * S_sublex_push
929 * Create a new scope to save the lexing state. The scope will be
930 * ended in S_sublex_done. Returns a '(', starting the function arguments
931 * to the uc, lc, etc. found before.
932 * Sets PL_lex_state to LEX_INTERPCONCAT.
933 */
934
76e3520e 935STATIC I32
cea2e8a9 936S_sublex_push(pTHX)
55497cff 937{
0f15f207 938 dTHR;
f46d017c 939 ENTER;
55497cff 940
3280af22
NIS
941 PL_lex_state = PL_sublex_info.super_state;
942 SAVEI32(PL_lex_dojoin);
943 SAVEI32(PL_lex_brackets);
944 SAVEI32(PL_lex_fakebrack);
945 SAVEI32(PL_lex_casemods);
946 SAVEI32(PL_lex_starts);
947 SAVEI32(PL_lex_state);
948 SAVESPTR(PL_lex_inpat);
949 SAVEI32(PL_lex_inwhat);
950 SAVEI16(PL_curcop->cop_line);
951 SAVEPPTR(PL_bufptr);
952 SAVEPPTR(PL_oldbufptr);
953 SAVEPPTR(PL_oldoldbufptr);
954 SAVEPPTR(PL_linestart);
955 SAVESPTR(PL_linestr);
956 SAVEPPTR(PL_lex_brackstack);
957 SAVEPPTR(PL_lex_casestack);
958
959 PL_linestr = PL_lex_stuff;
960 PL_lex_stuff = Nullsv;
961
9cbb5ea2
GS
962 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
963 = SvPVX(PL_linestr);
3280af22
NIS
964 PL_bufend += SvCUR(PL_linestr);
965 SAVEFREESV(PL_linestr);
966
967 PL_lex_dojoin = FALSE;
968 PL_lex_brackets = 0;
969 PL_lex_fakebrack = 0;
970 New(899, PL_lex_brackstack, 120, char);
971 New(899, PL_lex_casestack, 12, char);
972 SAVEFREEPV(PL_lex_brackstack);
973 SAVEFREEPV(PL_lex_casestack);
974 PL_lex_casemods = 0;
975 *PL_lex_casestack = '\0';
976 PL_lex_starts = 0;
977 PL_lex_state = LEX_INTERPCONCAT;
978 PL_curcop->cop_line = PL_multi_start;
979
980 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
981 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
982 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 983 else
3280af22 984 PL_lex_inpat = Nullop;
79072805 985
55497cff 986 return '(';
79072805
LW
987}
988
ffb4593c
NT
989/*
990 * S_sublex_done
991 * Restores lexer state after a S_sublex_push.
992 */
993
76e3520e 994STATIC I32
cea2e8a9 995S_sublex_done(pTHX)
79072805 996{
3280af22
NIS
997 if (!PL_lex_starts++) {
998 PL_expect = XOPERATOR;
79cb57f6 999 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1000 return THING;
1001 }
1002
3280af22
NIS
1003 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1004 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1005 return yylex();
79072805
LW
1006 }
1007
ffb4593c 1008 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1009 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1010 PL_linestr = PL_lex_repl;
1011 PL_lex_inpat = 0;
1012 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1013 PL_bufend += SvCUR(PL_linestr);
1014 SAVEFREESV(PL_linestr);
1015 PL_lex_dojoin = FALSE;
1016 PL_lex_brackets = 0;
1017 PL_lex_fakebrack = 0;
1018 PL_lex_casemods = 0;
1019 *PL_lex_casestack = '\0';
1020 PL_lex_starts = 0;
25da4f38 1021 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1022 PL_lex_state = LEX_INTERPNORMAL;
1023 PL_lex_starts++;
e9fa98b2
HS
1024 /* we don't clear PL_lex_repl here, so that we can check later
1025 whether this is an evalled subst; that means we rely on the
1026 logic to ensure sublex_done() is called again only via the
1027 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1028 }
e9fa98b2 1029 else {
3280af22 1030 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1031 PL_lex_repl = Nullsv;
1032 }
79072805 1033 return ',';
ffed7fef
LW
1034 }
1035 else {
f46d017c 1036 LEAVE;
3280af22
NIS
1037 PL_bufend = SvPVX(PL_linestr);
1038 PL_bufend += SvCUR(PL_linestr);
1039 PL_expect = XOPERATOR;
79072805 1040 return ')';
ffed7fef
LW
1041 }
1042}
1043
02aa26ce
NT
1044/*
1045 scan_const
1046
1047 Extracts a pattern, double-quoted string, or transliteration. This
1048 is terrifying code.
1049
3280af22
NIS
1050 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1051 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1052 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1053
9b599b2a
GS
1054 Returns a pointer to the character scanned up to. Iff this is
1055 advanced from the start pointer supplied (ie if anything was
1056 successfully parsed), will leave an OP for the substring scanned
1057 in yylval. Caller must intuit reason for not parsing further
1058 by looking at the next characters herself.
1059
02aa26ce
NT
1060 In patterns:
1061 backslashes:
1062 double-quoted style: \r and \n
1063 regexp special ones: \D \s
1064 constants: \x3
1065 backrefs: \1 (deprecated in substitution replacements)
1066 case and quoting: \U \Q \E
1067 stops on @ and $, but not for $ as tail anchor
1068
1069 In transliterations:
1070 characters are VERY literal, except for - not at the start or end
1071 of the string, which indicates a range. scan_const expands the
1072 range to the full set of intermediate characters.
1073
1074 In double-quoted strings:
1075 backslashes:
1076 double-quoted style: \r and \n
1077 constants: \x3
1078 backrefs: \1 (deprecated)
1079 case and quoting: \U \Q \E
1080 stops on @ and $
1081
1082 scan_const does *not* construct ops to handle interpolated strings.
1083 It stops processing as soon as it finds an embedded $ or @ variable
1084 and leaves it to the caller to work out what's going on.
1085
1086 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1087
1088 $ in pattern could be $foo or could be tail anchor. Assumption:
1089 it's a tail anchor if $ is the last thing in the string, or if it's
1090 followed by one of ")| \n\t"
1091
1092 \1 (backreferences) are turned into $1
1093
1094 The structure of the code is
1095 while (there's a character to process) {
1096 handle transliteration ranges
1097 skip regexp comments
1098 skip # initiated comments in //x patterns
1099 check for embedded @foo
1100 check for embedded scalars
1101 if (backslash) {
1102 leave intact backslashes from leave (below)
1103 deprecate \1 in strings and sub replacements
1104 handle string-changing backslashes \l \U \Q \E, etc.
1105 switch (what was escaped) {
1106 handle - in a transliteration (becomes a literal -)
1107 handle \132 octal characters
1108 handle 0x15 hex characters
1109 handle \cV (control V)
1110 handle printf backslashes (\f, \r, \n, etc)
1111 } (end switch)
1112 } (end if backslash)
1113 } (end while character to read)
1114
1115*/
1116
76e3520e 1117STATIC char *
cea2e8a9 1118S_scan_const(pTHX_ char *start)
79072805 1119{
3280af22 1120 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1121 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1122 register char *s = start; /* start of the constant */
1123 register char *d = SvPVX(sv); /* destination for copies */
1124 bool dorange = FALSE; /* are we in a translit range? */
1125 I32 len; /* ? */
ac2262e3 1126 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1127 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1128 : UTF;
ac2262e3 1129 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1130 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1131 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1132 : UTF;
9cbb5ea2 1133 char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1134 PL_lex_inpat
423cee85 1135 ? "\\.^$@AGZdDwWsSbBpPXO+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1136 : "";
79072805
LW
1137
1138 while (s < send || dorange) {
02aa26ce 1139 /* get transliterations out of the way (they're most literal) */
3280af22 1140 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1141 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1142 if (dorange) {
02aa26ce 1143 I32 i; /* current expanded character */
8ada0baa 1144 I32 min; /* first character in range */
02aa26ce
NT
1145 I32 max; /* last character in range */
1146
1147 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1148 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1149 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1150 d -= 2; /* eat the first char and the - */
1151
8ada0baa
JH
1152 min = (U8)*d; /* first char in range */
1153 max = (U8)d[1]; /* last char in range */
1154
1155#ifndef ASCIIish
1156 if ((isLOWER(min) && isLOWER(max)) ||
1157 (isUPPER(min) && isUPPER(max))) {
1158 if (isLOWER(min)) {
1159 for (i = min; i <= max; i++)
1160 if (isLOWER(i))
1161 *d++ = i;
1162 } else {
1163 for (i = min; i <= max; i++)
1164 if (isUPPER(i))
1165 *d++ = i;
1166 }
1167 }
1168 else
1169#endif
1170 for (i = min; i <= max; i++)
1171 *d++ = i;
02aa26ce
NT
1172
1173 /* mark the range as done, and continue */
79072805
LW
1174 dorange = FALSE;
1175 continue;
1176 }
02aa26ce
NT
1177
1178 /* range begins (ignore - as first or last char) */
79072805 1179 else if (*s == '-' && s+1 < send && s != start) {
a0ed51b3 1180 if (utf) {
a176fa2a 1181 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1182 s++;
1183 continue;
1184 }
79072805
LW
1185 dorange = TRUE;
1186 s++;
1187 }
1188 }
02aa26ce
NT
1189
1190 /* if we get here, we're not doing a transliteration */
1191
0f5d15d6
IZ
1192 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1193 except for the last char, which will be done separately. */
3280af22 1194 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1195 if (s[2] == '#') {
1196 while (s < send && *s != ')')
1197 *d++ = *s++;
0f5d15d6
IZ
1198 } else if (s[2] == '{'
1199 || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
cc6b7395 1200 I32 count = 1;
0f5d15d6 1201 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1202 char c;
1203
d9f97599
GS
1204 while (count && (c = *regparse)) {
1205 if (c == '\\' && regparse[1])
1206 regparse++;
cc6b7395
IZ
1207 else if (c == '{')
1208 count++;
1209 else if (c == '}')
1210 count--;
d9f97599 1211 regparse++;
cc6b7395 1212 }
5bdf89e7
IZ
1213 if (*regparse != ')') {
1214 regparse--; /* Leave one char for continuation. */
cc6b7395 1215 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1216 }
0f5d15d6 1217 while (s < regparse)
cc6b7395
IZ
1218 *d++ = *s++;
1219 }
748a9306 1220 }
02aa26ce
NT
1221
1222 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1223 else if (*s == '#' && PL_lex_inpat &&
1224 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1225 while (s+1 < send && *s != '\n')
1226 *d++ = *s++;
1227 }
02aa26ce
NT
1228
1229 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
834a4ddd 1230 else if (*s == '@' && s[1] && (isALNUM_lazy(s+1) || strchr(":'{$", s[1])))
79072805 1231 break;
02aa26ce
NT
1232
1233 /* check for embedded scalars. only stop if we're sure it's a
1234 variable.
1235 */
79072805 1236 else if (*s == '$') {
3280af22 1237 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1238 break;
c277df42 1239 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1240 break; /* in regexp, $ might be tail anchor */
1241 }
02aa26ce 1242
a0ed51b3
LW
1243 /* (now in tr/// code again) */
1244
d008e5eb
GS
1245 if (*s & 0x80 && thisutf) {
1246 dTHR; /* only for ckWARN */
1247 if (ckWARN(WARN_UTF8)) {
dfe13c55 1248 (void)utf8_to_uv((U8*)s, &len); /* could cvt latin-1 to utf8 here... */
d008e5eb
GS
1249 if (len) {
1250 while (len--)
1251 *d++ = *s++;
1252 continue;
1253 }
a0ed51b3
LW
1254 }
1255 }
1256
02aa26ce 1257 /* backslashes */
79072805
LW
1258 if (*s == '\\' && s+1 < send) {
1259 s++;
02aa26ce
NT
1260
1261 /* some backslashes we leave behind */
c9f97d15 1262 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1263 *d++ = '\\';
1264 *d++ = *s++;
1265 continue;
1266 }
02aa26ce
NT
1267
1268 /* deprecate \1 in strings and substitution replacements */
3280af22 1269 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1270 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1271 {
d008e5eb 1272 dTHR; /* only for ckWARN */
599cee73 1273 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1274 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1275 *--s = '$';
1276 break;
1277 }
02aa26ce
NT
1278
1279 /* string-change backslash escapes */
3280af22 1280 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1281 --s;
1282 break;
1283 }
02aa26ce
NT
1284
1285 /* if we get here, it's either a quoted -, or a digit */
79072805 1286 switch (*s) {
02aa26ce
NT
1287
1288 /* quoted - in transliterations */
79072805 1289 case '-':
3280af22 1290 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1291 *d++ = *s++;
1292 continue;
1293 }
1294 /* FALL THROUGH */
1295 default:
11b8faa4
JH
1296 {
1297 dTHR;
1298 if (ckWARN(WARN_UNSAFE) && isALPHA(*s))
cea2e8a9 1299 Perl_warner(aTHX_ WARN_UNSAFE,
11b8faa4
JH
1300 "Unrecognized escape \\%c passed through",
1301 *s);
1302 /* default action is to copy the quoted character */
1303 *d++ = *s++;
1304 continue;
1305 }
02aa26ce
NT
1306
1307 /* \132 indicates an octal constant */
79072805
LW
1308 case '0': case '1': case '2': case '3':
1309 case '4': case '5': case '6': case '7':
1310 *d++ = scan_oct(s, 3, &len);
1311 s += len;
1312 continue;
02aa26ce
NT
1313
1314 /* \x24 indicates a hex constant */
79072805 1315 case 'x':
a0ed51b3
LW
1316 ++s;
1317 if (*s == '{') {
1318 char* e = strchr(s, '}');
1319
adaeee49 1320 if (!e) {
a0ed51b3 1321 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1322 e = s;
1323 }
d008e5eb
GS
1324 if (!utf) {
1325 dTHR;
1326 if (ckWARN(WARN_UTF8))
cea2e8a9 1327 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb
GS
1328 "Use of \\x{} without utf8 declaration");
1329 }
a0ed51b3 1330 /* note: utf always shorter than hex */
dfe13c55
GS
1331 d = (char*)uv_to_utf8((U8*)d,
1332 scan_hex(s + 1, e - s - 1, &len));
a0ed51b3 1333 s = e + 1;
a0ed51b3
LW
1334 }
1335 else {
1336 UV uv = (UV)scan_hex(s, 2, &len);
1337 if (utf && PL_lex_inwhat == OP_TRANS &&
1338 utf != (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1339 {
dfe13c55 1340 d = (char*)uv_to_utf8((U8*)d, uv); /* doing a CU or UC */
a0ed51b3
LW
1341 }
1342 else {
d008e5eb
GS
1343 if (uv >= 127 && UTF) {
1344 dTHR;
1345 if (ckWARN(WARN_UTF8))
cea2e8a9 1346 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb
GS
1347 "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that",
1348 len,s,len,s);
1349 }
a0ed51b3
LW
1350 *d++ = (char)uv;
1351 }
1352 s += len;
1353 }
79072805 1354 continue;
02aa26ce 1355
423cee85
JH
1356 /* \C{latin small letter a} is a named character */
1357 case 'C':
1358 ++s;
1359 if (*s == '{') {
1360 char* e = strchr(s, '}');
1361 HV *hv;
1362 SV **svp;
1363 SV *res, *cv;
1364 STRLEN len;
1365 char *str;
1366 char *why = Nullch;
1367
1368 if (!e) {
1369 yyerror("Missing right brace on \\C{}");
1370 e = s - 1;
1371 goto cont_scan;
1372 }
1373 res = newSVpvn(s + 1, e - s - 1);
1374 res = new_constant( Nullch, 0, "charnames",
1375 res, Nullsv, "\\C{...}" );
1376 str = SvPV(res,len);
1377 if (len > e - s + 4) {
1378 char *odest = SvPVX(sv);
1379
1380 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1381 d = SvPVX(sv) + (d - odest);
1382 }
1383 Copy(str, d, len, char);
1384 d += len;
1385 SvREFCNT_dec(res);
1386 cont_scan:
1387 s = e + 1;
1388 }
1389 else
1390 yyerror("Missing braces on \\C{}");
1391 continue;
1392
02aa26ce 1393 /* \c is a control character */
79072805
LW
1394 case 'c':
1395 s++;
9d116dd7
JH
1396#ifdef EBCDIC
1397 *d = *s++;
1398 if (isLOWER(*d))
1399 *d = toUPPER(*d);
1400 *d++ = toCTRL(*d);
1401#else
bbce6d69 1402 len = *s++;
1403 *d++ = toCTRL(len);
9d116dd7 1404#endif
79072805 1405 continue;
02aa26ce
NT
1406
1407 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1408 case 'b':
1409 *d++ = '\b';
1410 break;
1411 case 'n':
1412 *d++ = '\n';
1413 break;
1414 case 'r':
1415 *d++ = '\r';
1416 break;
1417 case 'f':
1418 *d++ = '\f';
1419 break;
1420 case 't':
1421 *d++ = '\t';
1422 break;
34a3fe2a
PP
1423#ifdef EBCDIC
1424 case 'e':
1425 *d++ = '\047'; /* CP 1047 */
1426 break;
1427 case 'a':
1428 *d++ = '\057'; /* CP 1047 */
1429 break;
1430#else
79072805
LW
1431 case 'e':
1432 *d++ = '\033';
1433 break;
1434 case 'a':
1435 *d++ = '\007';
1436 break;
34a3fe2a 1437#endif
02aa26ce
NT
1438 } /* end switch */
1439
79072805
LW
1440 s++;
1441 continue;
02aa26ce
NT
1442 } /* end if (backslash) */
1443
79072805 1444 *d++ = *s++;
02aa26ce
NT
1445 } /* while loop to process each character */
1446
1447 /* terminate the string and set up the sv */
79072805 1448 *d = '\0';
463ee0b2 1449 SvCUR_set(sv, d - SvPVX(sv));
79072805
LW
1450 SvPOK_on(sv);
1451
02aa26ce 1452 /* shrink the sv if we allocated more than we used */
79072805
LW
1453 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1454 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1455 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1456 }
02aa26ce 1457
9b599b2a 1458 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1459 if (s > PL_bufptr) {
1460 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1461 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1462 sv, Nullsv,
3280af22 1463 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1464 ? "tr"
3280af22 1465 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1466 ? "s"
1467 : "qq")));
79072805 1468 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1469 } else
8990e307 1470 SvREFCNT_dec(sv);
79072805
LW
1471 return s;
1472}
1473
ffb4593c
NT
1474/* S_intuit_more
1475 * Returns TRUE if there's more to the expression (e.g., a subscript),
1476 * FALSE otherwise.
ffb4593c
NT
1477 *
1478 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1479 *
1480 * ->[ and ->{ return TRUE
1481 * { and [ outside a pattern are always subscripts, so return TRUE
1482 * if we're outside a pattern and it's not { or [, then return FALSE
1483 * if we're in a pattern and the first char is a {
1484 * {4,5} (any digits around the comma) returns FALSE
1485 * if we're in a pattern and the first char is a [
1486 * [] returns FALSE
1487 * [SOMETHING] has a funky algorithm to decide whether it's a
1488 * character class or not. It has to deal with things like
1489 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1490 * anything else returns TRUE
1491 */
1492
9cbb5ea2
GS
1493/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1494
76e3520e 1495STATIC int
cea2e8a9 1496S_intuit_more(pTHX_ register char *s)
79072805 1497{
3280af22 1498 if (PL_lex_brackets)
79072805
LW
1499 return TRUE;
1500 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1501 return TRUE;
1502 if (*s != '{' && *s != '[')
1503 return FALSE;
3280af22 1504 if (!PL_lex_inpat)
79072805
LW
1505 return TRUE;
1506
1507 /* In a pattern, so maybe we have {n,m}. */
1508 if (*s == '{') {
1509 s++;
1510 if (!isDIGIT(*s))
1511 return TRUE;
1512 while (isDIGIT(*s))
1513 s++;
1514 if (*s == ',')
1515 s++;
1516 while (isDIGIT(*s))
1517 s++;
1518 if (*s == '}')
1519 return FALSE;
1520 return TRUE;
1521
1522 }
1523
1524 /* On the other hand, maybe we have a character class */
1525
1526 s++;
1527 if (*s == ']' || *s == '^')
1528 return FALSE;
1529 else {
ffb4593c 1530 /* this is terrifying, and it works */
79072805
LW
1531 int weight = 2; /* let's weigh the evidence */
1532 char seen[256];
f27ffc4a 1533 unsigned char un_char = 255, last_un_char;
93a17b20 1534 char *send = strchr(s,']');
3280af22 1535 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1536
1537 if (!send) /* has to be an expression */
1538 return TRUE;
1539
1540 Zero(seen,256,char);
1541 if (*s == '$')
1542 weight -= 3;
1543 else if (isDIGIT(*s)) {
1544 if (s[1] != ']') {
1545 if (isDIGIT(s[1]) && s[2] == ']')
1546 weight -= 10;
1547 }
1548 else
1549 weight -= 100;
1550 }
1551 for (; s < send; s++) {
1552 last_un_char = un_char;
1553 un_char = (unsigned char)*s;
1554 switch (*s) {
1555 case '@':
1556 case '&':
1557 case '$':
1558 weight -= seen[un_char] * 10;
834a4ddd 1559 if (isALNUM_lazy(s+1)) {
8903cb82 1560 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1561 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1562 weight -= 100;
1563 else
1564 weight -= 10;
1565 }
1566 else if (*s == '$' && s[1] &&
93a17b20
LW
1567 strchr("[#!%*<>()-=",s[1])) {
1568 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1569 weight -= 10;
1570 else
1571 weight -= 1;
1572 }
1573 break;
1574 case '\\':
1575 un_char = 254;
1576 if (s[1]) {
93a17b20 1577 if (strchr("wds]",s[1]))
79072805
LW
1578 weight += 100;
1579 else if (seen['\''] || seen['"'])
1580 weight += 1;
93a17b20 1581 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1582 weight += 40;
1583 else if (isDIGIT(s[1])) {
1584 weight += 40;
1585 while (s[1] && isDIGIT(s[1]))
1586 s++;
1587 }
1588 }
1589 else
1590 weight += 100;
1591 break;
1592 case '-':
1593 if (s[1] == '\\')
1594 weight += 50;
93a17b20 1595 if (strchr("aA01! ",last_un_char))
79072805 1596 weight += 30;
93a17b20 1597 if (strchr("zZ79~",s[1]))
79072805 1598 weight += 30;
f27ffc4a
GS
1599 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1600 weight -= 5; /* cope with negative subscript */
79072805
LW
1601 break;
1602 default:
93a17b20 1603 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1604 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1605 char *d = tmpbuf;
1606 while (isALPHA(*s))
1607 *d++ = *s++;
1608 *d = '\0';
1609 if (keyword(tmpbuf, d - tmpbuf))
1610 weight -= 150;
1611 }
1612 if (un_char == last_un_char + 1)
1613 weight += 5;
1614 weight -= seen[un_char];
1615 break;
1616 }
1617 seen[un_char]++;
1618 }
1619 if (weight >= 0) /* probably a character class */
1620 return FALSE;
1621 }
1622
1623 return TRUE;
1624}
ffed7fef 1625
ffb4593c
NT
1626/*
1627 * S_intuit_method
1628 *
1629 * Does all the checking to disambiguate
1630 * foo bar
1631 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1632 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1633 *
1634 * First argument is the stuff after the first token, e.g. "bar".
1635 *
1636 * Not a method if bar is a filehandle.
1637 * Not a method if foo is a subroutine prototyped to take a filehandle.
1638 * Not a method if it's really "Foo $bar"
1639 * Method if it's "foo $bar"
1640 * Not a method if it's really "print foo $bar"
1641 * Method if it's really "foo package::" (interpreted as package->foo)
1642 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
1643 * Not a method if bar is a filehandle or package, but is quotd with
1644 * =>
1645 */
1646
76e3520e 1647STATIC int
cea2e8a9 1648S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1649{
1650 char *s = start + (*start == '$');
3280af22 1651 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1652 STRLEN len;
1653 GV* indirgv;
1654
1655 if (gv) {
b6c543e3 1656 CV *cv;
a0d0e21e
LW
1657 if (GvIO(gv))
1658 return 0;
b6c543e3
IZ
1659 if ((cv = GvCVu(gv))) {
1660 char *proto = SvPVX(cv);
1661 if (proto) {
1662 if (*proto == ';')
1663 proto++;
1664 if (*proto == '*')
1665 return 0;
1666 }
1667 } else
a0d0e21e
LW
1668 gv = 0;
1669 }
8903cb82 1670 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1671 /* start is the beginning of the possible filehandle/object,
1672 * and s is the end of it
1673 * tmpbuf is a copy of it
1674 */
1675
a0d0e21e 1676 if (*start == '$') {
3280af22 1677 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1678 return 0;
1679 s = skipspace(s);
3280af22
NIS
1680 PL_bufptr = start;
1681 PL_expect = XREF;
a0d0e21e
LW
1682 return *s == '(' ? FUNCMETH : METHOD;
1683 }
1684 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1685 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1686 len -= 2;
1687 tmpbuf[len] = '\0';
1688 goto bare_package;
1689 }
1690 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1691 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1692 return 0;
1693 /* filehandle or package name makes it a method */
89bfa8cd 1694 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1695 s = skipspace(s);
3280af22 1696 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1697 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1698 bare_package:
3280af22 1699 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1700 newSVpvn(tmpbuf,len));
3280af22
NIS
1701 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1702 PL_expect = XTERM;
a0d0e21e 1703 force_next(WORD);
3280af22 1704 PL_bufptr = s;
a0d0e21e
LW
1705 return *s == '(' ? FUNCMETH : METHOD;
1706 }
1707 }
1708 return 0;
1709}
1710
ffb4593c
NT
1711/*
1712 * S_incl_perldb
1713 * Return a string of Perl code to load the debugger. If PERL5DB
1714 * is set, it will return the contents of that, otherwise a
1715 * compile-time require of perl5db.pl.
1716 */
1717
76e3520e 1718STATIC char*
cea2e8a9 1719S_incl_perldb(pTHX)
a0d0e21e 1720{
3280af22 1721 if (PL_perldb) {
76e3520e 1722 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1723
1724 if (pdb)
1725 return pdb;
61bb5906 1726 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1727 return "BEGIN { require 'perl5db.pl' }";
1728 }
1729 return "";
1730}
1731
1732
16d20bd9
AD
1733/* Encoded script support. filter_add() effectively inserts a
1734 * 'pre-processing' function into the current source input stream.
1735 * Note that the filter function only applies to the current source file
1736 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1737 *
1738 * The datasv parameter (which may be NULL) can be used to pass
1739 * private data to this instance of the filter. The filter function
1740 * can recover the SV using the FILTER_DATA macro and use it to
1741 * store private buffers and state information.
1742 *
1743 * The supplied datasv parameter is upgraded to a PVIO type
1744 * and the IoDIRP field is used to store the function pointer.
1745 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1746 * private use must be set using malloc'd pointers.
1747 */
16d20bd9
AD
1748
1749SV *
864dbfa3 1750Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9
AD
1751{
1752 if (!funcp){ /* temporary handy debugging hack to be deleted */
80252599 1753 PL_filter_debug = atoi((char*)datasv);
16d20bd9
AD
1754 return NULL;
1755 }
3280af22
NIS
1756 if (!PL_rsfp_filters)
1757 PL_rsfp_filters = newAV();
16d20bd9 1758 if (!datasv)
8c52afec 1759 datasv = NEWSV(255,0);
16d20bd9 1760 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1761 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
16d20bd9 1762 IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
0453d815 1763#ifdef DEBUGGING
80252599 1764 if (PL_filter_debug) {
2d8e6c8d 1765 STRLEN n_a;
cea2e8a9 1766 Perl_warn(aTHX_ "filter_add func %p (%s)", funcp, SvPV(datasv, n_a));
2d8e6c8d 1767 }
0453d815 1768#endif /* DEBUGGING */
3280af22
NIS
1769 av_unshift(PL_rsfp_filters, 1);
1770 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1771 return(datasv);
1772}
1773
1774
1775/* Delete most recently added instance of this filter function. */
a0d0e21e 1776void
864dbfa3 1777Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1778{
0453d815 1779#ifdef DEBUGGING
80252599 1780 if (PL_filter_debug)
cea2e8a9 1781 Perl_warn(aTHX_ "filter_del func %p", funcp);
0453d815 1782#endif /* DEBUGGING */
3280af22 1783 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1784 return;
1785 /* if filter is on top of stack (usual case) just pop it off */
677ca527 1786 if (IoDIRP(FILTER_DATA(AvFILLp(PL_rsfp_filters))) == (DIR*)funcp){
a6c40364 1787 IoDIRP(FILTER_DATA(AvFILLp(PL_rsfp_filters))) = NULL;
3280af22 1788 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1789
16d20bd9
AD
1790 return;
1791 }
1792 /* we need to search for the correct entry and clear it */
cea2e8a9 1793 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1794}
1795
1796
1797/* Invoke the n'th filter function for the current rsfp. */
1798I32
864dbfa3 1799Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1800
1801
1802 /* 0 = read one text line */
a0d0e21e 1803{
16d20bd9
AD
1804 filter_t funcp;
1805 SV *datasv = NULL;
e50aee73 1806
3280af22 1807 if (!PL_rsfp_filters)
16d20bd9 1808 return -1;
3280af22 1809 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1810 /* Provide a default input filter to make life easy. */
1811 /* Note that we append to the line. This is handy. */
0453d815 1812#ifdef DEBUGGING
80252599 1813 if (PL_filter_debug)
cea2e8a9 1814 Perl_warn(aTHX_ "filter_read %d: from rsfp\n", idx);
0453d815 1815#endif /* DEBUGGING */
16d20bd9
AD
1816 if (maxlen) {
1817 /* Want a block */
1818 int len ;
1819 int old_len = SvCUR(buf_sv) ;
1820
1821 /* ensure buf_sv is large enough */
1822 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1823 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1824 if (PerlIO_error(PL_rsfp))
37120919
AD
1825 return -1; /* error */
1826 else
1827 return 0 ; /* end of file */
1828 }
16d20bd9
AD
1829 SvCUR_set(buf_sv, old_len + len) ;
1830 } else {
1831 /* Want a line */
3280af22
NIS
1832 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1833 if (PerlIO_error(PL_rsfp))
37120919
AD
1834 return -1; /* error */
1835 else
1836 return 0 ; /* end of file */
1837 }
16d20bd9
AD
1838 }
1839 return SvCUR(buf_sv);
1840 }
1841 /* Skip this filter slot if filter has been deleted */
3280af22 1842 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
0453d815 1843#ifdef DEBUGGING
80252599 1844 if (PL_filter_debug)
cea2e8a9 1845 Perl_warn(aTHX_ "filter_read %d: skipped (filter deleted)\n", idx);
0453d815 1846#endif /* DEBUGGING */
16d20bd9
AD
1847 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1848 }
1849 /* Get function pointer hidden within datasv */
1850 funcp = (filter_t)IoDIRP(datasv);
0453d815 1851#ifdef DEBUGGING
80252599 1852 if (PL_filter_debug) {
2d8e6c8d 1853 STRLEN n_a;
cea2e8a9 1854 Perl_warn(aTHX_ "filter_read %d: via function %p (%s)\n",
2d8e6c8d
GS
1855 idx, funcp, SvPV(datasv,n_a));
1856 }
0453d815 1857#endif /* DEBUGGING */
16d20bd9
AD
1858 /* Call function. The function is expected to */
1859 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1860 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1861 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1862}
1863
76e3520e 1864STATIC char *
cea2e8a9 1865S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1866{
a868473f 1867#ifdef WIN32FILTER
3280af22 1868 if (!PL_rsfp_filters) {
a868473f
NIS
1869 filter_add(win32_textfilter,NULL);
1870 }
1871#endif
3280af22 1872 if (PL_rsfp_filters) {
16d20bd9 1873
55497cff 1874 if (!append)
1875 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1876 if (FILTER_READ(0, sv, 0) > 0)
1877 return ( SvPVX(sv) ) ;
1878 else
1879 return Nullch ;
1880 }
9d116dd7 1881 else
fd049845 1882 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1883}
1884
1885
748a9306
LW
1886#ifdef DEBUGGING
1887 static char* exp_name[] =
a0d0e21e 1888 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "TERMBLOCK" };
748a9306 1889#endif
463ee0b2 1890
02aa26ce
NT
1891/*
1892 yylex
1893
1894 Works out what to call the token just pulled out of the input
1895 stream. The yacc parser takes care of taking the ops we return and
1896 stitching them into a tree.
1897
1898 Returns:
1899 PRIVATEREF
1900
1901 Structure:
1902 if read an identifier
1903 if we're in a my declaration
1904 croak if they tried to say my($foo::bar)
1905 build the ops for a my() declaration
1906 if it's an access to a my() variable
1907 are we in a sort block?
1908 croak if my($a); $a <=> $b
1909 build ops for access to a my() variable
1910 if in a dq string, and they've said @foo and we can't find @foo
1911 croak
1912 build ops for a bareword
1913 if we already built the token before, use it.
1914*/
1915
864dbfa3
GS
1916int
1917#ifdef USE_PURE_BISON
cea2e8a9 1918Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 1919#else
cea2e8a9 1920Perl_yylex(pTHX)
864dbfa3 1921#endif
378cc40b 1922{
11343788 1923 dTHR;
79072805 1924 register char *s;
378cc40b 1925 register char *d;
79072805 1926 register I32 tmp;
463ee0b2 1927 STRLEN len;
161b471a
NIS
1928 GV *gv = Nullgv;
1929 GV **gvp = 0;
a687059c 1930
a1a0e61e
TD
1931#ifdef USE_PURE_BISON
1932 yylval_pointer = lvalp;
1933 yychar_pointer = lcharp;
1934#endif
1935
02aa26ce 1936 /* check if there's an identifier for us to look at */
3280af22 1937 if (PL_pending_ident) {
02aa26ce 1938 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
1939 char pit = PL_pending_ident;
1940 PL_pending_ident = 0;
bbce6d69 1941
02aa26ce
NT
1942 /* if we're in a my(), we can't allow dynamics here.
1943 $foo'bar has already been turned into $foo::bar, so
1944 just check for colons.
1945
1946 if it's a legal name, the OP is a PADANY.
1947 */
3280af22
NIS
1948 if (PL_in_my) {
1949 if (strchr(PL_tokenbuf,':'))
cea2e8a9 1950 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 1951
bbce6d69 1952 yylval.opval = newOP(OP_PADANY, 0);
3280af22 1953 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
bbce6d69 1954 return PRIVATEREF;
1955 }
1956
02aa26ce
NT
1957 /*
1958 build the ops for accesses to a my() variable.
1959
1960 Deny my($a) or my($b) in a sort block, *if* $a or $b is
1961 then used in a comparison. This catches most, but not
1962 all cases. For instance, it catches
1963 sort { my($a); $a <=> $b }
1964 but not
1965 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
1966 (although why you'd do that is anyone's guess).
1967 */
1968
3280af22 1969 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 1970#ifdef USE_THREADS
54b9620d 1971 /* Check for single character per-thread SVs */
3280af22
NIS
1972 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
1973 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
1974 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 1975 {
2faa37cc 1976 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
1977 yylval.opval->op_targ = tmp;
1978 return PRIVATEREF;
1979 }
1980#endif /* USE_THREADS */
3280af22 1981 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
02aa26ce 1982 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
1983 if (PL_last_lop_op == OP_SORT &&
1984 PL_tokenbuf[0] == '$' &&
1985 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
1986 && !PL_tokenbuf[2])
bbce6d69 1987 {
3280af22
NIS
1988 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
1989 d < PL_bufend && *d != '\n';
a863c7d1
MB
1990 d++)
1991 {
1992 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 1993 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 1994 PL_tokenbuf);
a863c7d1 1995 }
bbce6d69 1996 }
1997 }
bbce6d69 1998
a863c7d1
MB
1999 yylval.opval = newOP(OP_PADANY, 0);
2000 yylval.opval->op_targ = tmp;
2001 return PRIVATEREF;
2002 }
bbce6d69 2003 }
2004
02aa26ce
NT
2005 /*
2006 Whine if they've said @foo in a doublequoted string,
2007 and @foo isn't a variable we can find in the symbol
2008 table.
2009 */
3280af22
NIS
2010 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2011 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
2012 if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
cea2e8a9 2013 yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s",
3280af22 2014 PL_tokenbuf, PL_tokenbuf));
bbce6d69 2015 }
2016
02aa26ce 2017 /* build ops for a bareword */
3280af22 2018 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2019 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2020 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2021 ((PL_tokenbuf[0] == '$') ? SVt_PV
2022 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69 2023 : SVt_PVHV));
2024 return WORD;
2025 }
2026
02aa26ce
NT
2027 /* no identifier pending identification */
2028
3280af22 2029 switch (PL_lex_state) {
79072805
LW
2030#ifdef COMMENTARY
2031 case LEX_NORMAL: /* Some compilers will produce faster */
2032 case LEX_INTERPNORMAL: /* code if we comment these out. */
2033 break;
2034#endif
2035
02aa26ce 2036 /* when we're already built the next token, just pull it out the queue */
79072805 2037 case LEX_KNOWNEXT:
3280af22
NIS
2038 PL_nexttoke--;
2039 yylval = PL_nextval[PL_nexttoke];
2040 if (!PL_nexttoke) {
2041 PL_lex_state = PL_lex_defer;
2042 PL_expect = PL_lex_expect;
2043 PL_lex_defer = LEX_NORMAL;
463ee0b2 2044 }
3280af22 2045 return(PL_nexttype[PL_nexttoke]);
79072805 2046
02aa26ce 2047 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2048 when we get here, PL_bufptr is at the \
02aa26ce 2049 */
79072805
LW
2050 case LEX_INTERPCASEMOD:
2051#ifdef DEBUGGING
3280af22 2052 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2053 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2054#endif
02aa26ce 2055 /* handle \E or end of string */
3280af22 2056 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2057 char oldmod;
02aa26ce
NT
2058
2059 /* if at a \E */
3280af22
NIS
2060 if (PL_lex_casemods) {
2061 oldmod = PL_lex_casestack[--PL_lex_casemods];
2062 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2063
3280af22
NIS
2064 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2065 PL_bufptr += 2;
2066 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2067 }
79072805
LW
2068 return ')';
2069 }
3280af22
NIS
2070 if (PL_bufptr != PL_bufend)
2071 PL_bufptr += 2;
2072 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2073 return yylex();
79072805
LW
2074 }
2075 else {
3280af22 2076 s = PL_bufptr + 1;
79072805
LW
2077 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2078 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2079 if (strchr("LU", *s) &&
3280af22 2080 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2081 {
3280af22 2082 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2083 return ')';
2084 }
3280af22
NIS
2085 if (PL_lex_casemods > 10) {
2086 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2087 if (newlb != PL_lex_casestack) {
a0d0e21e 2088 SAVEFREEPV(newlb);
3280af22 2089 PL_lex_casestack = newlb;
a0d0e21e
LW
2090 }
2091 }
3280af22
NIS
2092 PL_lex_casestack[PL_lex_casemods++] = *s;
2093 PL_lex_casestack[PL_lex_casemods] = '\0';
2094 PL_lex_state = LEX_INTERPCONCAT;
2095 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2096 force_next('(');
2097 if (*s == 'l')
3280af22 2098 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2099 else if (*s == 'u')
3280af22 2100 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2101 else if (*s == 'L')
3280af22 2102 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2103 else if (*s == 'U')
3280af22 2104 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2105 else if (*s == 'Q')
3280af22 2106 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2107 else
cea2e8a9 2108 Perl_croak(aTHX_ "panic: yylex");
3280af22 2109 PL_bufptr = s + 1;
79072805 2110 force_next(FUNC);
3280af22
NIS
2111 if (PL_lex_starts) {
2112 s = PL_bufptr;
2113 PL_lex_starts = 0;
79072805
LW
2114 Aop(OP_CONCAT);
2115 }
2116 else
cea2e8a9 2117 return yylex();
79072805
LW
2118 }
2119
55497cff 2120 case LEX_INTERPPUSH:
2121 return sublex_push();
2122
79072805 2123 case LEX_INTERPSTART:
3280af22 2124 if (PL_bufptr == PL_bufend)
79072805 2125 return sublex_done();
3280af22
NIS
2126 PL_expect = XTERM;
2127 PL_lex_dojoin = (*PL_bufptr == '@');
2128 PL_lex_state = LEX_INTERPNORMAL;
2129 if (PL_lex_dojoin) {
2130 PL_nextval[PL_nexttoke].ival = 0;
79072805 2131 force_next(',');
554b3eca 2132#ifdef USE_THREADS
533c011a
NIS
2133 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2134 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2135 force_next(PRIVATEREF);
2136#else
a0d0e21e 2137 force_ident("\"", '$');
554b3eca 2138#endif /* USE_THREADS */
3280af22 2139 PL_nextval[PL_nexttoke].ival = 0;
79072805 2140 force_next('$');
3280af22 2141 PL_nextval[PL_nexttoke].ival = 0;
79072805 2142 force_next('(');
3280af22 2143 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2144 force_next(FUNC);
2145 }
3280af22
NIS
2146 if (PL_lex_starts++) {
2147 s = PL_bufptr;
79072805
LW
2148 Aop(OP_CONCAT);
2149 }
cea2e8a9 2150 return yylex();
79072805
LW
2151
2152 case LEX_INTERPENDMAYBE:
3280af22
NIS
2153 if (intuit_more(PL_bufptr)) {
2154 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2155 break;
2156 }
2157 /* FALL THROUGH */
2158
2159 case LEX_INTERPEND:
3280af22
NIS
2160 if (PL_lex_dojoin) {
2161 PL_lex_dojoin = FALSE;
2162 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2163 return ')';
2164 }
43a16006 2165 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2166 && SvEVALED(PL_lex_repl))
43a16006 2167 {
e9fa98b2 2168 if (PL_bufptr != PL_bufend)
cea2e8a9 2169 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2170 PL_lex_repl = Nullsv;
2171 }
79072805
LW
2172 /* FALLTHROUGH */
2173 case LEX_INTERPCONCAT:
2174#ifdef DEBUGGING
3280af22 2175 if (PL_lex_brackets)
cea2e8a9 2176 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2177#endif
3280af22 2178 if (PL_bufptr == PL_bufend)
79072805
LW
2179 return sublex_done();
2180
3280af22
NIS
2181 if (SvIVX(PL_linestr) == '\'') {
2182 SV *sv = newSVsv(PL_linestr);
2183 if (!PL_lex_inpat)
76e3520e 2184 sv = tokeq(sv);
3280af22 2185 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2186 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2187 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2188 s = PL_bufend;
79072805
LW
2189 }
2190 else {
3280af22 2191 s = scan_const(PL_bufptr);
79072805 2192 if (*s == '\\')
3280af22 2193 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2194 else
3280af22 2195 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2196 }
2197
3280af22
NIS
2198 if (s != PL_bufptr) {
2199 PL_nextval[PL_nexttoke] = yylval;
2200 PL_expect = XTERM;
79072805 2201 force_next(THING);
3280af22 2202 if (PL_lex_starts++)
79072805
LW
2203 Aop(OP_CONCAT);
2204 else {
3280af22 2205 PL_bufptr = s;
cea2e8a9 2206 return yylex();
79072805
LW
2207 }
2208 }
2209
cea2e8a9 2210 return yylex();
a0d0e21e 2211 case LEX_FORMLINE:
3280af22
NIS
2212 PL_lex_state = LEX_NORMAL;
2213 s = scan_formline(PL_bufptr);
2214 if (!PL_lex_formbrack)
a0d0e21e
LW
2215 goto rightbracket;
2216 OPERATOR(';');
79072805
LW
2217 }
2218
3280af22
NIS
2219 s = PL_bufptr;
2220 PL_oldoldbufptr = PL_oldbufptr;
2221 PL_oldbufptr = s;
79072805 2222 DEBUG_p( {
3280af22 2223 PerlIO_printf(PerlIO_stderr(), "### Tokener expecting %s at %s\n", exp_name[PL_expect], s);
79072805 2224 } )
463ee0b2
LW
2225
2226 retry:
378cc40b
LW
2227 switch (*s) {
2228 default:
834a4ddd
LW
2229 if (isIDFIRST_lazy(s))
2230 goto keylookup;
cea2e8a9 2231 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2232 case 4:
2233 case 26:
2234 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2235 case 0:
3280af22
NIS
2236 if (!PL_rsfp) {
2237 PL_last_uni = 0;
2238 PL_last_lop = 0;
2239 if (PL_lex_brackets)
d98d5fff 2240 yyerror("Missing right curly or square bracket");
79072805 2241 TOKEN(0);
463ee0b2 2242 }
3280af22 2243 if (s++ < PL_bufend)
a687059c 2244 goto retry; /* ignore stray nulls */
3280af22
NIS
2245 PL_last_uni = 0;
2246 PL_last_lop = 0;
2247 if (!PL_in_eval && !PL_preambled) {
2248 PL_preambled = TRUE;
2249 sv_setpv(PL_linestr,incl_perldb());
2250 if (SvCUR(PL_linestr))
2251 sv_catpv(PL_linestr,";");
2252 if (PL_preambleav){
2253 while(AvFILLp(PL_preambleav) >= 0) {
2254 SV *tmpsv = av_shift(PL_preambleav);
2255 sv_catsv(PL_linestr, tmpsv);
2256 sv_catpv(PL_linestr, ";");
91b7def8 2257 sv_free(tmpsv);
2258 }
3280af22
NIS
2259 sv_free((SV*)PL_preambleav);
2260 PL_preambleav = NULL;
91b7def8 2261 }
3280af22
NIS
2262 if (PL_minus_n || PL_minus_p) {
2263 sv_catpv(PL_linestr, "LINE: while (<>) {");
2264 if (PL_minus_l)
2265 sv_catpv(PL_linestr,"chomp;");
2266 if (PL_minus_a) {
8fd239a7
CS
2267 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2268 if (gv)
2269 GvIMPORTED_AV_on(gv);
3280af22
NIS
2270 if (PL_minus_F) {
2271 if (strchr("/'\"", *PL_splitstr)
2272 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2273 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121 2274 else {
2275 char delim;
2276 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2277 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2278 delim = *s;
cea2e8a9 2279 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2280 "q" + (delim == '\''), delim);
3280af22 2281 for (s = PL_splitstr; *s; s++) {
54310121 2282 if (*s == '\\')
3280af22
NIS
2283 sv_catpvn(PL_linestr, "\\", 1);
2284 sv_catpvn(PL_linestr, s, 1);
54310121 2285 }
cea2e8a9 2286 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2287 }
2304df62
AD
2288 }
2289 else
3280af22 2290 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2291 }
79072805 2292 }
3280af22
NIS
2293 sv_catpv(PL_linestr, "\n");
2294 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2295 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2296 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2297 SV *sv = NEWSV(85,0);
2298
2299 sv_upgrade(sv, SVt_PVMG);
3280af22
NIS
2300 sv_setsv(sv,PL_linestr);
2301 av_store(GvAV(PL_curcop->cop_filegv),(I32)PL_curcop->cop_line,sv);
a0d0e21e 2302 }
79072805 2303 goto retry;
a687059c 2304 }
e929a76b 2305 do {
3280af22 2306 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
e929a76b 2307 fake_eof:
3280af22
NIS
2308 if (PL_rsfp) {
2309 if (PL_preprocess && !PL_in_eval)
2310 (void)PerlProc_pclose(PL_rsfp);
2311 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2312 PerlIO_clearerr(PL_rsfp);
395c3793 2313 else
3280af22
NIS
2314 (void)PerlIO_close(PL_rsfp);
2315 PL_rsfp = Nullfp;
4a9ae47a 2316 PL_doextract = FALSE;
395c3793 2317 }
3280af22
NIS
2318 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2319 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2320 sv_catpv(PL_linestr,";}");
2321 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2322 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2323 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2324 goto retry;
2325 }
3280af22
NIS
2326 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2327 sv_setpv(PL_linestr,"");
79072805 2328 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
378cc40b 2329 }
3280af22 2330 if (PL_doextract) {
a0d0e21e 2331 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2332 PL_doextract = FALSE;
a0d0e21e
LW
2333
2334 /* Incest with pod. */
2335 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2336 sv_setpv(PL_linestr, "");
2337 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2338 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2339 PL_doextract = FALSE;
a0d0e21e
LW
2340 }
2341 }
463ee0b2 2342 incline(s);
3280af22
NIS
2343 } while (PL_doextract);
2344 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2345 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2346 SV *sv = NEWSV(85,0);
a687059c 2347
93a17b20 2348 sv_upgrade(sv, SVt_PVMG);
3280af22
NIS
2349 sv_setsv(sv,PL_linestr);
2350 av_store(GvAV(PL_curcop->cop_filegv),(I32)PL_curcop->cop_line,sv);
a687059c 2351 }
3280af22
NIS
2352 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2353 if (PL_curcop->cop_line == 1) {
2354 while (s < PL_bufend && isSPACE(*s))
79072805 2355 s++;
a0d0e21e 2356 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2357 s++;
44a8e56a 2358 d = Nullch;
3280af22 2359 if (!PL_in_eval) {
44a8e56a 2360 if (*s == '#' && *(s+1) == '!')
2361 d = s + 2;
2362#ifdef ALTERNATE_SHEBANG
2363 else {
2364 static char as[] = ALTERNATE_SHEBANG;
2365 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2366 d = s + (sizeof(as) - 1);
2367 }
2368#endif /* ALTERNATE_SHEBANG */
2369 }
2370 if (d) {
b8378b72 2371 char *ipath;
774d564b 2372 char *ipathend;
b8378b72 2373
774d564b 2374 while (isSPACE(*d))
b8378b72
CS
2375 d++;
2376 ipath = d;
774d564b 2377 while (*d && !isSPACE(*d))
2378 d++;
2379 ipathend = d;
2380
2381#ifdef ARG_ZERO_IS_SCRIPT
2382 if (ipathend > ipath) {
2383 /*
2384 * HP-UX (at least) sets argv[0] to the script name,
2385 * which makes $^X incorrect. And Digital UNIX and Linux,
2386 * at least, set argv[0] to the basename of the Perl
2387 * interpreter. So, having found "#!", we'll set it right.
2388 */
2389 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2390 assert(SvPOK(x) || SvGMAGICAL(x));
6b88bc9c 2391 if (sv_eq(x, GvSV(PL_curcop->cop_filegv))) {
774d564b 2392 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2393 SvSETMAGIC(x);
2394 }
774d564b 2395 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2396 }
774d564b 2397#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2398
2399 /*
2400 * Look for options.
2401 */
748a9306
LW
2402 d = instr(s,"perl -");
2403 if (!d)
2404 d = instr(s,"perl");
44a8e56a 2405#ifdef ALTERNATE_SHEBANG
2406 /*
2407 * If the ALTERNATE_SHEBANG on this system starts with a
2408 * character that can be part of a Perl expression, then if
2409 * we see it but not "perl", we're probably looking at the
2410 * start of Perl code, not a request to hand off to some
2411 * other interpreter. Similarly, if "perl" is there, but
2412 * not in the first 'word' of the line, we assume the line
2413 * contains the start of the Perl program.
44a8e56a 2414 */
2415 if (d && *s != '#') {
774d564b 2416 char *c = ipath;
44a8e56a 2417 while (*c && !strchr("; \t\r\n\f\v#", *c))
2418 c++;
2419 if (c < d)
2420 d = Nullch; /* "perl" not in first word; ignore */
2421 else
2422 *s = '#'; /* Don't try to parse shebang line */
2423 }
774d564b 2424#endif /* ALTERNATE_SHEBANG */
748a9306 2425 if (!d &&
44a8e56a 2426 *s == '#' &&
774d564b 2427 ipathend > ipath &&
3280af22 2428 !PL_minus_c &&
748a9306 2429 !instr(s,"indir") &&
3280af22 2430 instr(PL_origargv[0],"perl"))
748a9306 2431 {
9f68db38 2432 char **newargv;
9f68db38 2433
774d564b 2434 *ipathend = '\0';
2435 s = ipathend + 1;
3280af22 2436 while (s < PL_bufend && isSPACE(*s))
9f68db38 2437 s++;
3280af22
NIS
2438 if (s < PL_bufend) {
2439 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2440 newargv[1] = s;
3280af22 2441 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2442 s++;
2443 *s = '\0';
3280af22 2444 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2445 }
2446 else
3280af22 2447 newargv = PL_origargv;
774d564b 2448 newargv[0] = ipath;
80252599 2449 PerlProc_execv(ipath, newargv);
cea2e8a9 2450 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2451 }
748a9306 2452 if (d) {
3280af22
NIS
2453 U32 oldpdb = PL_perldb;
2454 bool oldn = PL_minus_n;
2455 bool oldp = PL_minus_p;
748a9306
LW
2456
2457 while (*d && !isSPACE(*d)) d++;
89bfa8cd 2458 while (*d == ' ' || *d == '\t') d++;
748a9306
LW
2459
2460 if (*d++ == '-') {
8cc95fdb 2461 do {
2462 if (*d == 'M' || *d == 'm') {
2463 char *m = d;
2464 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2465 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2466 (int)(d - m), m);
2467 }
2468 d = moreswitches(d);
2469 } while (d);
84902520 2470 if (PERLDB_LINE && !oldpdb ||
3280af22 2471 ( PL_minus_n || PL_minus_p ) && !(oldn || oldp) )
b084f20b 2472 /* if we have already added "LINE: while (<>) {",
2473 we must not do it again */
748a9306 2474 {
3280af22
NIS
2475 sv_setpv(PL_linestr, "");
2476 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2477 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2478 PL_preambled = FALSE;
84902520 2479 if (PERLDB_LINE)
3280af22 2480 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2481 goto retry;
2482 }
a0d0e21e 2483 }
79072805 2484 }
9f68db38 2485 }
79072805 2486 }
3280af22
NIS
2487 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2488 PL_bufptr = s;
2489 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2490 return yylex();
ae986130 2491 }
378cc40b 2492 goto retry;
4fdae800 2493 case '\r':
6a27c188 2494#ifdef PERL_STRICT_CR
cea2e8a9
GS
2495 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2496 Perl_croak(aTHX_
54310121 2497 "(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2498#endif
4fdae800 2499 case ' ': case '\t': case '\f': case 013:
378cc40b
LW
2500 s++;
2501 goto retry;
378cc40b 2502 case '#':
e929a76b 2503 case '\n':
3280af22
NIS
2504 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2505 d = PL_bufend;
a687059c 2506 while (s < d && *s != '\n')
378cc40b 2507 s++;
0f85fab0 2508 if (s < d)
378cc40b 2509 s++;
463ee0b2 2510 incline(s);
3280af22
NIS
2511 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2512 PL_bufptr = s;
2513 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2514 return yylex();
a687059c 2515 }
378cc40b 2516 }
a687059c 2517 else {
378cc40b 2518 *s = '\0';
3280af22 2519 PL_bufend = s;
a687059c 2520 }
378cc40b
LW
2521 goto retry;
2522 case '-':
79072805 2523 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2524 s++;
3280af22 2525 PL_bufptr = s;
748a9306
LW
2526 tmp = *s++;
2527
3280af22 2528 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306
LW
2529 s++;
2530
2531 if (strnEQ(s,"=>",2)) {
3280af22 2532 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2533 OPERATOR('-'); /* unary minus */
2534 }
3280af22
NIS
2535 PL_last_uni = PL_oldbufptr;
2536 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2537 switch (tmp) {
79072805
LW
2538 case 'r': FTST(OP_FTEREAD);
2539 case 'w': FTST(OP_FTEWRITE);
2540 case 'x': FTST(OP_FTEEXEC);
2541 case 'o': FTST(OP_FTEOWNED);
2542 case 'R': FTST(OP_FTRREAD);
2543 case 'W': FTST(OP_FTRWRITE);
2544 case 'X': FTST(OP_FTREXEC);
2545 case 'O': FTST(OP_FTROWNED);
2546 case 'e': FTST(OP_FTIS);
2547 case 'z': FTST(OP_FTZERO);
2548 case 's': FTST(OP_FTSIZE);
2549 case 'f': FTST(OP_FTFILE);
2550 case 'd': FTST(OP_FTDIR);
2551 case 'l': FTST(OP_FTLINK);
2552 case 'p': FTST(OP_FTPIPE);
2553 case 'S': FTST(OP_FTSOCK);
2554 case 'u': FTST(OP_FTSUID);
2555 case 'g': FTST(OP_FTSGID);
2556 case 'k': FTST(OP_FTSVTX);
2557 case 'b': FTST(OP_FTBLK);
2558 case 'c': FTST(OP_FTCHR);
2559 case 't': FTST(OP_FTTTY);
2560 case 'T': FTST(OP_FTTEXT);
2561 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2562 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2563 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2564 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2565 default:
cea2e8a9 2566 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2567 break;
2568 }
2569 }
a687059c
LW
2570 tmp = *s++;
2571 if (*s == tmp) {
2572 s++;
3280af22 2573 if (PL_expect == XOPERATOR)
79072805
LW
2574 TERM(POSTDEC);
2575 else
2576 OPERATOR(PREDEC);
2577 }
2578 else if (*s == '>') {
2579 s++;
2580 s = skipspace(s);
834a4ddd 2581 if (isIDFIRST_lazy(s)) {
a0d0e21e 2582 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2583 TOKEN(ARROW);
79072805 2584 }
748a9306
LW
2585 else if (*s == '$')
2586 OPERATOR(ARROW);
463ee0b2 2587 else
748a9306 2588 TERM(ARROW);
a687059c 2589 }
3280af22 2590 if (PL_expect == XOPERATOR)
79072805
LW
2591 Aop(OP_SUBTRACT);
2592 else {
3280af22 2593 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2594 check_uni();
79072805 2595 OPERATOR('-'); /* unary minus */
2f3197b3 2596 }
79072805 2597
378cc40b 2598 case '+':
a687059c
LW
2599 tmp = *s++;
2600 if (*s == tmp) {
378cc40b 2601 s++;
3280af22 2602 if (PL_expect == XOPERATOR)
79072805
LW
2603 TERM(POSTINC);
2604 else
2605 OPERATOR(PREINC);
378cc40b 2606 }
3280af22 2607 if (PL_expect == XOPERATOR)
79072805
LW
2608 Aop(OP_ADD);
2609 else {
3280af22 2610 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2611 check_uni();
a687059c 2612 OPERATOR('+');
2f3197b3 2613 }
a687059c 2614
378cc40b 2615 case '*':
3280af22
NIS
2616 if (PL_expect != XOPERATOR) {
2617 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2618 PL_expect = XOPERATOR;
2619 force_ident(PL_tokenbuf, '*');
2620 if (!*PL_tokenbuf)
a0d0e21e 2621 PREREF('*');
79072805 2622 TERM('*');
a687059c 2623 }
79072805
LW
2624 s++;
2625 if (*s == '*') {
a687059c 2626 s++;
79072805 2627 PWop(OP_POW);
a687059c 2628 }
79072805
LW
2629 Mop(OP_MULTIPLY);
2630
378cc40b 2631 case '%':
3280af22 2632 if (PL_expect == XOPERATOR) {
bbce6d69 2633 ++s;
2634 Mop(OP_MODULO);
a687059c 2635 }
3280af22
NIS
2636 PL_tokenbuf[0] = '%';
2637 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2638 if (!PL_tokenbuf[1]) {
2639 if (s == PL_bufend)
bbce6d69 2640 yyerror("Final % should be \\% or %name");
2641 PREREF('%');
a687059c 2642 }
3280af22 2643 PL_pending_ident = '%';
bbce6d69 2644 TERM('%');
a687059c 2645
378cc40b 2646 case '^':
79072805 2647 s++;
a0d0e21e 2648 BOop(OP_BIT_XOR);
79072805 2649 case '[':
3280af22 2650 PL_lex_brackets++;
79072805 2651 /* FALL THROUGH */
378cc40b 2652 case '~':
378cc40b 2653 case ',':
378cc40b
LW
2654 tmp = *s++;
2655 OPERATOR(tmp);
a0d0e21e
LW
2656 case ':':
2657 if (s[1] == ':') {
2658 len = 0;
2659 goto just_a_word;
2660 }
2661 s++;
2662 OPERATOR(':');
8990e307
LW
2663 case '(':
2664 s++;
3280af22
NIS
2665 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2666 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2667 else
3280af22 2668 PL_expect = XTERM;
a0d0e21e 2669 TOKEN('(');
378cc40b 2670 case ';':
3280af22
NIS
2671 if (PL_curcop->cop_line < PL_copline)
2672 PL_copline = PL_curcop->cop_line;
378cc40b
LW
2673 tmp = *s++;
2674 OPERATOR(tmp);
2675 case ')':
378cc40b 2676 tmp = *s++;
16d20bd9
AD
2677 s = skipspace(s);
2678 if (*s == '{')
2679 PREBLOCK(tmp);
378cc40b 2680 TERM(tmp);
79072805
LW
2681 case ']':
2682 s++;
3280af22 2683 if (PL_lex_brackets <= 0)
d98d5fff 2684 yyerror("Unmatched right square bracket");
463ee0b2 2685 else
3280af22
NIS
2686 --PL_lex_brackets;
2687 if (PL_lex_state == LEX_INTERPNORMAL) {
2688 if (PL_lex_brackets == 0) {
a0d0e21e 2689 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 2690 PL_lex_state = LEX_INTERPEND;
79072805
LW
2691 }
2692 }
4633a7c4 2693 TERM(']');
79072805
LW
2694 case '{':
2695 leftbracket:
79072805 2696 s++;
3280af22
NIS
2697 if (PL_lex_brackets > 100) {
2698 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
2699 if (newlb != PL_lex_brackstack) {
8990e307 2700 SAVEFREEPV(newlb);
3280af22 2701 PL_lex_brackstack = newlb;
8990e307
LW
2702 }
2703 }
3280af22 2704 switch (PL_expect) {
a0d0e21e 2705 case XTERM:
3280af22 2706 if (PL_lex_formbrack) {
a0d0e21e
LW
2707 s--;
2708 PRETERMBLOCK(DO);
2709 }
3280af22
NIS
2710 if (PL_oldoldbufptr == PL_last_lop)
2711 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2712 else
3280af22 2713 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 2714 OPERATOR(HASHBRACK);
a0d0e21e 2715 case XOPERATOR:
3280af22 2716 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306 2717 s++;
44a8e56a 2718 d = s;
3280af22
NIS
2719 PL_tokenbuf[0] = '\0';
2720 if (d < PL_bufend && *d == '-') {
2721 PL_tokenbuf[0] = '-';
44a8e56a 2722 d++;
3280af22 2723 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
44a8e56a 2724 d++;
2725 }
834a4ddd 2726 if (d < PL_bufend && isIDFIRST_lazy(d)) {
3280af22 2727 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 2728 FALSE, &len);
3280af22 2729 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
748a9306
LW
2730 d++;
2731 if (*d == '}') {
3280af22 2732 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 2733 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
2734 if (minus)
2735 force_next('-');
748a9306
LW
2736 }
2737 }
2738 /* FALL THROUGH */
2739 case XBLOCK:
3280af22
NIS
2740 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
2741 PL_expect = XSTATE;
a0d0e21e
LW
2742 break;
2743 case XTERMBLOCK:
3280af22
NIS
2744 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
2745 PL_expect = XSTATE;
a0d0e21e
LW
2746 break;
2747 default: {
2748 char *t;
3280af22
NIS
2749 if (PL_oldoldbufptr == PL_last_lop)
2750 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2751 else
3280af22 2752 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 2753 s = skipspace(s);
09ecc4b6 2754 if (*s == '}')
a0d0e21e 2755 OPERATOR(HASHBRACK);
b8a4b1be
GS
2756 /* This hack serves to disambiguate a pair of curlies
2757 * as being a block or an anon hash. Normally, expectation
2758 * determines that, but in cases where we're not in a
2759 * position to expect anything in particular (like inside
2760 * eval"") we have to resolve the ambiguity. This code
2761 * covers the case where the first term in the curlies is a
2762 * quoted string. Most other cases need to be explicitly
2763 * disambiguated by prepending a `+' before the opening
2764 * curly in order to force resolution as an anon hash.
2765 *
2766 * XXX should probably propagate the outer expectation
2767 * into eval"" to rely less on this hack, but that could
2768 * potentially break current behavior of eval"".
2769 * GSAR 97-07-21
2770 */
2771 t = s;
2772 if (*s == '\'' || *s == '"' || *s == '`') {
2773 /* common case: get past first string, handling escapes */
3280af22 2774 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
2775 if (*t++ == '\\' && (*t == '\\' || *t == *s))
2776 t++;
2777 t++;
a0d0e21e 2778 }
b8a4b1be 2779 else if (*s == 'q') {
3280af22 2780 if (++t < PL_bufend
b8a4b1be 2781 && (!isALNUM(*t)
3280af22 2782 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
b8a4b1be
GS
2783 && !isALNUM(*t)))) {
2784 char *tmps;
2785 char open, close, term;
2786 I32 brackets = 1;
2787
3280af22 2788 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
2789 t++;
2790 term = *t;
2791 open = term;
2792 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
2793 term = tmps[5];
2794 close = term;
2795 if (open == close)
3280af22
NIS
2796 for (t++; t < PL_bufend; t++) {
2797 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 2798 t++;
6d07e5e9 2799 else if (*t == open)
b8a4b1be
GS
2800 break;
2801 }
2802 else
3280af22
NIS
2803 for (t++; t < PL_bufend; t++) {
2804 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 2805 t++;
6d07e5e9 2806 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
2807 break;
2808 else if (*t == open)
2809 brackets++;
2810 }
2811 }
2812 t++;
a0d0e21e 2813 }
834a4ddd
LW
2814 else if (isIDFIRST_lazy(s)) {
2815 for (t++; t < PL_bufend && isALNUM_lazy(t); t++) ;
a0d0e21e 2816 }
3280af22 2817 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 2818 t++;
b8a4b1be
GS
2819 /* if comma follows first term, call it an anon hash */
2820 /* XXX it could be a comma expression with loop modifiers */
3280af22 2821 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 2822 || (*t == '=' && t[1] == '>')))
a0d0e21e 2823 OPERATOR(HASHBRACK);
3280af22 2824 if (PL_expect == XREF)
4e4e412b 2825 PL_expect = XTERM;
a0d0e21e 2826 else {
3280af22
NIS
2827 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
2828 PL_expect = XSTATE;
a0d0e21e 2829 }
8990e307 2830 }
a0d0e21e 2831 break;
463ee0b2 2832 }
3280af22 2833 yylval.ival = PL_curcop->cop_line;
79072805 2834 if (isSPACE(*s) || *s == '#')
3280af22 2835 PL_copline = NOLINE; /* invalidate current command line number */
79072805 2836 TOKEN('{');
378cc40b 2837 case '}':
79072805
LW
2838 rightbracket:
2839 s++;
3280af22 2840 if (PL_lex_brackets <= 0)
d98d5fff 2841 yyerror("Unmatched right curly bracket");
463ee0b2 2842 else
3280af22
NIS
2843 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
2844 if (PL_lex_brackets < PL_lex_formbrack)
2845 PL_lex_formbrack = 0;
2846 if (PL_lex_state == LEX_INTERPNORMAL) {
2847 if (PL_lex_brackets == 0) {
2848 if (PL_lex_fakebrack) {
2849 PL_lex_state = LEX_INTERPEND;
2850 PL_bufptr = s;
cea2e8a9 2851 return yylex(); /* ignore fake brackets */
79072805 2852 }
fa83b5b6 2853 if (*s == '-' && s[1] == '>')
3280af22 2854 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 2855 else if (*s != '[' && *s != '{')
3280af22 2856 PL_lex_state = LEX_INTERPEND;
79072805
LW
2857 }
2858 }
3280af22
NIS
2859 if (PL_lex_brackets < PL_lex_fakebrack) {
2860 PL_bufptr = s;
2861 PL_lex_fakebrack = 0;
cea2e8a9 2862 return yylex(); /* ignore fake brackets */
748a9306 2863 }
79072805
LW
2864 force_next('}');
2865 TOKEN(';');
378cc40b
LW
2866 case '&':
2867 s++;
2868 tmp = *s++;
2869 if (tmp == '&')
a0d0e21e 2870 AOPERATOR(ANDAND);
378cc40b 2871 s--;
3280af22 2872 if (PL_expect == XOPERATOR) {
834a4ddd 2873 if (ckWARN(WARN_SEMICOLON) && isIDFIRST_lazy(s) && PL_bufptr == PL_linestart) {
3280af22 2874 PL_curcop->cop_line--;
cea2e8a9 2875 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
3280af22 2876 PL_curcop->cop_line++;
463ee0b2 2877 }
79072805 2878 BAop(OP_BIT_AND);
463ee0b2 2879 }
79072805 2880
3280af22
NIS
2881 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2882 if (*PL_tokenbuf) {
2883 PL_expect = XOPERATOR;
2884 force_ident(PL_tokenbuf, '&');
463ee0b2 2885 }
79072805
LW
2886 else
2887 PREREF('&');
c07a80fd 2888 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
2889 TERM('&');
2890
378cc40b
LW
2891 case '|':
2892 s++;
2893 tmp = *s++;
2894 if (tmp == '|')
a0d0e21e 2895 AOPERATOR(OROR);
378cc40b 2896 s--;
79072805 2897 BOop(OP_BIT_OR);
378cc40b
LW
2898 case '=':
2899 s++;
2900 tmp = *s++;
2901 if (tmp == '=')
79072805
LW
2902 Eop(OP_EQ);
2903 if (tmp == '>')
2904 OPERATOR(',');
378cc40b 2905 if (tmp == '~')
79072805 2906 PMop(OP_MATCH);
599cee73 2907 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 2908 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 2909 s--;
3280af22
NIS
2910 if (PL_expect == XSTATE && isALPHA(tmp) &&
2911 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 2912 {
3280af22
NIS
2913 if (PL_in_eval && !PL_rsfp) {
2914 d = PL_bufend;
a5f75d66
AD
2915 while (s < d) {
2916 if (*s++ == '\n') {
2917 incline(s);
2918 if (strnEQ(s,"=cut",4)) {
2919 s = strchr(s,'\n');
2920 if (s)
2921 s++;
2922 else
2923 s = d;
2924 incline(s);
2925 goto retry;
2926 }
2927 }
2928 }
2929 goto retry;
2930 }
3280af22
NIS
2931 s = PL_bufend;
2932 PL_doextract = TRUE;
a0d0e21e
LW
2933 goto retry;
2934 }
3280af22 2935 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 2936 char *t;
51882d45 2937#ifdef PERL_STRICT_CR
a0d0e21e 2938 for (t = s; *t == ' ' || *t == '\t'; t++) ;
51882d45
GS
2939#else
2940 for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
2941#endif
a0d0e21e
LW
2942 if (*t == '\n' || *t == '#') {
2943 s--;
3280af22 2944 PL_expect = XBLOCK;
a0d0e21e
LW
2945 goto leftbracket;
2946 }
79072805 2947 }
a0d0e21e
LW
2948 yylval.ival = 0;
2949 OPERATOR(ASSIGNOP);
378cc40b
LW
2950 case '!':
2951 s++;
2952 tmp = *s++;
2953 if (tmp == '=')
79072805 2954 Eop(OP_NE);
378cc40b 2955 if (tmp == '~')
79072805 2956 PMop(OP_NOT);
378cc40b
LW
2957 s--;
2958 OPERATOR('!');
2959 case '<':
3280af22 2960 if (PL_expect != XOPERATOR) {
93a17b20 2961 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 2962 check_uni();
79072805
LW
2963 if (s[1] == '<')
2964 s = scan_heredoc(s);
2965 else
2966 s = scan_inputsymbol(s);
2967 TERM(sublex_start());
378cc40b
LW
2968 }
2969 s++;
2970 tmp = *s++;
2971 if (tmp == '<')
79072805 2972 SHop(OP_LEFT_SHIFT);
395c3793
LW
2973 if (tmp == '=') {
2974 tmp = *s++;
2975 if (tmp == '>')
79072805 2976 Eop(OP_NCMP);
395c3793 2977 s--;
79072805 2978 Rop(OP_LE);
395c3793 2979 }
378cc40b 2980 s--;
79072805 2981 Rop(OP_LT);
378cc40b
LW
2982 case '>':
2983 s++;
2984 tmp = *s++;
2985 if (tmp == '>')
79072805 2986 SHop(OP_RIGHT_SHIFT);
378cc40b 2987 if (tmp == '=')
79072805 2988 Rop(OP_GE);
378cc40b 2989 s--;
79072805 2990 Rop(OP_GT);
378cc40b
LW
2991
2992 case '$':
bbce6d69 2993 CLINE;
2994
3280af22
NIS
2995 if (PL_expect == XOPERATOR) {
2996 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
2997 PL_expect = XTERM;
a0d0e21e 2998 depcom();
bbce6d69 2999 return ','; /* grandfather non-comma-format format */
a0d0e21e 3000 }
8990e307 3001 }
a0d0e21e 3002
834a4ddd 3003 if (s[1] == '#' && (isIDFIRST_lazy(s+2) || strchr("{$:+-", s[2]))) {
3280af22 3004 PL_tokenbuf[0] = '@';
376b8730
SM
3005 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3006 sizeof PL_tokenbuf - 1, FALSE);
3007 if (PL_expect == XOPERATOR)
3008 no_op("Array length", s);
3280af22 3009 if (!PL_tokenbuf[1])
a0d0e21e 3010 PREREF(DOLSHARP);
3280af22
NIS
3011 PL_expect = XOPERATOR;
3012 PL_pending_ident = '#';
463ee0b2 3013 TOKEN(DOLSHARP);
79072805 3014 }
bbce6d69 3015
3280af22 3016 PL_tokenbuf[0] = '$';
376b8730
SM
3017 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3018 sizeof PL_tokenbuf - 1, FALSE);
3019 if (PL_expect == XOPERATOR)
3020 no_op("Scalar", s);
3280af22
NIS
3021 if (!PL_tokenbuf[1]) {
3022 if (s == PL_bufend)
bbce6d69 3023 yyerror("Final $ should be \\$ or $name");
3024 PREREF('$');
8990e307 3025 }
a0d0e21e 3026
bbce6d69 3027 /* This kludge not intended to be bulletproof. */
3280af22 3028 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3029 yylval.opval = newSVOP(OP_CONST, 0,
3280af22 3030 newSViv((IV)PL_compiling.cop_arybase));
bbce6d69 3031 yylval.opval->op_private = OPpCONST_ARYBASE;
3032 TERM(THING);
3033 }
3034
ff68c719 3035 d = s;
69d2bceb 3036 tmp = (I32)*s;
3280af22 3037 if (PL_lex_state == LEX_NORMAL)
ff68c719 3038 s = skipspace(s);
3039
3280af22 3040 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3041 char *t;
3042 if (*s == '[') {
3280af22 3043 PL_tokenbuf[0] = '@';
599cee73 3044 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3045 for(t = s + 1;
834a4ddd 3046 isSPACE(*t) || isALNUM_lazy(t) || *t == '$';
bbce6d69 3047 t++) ;
a0d0e21e 3048 if (*t++ == ',') {
3280af22
NIS
3049 PL_bufptr = skipspace(PL_bufptr);
3050 while (t < PL_bufend && *t != ']')
bbce6d69 3051 t++;
cea2e8a9 3052 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3053 "Multidimensional syntax %.*s not supported",
3054 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3055 }
3056 }
bbce6d69 3057 }
3058 else if (*s == '{') {
3280af22 3059 PL_tokenbuf[0] = '%';
599cee73 3060 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3061 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3062 {
3280af22 3063 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3064 STRLEN len;
3065 for (t++; isSPACE(*t); t++) ;
834a4ddd 3066 if (isIDFIRST_lazy(t)) {
8903cb82 3067 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3068 for (; isSPACE(*t); t++) ;
864dbfa3 3069 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3070 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3071 "You need to quote \"%s\"", tmpbuf);
748a9306 3072 }
93a17b20
LW
3073 }
3074 }
2f3197b3 3075 }
bbce6d69 3076
3280af22 3077 PL_expect = XOPERATOR;
69d2bceb 3078 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3079 bool islop = (PL_last_lop == PL_oldoldbufptr);
3080 if (!islop || PL_last_lop_op == OP_GREPSTART)
3081 PL_expect = XOPERATOR;
bbce6d69 3082 else if (strchr("$@\"'`q", *s))
3280af22 3083 PL_expect = XTERM; /* e.g. print $fh "foo" */
834a4ddd 3084 else if (strchr("&*<%", *s) && isIDFIRST_lazy(s+1))
3280af22 3085 PL_expect = XTERM; /* e.g. print $fh &sub */
834a4ddd 3086 else if (isIDFIRST_lazy(s)) {
3280af22 3087 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3088 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
84902520
TB
3089 if (tmp = keyword(tmpbuf, len)) {
3090 /* binary operators exclude handle interpretations */
3091 switch (tmp) {
3092 case -KEY_x:
3093 case -KEY_eq:
3094 case -KEY_ne:
3095 case -KEY_gt:
3096 case -KEY_lt:
3097 case -KEY_ge:
3098 case -KEY_le:
3099 case -KEY_cmp:
3100 break;
3101 default:
3280af22 3102 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3103 break;
3104 }
3105 }
68dc0745 3106 else {
3107 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3108 if (gv && GvCVu(gv))
3280af22 3109 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3110 }
93a17b20 3111 }
bbce6d69 3112 else if (isDIGIT(*s))
3280af22 3113 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3114 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3115 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3116 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3117 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3118 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3119 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3120 }
3280af22 3121 PL_pending_ident = '$';
79072805 3122 TOKEN('$');
378cc40b
LW
3123
3124 case '@':
3280af22 3125 if (PL_expect == XOPERATOR)
bbce6d69 3126 no_op("Array", s);
3280af22
NIS
3127 PL_tokenbuf[0] = '@';
3128 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3129 if (!PL_tokenbuf[1]) {
3130 if (s == PL_bufend)
bbce6d69 3131 yyerror("Final @ should be \\@ or @name");
3132 PREREF('@');
3133 }
3280af22 3134 if (PL_lex_state == LEX_NORMAL)
ff68c719 3135 s = skipspace(s);
3280af22 3136 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3137 if (*s == '{')
3280af22 3138 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3139
3140 /* Warn about @ where they meant $. */
599cee73 3141 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3142 if (*s == '[' || *s == '{') {
3143 char *t = s + 1;
834a4ddd 3144 while (*t && (isALNUM_lazy(t) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3145 t++;
3146 if (*t == '}' || *t == ']') {
3147 t++;
3280af22 3148 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3149 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3150 "Scalar value %.*s better written as $%.*s",
3280af22 3151 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3152 }
93a17b20
LW
3153 }
3154 }
463ee0b2 3155 }
3280af22 3156 PL_pending_ident = '@';
79072805 3157 TERM('@');
378cc40b
LW
3158
3159 case '/': /* may either be division or pattern */
3160 case '?': /* may either be conditional or pattern */
3280af22 3161 if (PL_expect != XOPERATOR) {
c277df42 3162 /* Disable warning on "study /blah/" */
3280af22
NIS
3163 if (PL_oldoldbufptr == PL_last_uni
3164 && (*PL_last_uni != 's' || s - PL_last_uni < 5
834a4ddd 3165 || memNE(PL_last_uni, "study", 5) || isALNUM_lazy(PL_last_uni+5)))
c277df42 3166 check_uni();
8782bef2 3167 s = scan_pat(s,OP_MATCH);
79072805 3168 TERM(sublex_start());
378cc40b
LW
3169 }
3170 tmp = *s++;
a687059c 3171 if (tmp == '/')
79072805 3172 Mop(OP_DIVIDE);
378cc40b
LW
3173 OPERATOR(tmp);
3174
3175 case '.':
51882d45
GS
3176 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3177#ifdef PERL_STRICT_CR
3178 && s[1] == '\n'
3179#else
3180 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3181#endif
3182 && (s == PL_linestart || s[-1] == '\n') )
3183 {
3280af22
NIS
3184 PL_lex_formbrack = 0;
3185 PL_expect = XSTATE;
79072805
LW
3186 goto rightbracket;
3187 }
3280af22 3188 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3189 tmp = *s++;
a687059c
LW
3190 if (*s == tmp) {
3191 s++;
2f3197b3
LW
3192 if (*s == tmp) {
3193 s++;
79072805 3194 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3195 }
3196 else
79072805 3197 yylval.ival = 0;
378cc40b 3198 OPERATOR(DOTDOT);
a687059c 3199 }
3280af22 3200 if (PL_expect != XOPERATOR)
2f3197b3 3201 check_uni();
79072805 3202 Aop(OP_CONCAT);
378cc40b
LW
3203 }
3204 /* FALL THROUGH */
3205 case '0': case '1': case '2': case '3': case '4':
3206 case '5': case '6': case '7': case '8': case '9':
79072805 3207 s = scan_num(s);
3280af22 3208 if (PL_expect == XOPERATOR)
8990e307 3209 no_op("Number",s);
79072805
LW
3210 TERM(THING);
3211
3212 case '\'':
8990e307 3213 s = scan_str(s);
3280af22
NIS
3214 if (PL_expect == XOPERATOR) {
3215 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3216 PL_expect = XTERM;
a0d0e21e
LW
3217 depcom();
3218 return ','; /* grandfather non-comma-format format */
3219 }
463ee0b2 3220 else
8990e307 3221 no_op("String",s);
463ee0b2 3222 }
79072805 3223 if (!s)
85e6fe83 3224 missingterm((char*)0);
79072805
LW
3225 yylval.ival = OP_CONST;
3226 TERM(sublex_start());
3227
3228 case '"':
8990e307 3229 s = scan_str(s);
3280af22
NIS
3230 if (PL_expect == XOPERATOR) {
3231 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3232 PL_expect = XTERM;
a0d0e21e
LW
3233 depcom();
3234 return ','; /* grandfather non-comma-format format */
3235 }
463ee0b2 3236 else
8990e307 3237 no_op("String",s);
463ee0b2 3238 }
79072805 3239 if (!s)
85e6fe83 3240 missingterm((char*)0);
4633a7c4 3241 yylval.ival = OP_CONST;
3280af22 3242 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3243 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3244 yylval.ival = OP_STRINGIFY;
3245 break;
3246 }
3247 }
79072805
LW
3248 TERM(sublex_start());
3249
3250 case '`':
3251 s = scan_str(s);
3280af22 3252 if (PL_expect == XOPERATOR)
8990e307 3253 no_op("Backticks",s);
79072805 3254 if (!s)
85e6fe83 3255 missingterm((char*)0);
79072805
LW
3256 yylval.ival = OP_BACKTICK;
3257 set_csh();
3258 TERM(sublex_start());
3259
3260 case '\\':
3261 s++;
599cee73 3262 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3263 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3264 *s, *s);
3280af22 3265 if (PL_expect == XOPERATOR)
8990e307 3266 no_op("Backslash",s);
79072805
LW
3267 OPERATOR(REFGEN);
3268
3269 case 'x':
3280af22 3270 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3271 s++;
3272 Mop(OP_REPEAT);
2f3197b3 3273 }
79072805
LW
3274 goto keylookup;
3275
378cc40b 3276 case '_':
79072805
LW
3277 case 'a': case 'A':
3278 case 'b': case 'B':
3279 case 'c': case 'C':
3280 case 'd': case 'D':
3281 case 'e': case 'E':
3282 case 'f': case 'F':
3283 case 'g': case 'G':
3284 case 'h': case 'H':
3285 case 'i': case 'I':
3286 case 'j': case 'J':
3287 case 'k': case 'K':
3288 case 'l': case 'L':
3289 case 'm': case 'M':
3290 case 'n': case 'N':
3291 case 'o': case 'O':
3292 case 'p': case 'P':
3293 case 'q': case 'Q':
3294 case 'r': case 'R':
3295 case 's': case 'S':
3296 case 't': case 'T':
3297 case 'u': case 'U':
3298 case 'v': case 'V':
3299 case 'w': case 'W':
3300 case 'X':
3301 case 'y': case 'Y':
3302 case 'z': case 'Z':
3303
49dc05e3 3304 keylookup: {
2d8e6c8d 3305 STRLEN n_a;
161b471a
NIS
3306 gv = Nullgv;
3307 gvp = 0;
49dc05e3 3308
3280af22
NIS
3309 PL_bufptr = s;
3310 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 3311
3312 /* Some keywords can be followed by any delimiter, including ':' */
3280af22
NIS
3313 tmp = (len == 1 && strchr("msyq", PL_tokenbuf[0]) ||
3314 len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3315 (PL_tokenbuf[0] == 'q' &&
3316 strchr("qwxr", PL_tokenbuf[1]))));
8ebc5c01 3317
3318 /* x::* is just a word, unless x is "CORE" */
3280af22 3319 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3320 goto just_a_word;
3321
3643fb5f 3322 d = s;
3280af22 3323 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3324 d++; /* no comments skipped here, or s### is misparsed */
3325
3326 /* Is this a label? */
3280af22
NIS
3327 if (!tmp && PL_expect == XSTATE
3328 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3329 s = d + 1;
3280af22 3330 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01 3331 CLINE;
3332 TOKEN(LABEL);
3643fb5f
CS
3333 }
3334
3335 /* Check for keywords */
3280af22 3336 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3337
3338 /* Is this a word before a => operator? */
748a9306
LW
3339 if (strnEQ(d,"=>",2)) {
3340 CLINE;
3280af22 3341 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306
LW
3342 yylval.opval->op_private = OPpCONST_BARE;
3343 TERM(WORD);
3344 }
3345
a0d0e21e 3346 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3347 GV *ogv = Nullgv; /* override (winner) */
3348 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3349 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3350 CV *cv;
3280af22 3351 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3352 (cv = GvCVu(gv)))
3353 {
3354 if (GvIMPORTED_CV(gv))
3355 ogv = gv;
3356 else if (! CvMETHOD(cv))
3357 hgv = gv;
3358 }
3359 if (!ogv &&
3280af22
NIS
3360 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3361 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3362 GvCVu(gv) && GvIMPORTED_CV(gv))
3363 {
3364 ogv = gv;
3365 }
3366 }
3367 if (ogv) {
3368 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3369 }
3370 else if (gv && !gvp
3371 && -tmp==KEY_lock /* XXX generalizable kludge */
3280af22 3372 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3373 {
3374 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3375 }
56f7f34b
CS
3376 else { /* no override */
3377 tmp = -tmp;
3378 gv = Nullgv;
3379 gvp = 0;
4944e2f7
GS
3380 if (ckWARN(WARN_AMBIGUOUS) && hgv
3381 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
cea2e8a9 3382 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 3383 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3384 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3385 }
a0d0e21e
LW
3386 }
3387
3388 reserved_word:
3389 switch (tmp) {
79072805
LW
3390
3391 default: /* not a keyword */
93a17b20 3392 just_a_word: {
96e4d5b1 3393 SV *sv;
3280af22 3394 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
3395
3396 /* Get the rest if it looks like a package qualifier */
3397
a0d0e21e 3398 if (*s == '\'' || *s == ':' && s[1] == ':') {
c3e0f903 3399 STRLEN morelen;
3280af22 3400 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
3401 TRUE, &morelen);
3402 if (!morelen)
cea2e8a9 3403 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 3404 *s == '\'' ? "'" : "::");
c3e0f903 3405 len += morelen;
a0d0e21e 3406 }
8990e307 3407
3280af22
NIS
3408 if (PL_expect == XOPERATOR) {
3409 if (PL_bufptr == PL_linestart) {
3410 PL_curcop->cop_line--;
cea2e8a9 3411 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
3280af22 3412 PL_curcop->cop_line++;
463ee0b2
LW
3413 }
3414 else
54310121 3415 no_op("Bareword",s);
463ee0b2 3416 }
8990e307 3417
c3e0f903
GS
3418 /* Look for a subroutine with this name in current package,
3419 unless name is "Foo::", in which case Foo is a bearword
3420 (and a package name). */
3421
3422 if (len > 2 &&
3280af22 3423 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 3424 {
599cee73 3425 if (ckWARN(WARN_UNSAFE) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
cea2e8a9 3426 Perl_warner(aTHX_ WARN_UNSAFE,
599cee73 3427 "Bareword \"%s\" refers to nonexistent package",
3280af22 3428 PL_tokenbuf);
c3e0f903 3429 len -= 2;
3280af22 3430 PL_tokenbuf[len] = '\0';
c3e0f903
GS
3431 gv = Nullgv;
3432 gvp = 0;
3433 }
3434 else {
3435 len = 0;
3436 if (!gv)
3280af22 3437 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
c3e0f903
GS
3438 }
3439
3440 /* if we saw a global override before, get the right name */
8990e307 3441
49dc05e3 3442 if (gvp) {
79cb57f6 3443 sv = newSVpvn("CORE::GLOBAL::",14);
3280af22 3444 sv_catpv(sv,PL_tokenbuf);
49dc05e3
GS
3445 }
3446 else
3280af22 3447 sv = newSVpv(PL_tokenbuf,0);
8990e307 3448
a0d0e21e
LW
3449 /* Presume this is going to be a bareword of some sort. */
3450
3451 CLINE;
49dc05e3 3452 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
a0d0e21e
LW
3453 yylval.opval->op_private = OPpCONST_BARE;
3454
c3e0f903
GS
3455 /* And if "Foo::", then that's what it certainly is. */
3456
3457 if (len)
3458 goto safe_bareword;
3459
8990e307
LW
3460 /* See if it's the indirect object for a list operator. */
3461
3280af22
NIS
3462 if (PL_oldoldbufptr &&
3463 PL_oldoldbufptr < PL_bufptr &&
3464 (PL_oldoldbufptr == PL_last_lop || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 3465 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
3466 (PL_expect == XREF ||
3467 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 3468 {
748a9306
LW
3469 bool immediate_paren = *s == '(';
3470
a0d0e21e
LW
3471 /* (Now we can afford to cross potential line boundary.) */
3472 s = skipspace(s);
3473
3474 /* Two barewords in a row may indicate method call. */
3475
834a4ddd 3476 if ((isIDFIRST_lazy(s) || *s == '$') && (tmp=intuit_method(s,gv)))
a0d0e21e
LW
3477 return tmp;
3478
3479 /* If not a declared subroutine, it's an indirect object. */
3480 /* (But it's an indir obj regardless for sort.) */
3481
3280af22 3482 if ((PL_last_lop_op == OP_SORT ||
a9ef352a
GS
3483 (!immediate_paren && (!gv || !GvCVu(gv)))) &&
3484 (PL_last_lop_op != OP_MAPSTART &&
3485 PL_last_lop_op != OP_GREPSTART))
3486 {
3280af22 3487 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 3488 goto bareword;
93a17b20
LW
3489 }
3490 }
8990e307
LW
3491
3492 /* If followed by a paren, it's certainly a subroutine. */
3493
3280af22 3494 PL_expect = XOPERATOR;
8990e307 3495 s = skipspace(s);
93a17b20 3496 if (*s == '(') {
79072805 3497 CLINE;
96e4d5b1 3498 if (gv && GvCVu(gv)) {
3499 for (d = s + 1; *d == ' ' || *d == '\t'; d++) ;
7a52d87a 3500 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
96e4d5b1 3501 s = d + 1;
3502 goto its_constant;
3503 }
3504 }
3280af22
NIS
3505 PL_nextval[PL_nexttoke].opval = yylval.opval;
3506 PL_expect = XOPERATOR;
93a17b20 3507 force_next(WORD);
c07a80fd 3508 yylval.ival = 0;
463ee0b2 3509 TOKEN('&');
79072805 3510 }
93a17b20 3511
a0d0e21e 3512 /* If followed by var or block, call it a method (unless sub) */
8990e307 3513
8ebc5c01 3514 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
3280af22
NIS
3515 PL_last_lop = PL_oldbufptr;
3516 PL_last_lop_op = OP_METHOD;
93a17b20 3517 PREBLOCK(METHOD);
463ee0b2
LW
3518 }
3519
8990e307
LW
3520 /* If followed by a bareword, see if it looks like indir obj. */
3521
834a4ddd 3522 if ((isIDFIRST_lazy(s) || *s == '$') && (tmp = intuit_method(s,gv)))
a0d0e21e 3523 return tmp;
93a17b20 3524
8990e307
LW
3525 /* Not a method, so call it a subroutine (if defined) */
3526
8ebc5c01 3527 if (gv && GvCVu(gv)) {
46fc3d4c 3528 CV* cv;
0453d815
PM
3529 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
3530 Perl_warner(aTHX_ WARN_AMBIGUOUS,
3531 "Ambiguous use of -%s resolved as -&%s()",
3280af22 3532 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 3533 /* Check for a constant sub */
46fc3d4c 3534 cv = GvCV(gv);
96e4d5b1 3535 if ((sv = cv_const_sv(cv))) {
3536 its_constant:
3537 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
3538 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
3539 yylval.opval->op_private = 0;
3540 TOKEN(WORD);
89bfa8cd 3541 }
3542
a5f75d66
AD
3543 /* Resolve to GV now. */
3544 op_free(yylval.opval);
3545 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
9675f7ac 3546 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 3547 PL_last_lop = PL_oldbufptr;
bf848113 3548 PL_last_lop_op = OP_ENTERSUB;
4633a7c4
LW
3549 /* Is there a prototype? */
3550 if (SvPOK(cv)) {
3551 STRLEN len;
7a52d87a 3552 char *proto = SvPV((SV*)cv, len);
4633a7c4
LW
3553 if (!len)
3554 TERM(FUNC0SUB);
7a52d87a 3555 if (strEQ(proto, "$"))
4633a7c4 3556 OPERATOR(UNIOPSUB);
7a52d87a 3557 if (*proto == '&' && *s == '{') {
3280af22 3558 sv_setpv(PL_subname,"__ANON__");
4633a7c4
LW
3559 PREBLOCK(LSTOPSUB);
3560 }
a9ef352a 3561 }
3280af22
NIS
3562 PL_nextval[PL_nexttoke].opval = yylval.opval;
3563 PL_expect = XTERM;
8990e307
LW
3564 force_next(WORD);
3565 TOKEN(NOAMP);
3566 }
748a9306 3567
8990e307
LW
3568 /* Call it a bare word */
3569
5603f27d
GS
3570 if (PL_hints & HINT_STRICT_SUBS)
3571 yylval.opval->op_private |= OPpCONST_STRICT;
3572 else {
3573 bareword:
3574 if (ckWARN(WARN_RESERVED)) {
3575 if (lastchar != '-') {
3576 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
3577 if (!*d)
cea2e8a9 3578 Perl_warner(aTHX_ WARN_RESERVED, PL_warn_reserved,
5603f27d
GS
3579 PL_tokenbuf);
3580 }
748a9306
LW
3581 }