This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
produce better error message when \N{...} is used without
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
3818b22b 3 * Copyright (c) 1991-2000, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
51371543 31
9059aa12
LW
32#define XFAKEBRACK 128
33#define XENUMMASK 127
34
7e2040f0 35/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
36#define UTF (PL_hints & HINT_UTF8)
37
2b92dfce
GS
38/* In variables name $^X, these are the legal values for X.
39 * 1999-02-27 mjd-perl-patch@plover.com */
40#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
41
ffb4593c
NT
42/* LEX_* are values for PL_lex_state, the state of the lexer.
43 * They are arranged oddly so that the guard on the switch statement
79072805
LW
44 * can get by with a single comparison (if the compiler is smart enough).
45 */
46
fb73857a
PP
47/* #define LEX_NOTPARSING 11 is done in perl.h. */
48
55497cff
PP
49#define LEX_NORMAL 10
50#define LEX_INTERPNORMAL 9
51#define LEX_INTERPCASEMOD 8
52#define LEX_INTERPPUSH 7
53#define LEX_INTERPSTART 6
54#define LEX_INTERPEND 5
55#define LEX_INTERPENDMAYBE 4
56#define LEX_INTERPCONCAT 3
57#define LEX_INTERPCONST 2
58#define LEX_FORMLINE 1
59#define LEX_KNOWNEXT 0
79072805 60
a790bc05
PP
61/* XXX If this causes problems, set i_unistd=undef in the hint file. */
62#ifdef I_UNISTD
63# include <unistd.h> /* Needed for execv() */
64#endif
65
66
79072805
LW
67#ifdef ff_next
68#undef ff_next
d48672a2
LW
69#endif
70
a1a0e61e
TD
71#ifdef USE_PURE_BISON
72YYSTYPE* yylval_pointer = NULL;
73int* yychar_pointer = NULL;
22c35a8c
GS
74# undef yylval
75# undef yychar
e4bfbdd4
JH
76# define yylval (*yylval_pointer)
77# define yychar (*yychar_pointer)
78# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
79# undef yylex
80# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
81#endif
82
79072805 83#include "keywords.h"
fe14fcc3 84
ffb4593c
NT
85/* CLINE is a macro that ensures PL_copline has a sane value */
86
ae986130
LW
87#ifdef CLINE
88#undef CLINE
89#endif
57843af0 90#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 91
ffb4593c
NT
92/*
93 * Convenience functions to return different tokens and prime the
9cbb5ea2 94 * lexer for the next token. They all take an argument.
ffb4593c
NT
95 *
96 * TOKEN : generic token (used for '(', DOLSHARP, etc)
97 * OPERATOR : generic operator
98 * AOPERATOR : assignment operator
99 * PREBLOCK : beginning the block after an if, while, foreach, ...
100 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
101 * PREREF : *EXPR where EXPR is not a simple identifier
102 * TERM : expression term
103 * LOOPX : loop exiting command (goto, last, dump, etc)
104 * FTST : file test operator
105 * FUN0 : zero-argument function
2d2e263d 106 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
107 * BOop : bitwise or or xor
108 * BAop : bitwise and
109 * SHop : shift operator
110 * PWop : power operator
9cbb5ea2 111 * PMop : pattern-matching operator
ffb4593c
NT
112 * Aop : addition-level operator
113 * Mop : multiplication-level operator
114 * Eop : equality-testing operator
115 * Rop : relational operator <= != gt
116 *
117 * Also see LOP and lop() below.
118 */
119
3280af22
NIS
120#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
121#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
122#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
123#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
124#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
125#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
126#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
127#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
128#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
129#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
130#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
131#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
132#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
133#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
134#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
135#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
136#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
137#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
138#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
139#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 140
a687059c
LW
141/* This bit of chicanery makes a unary function followed by
142 * a parenthesis into a function with one argument, highest precedence.
143 */
2f3197b3 144#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
145 PL_expect = XTERM, \
146 PL_bufptr = s, \
147 PL_last_uni = PL_oldbufptr, \
148 PL_last_lop_op = f, \
a687059c
LW
149 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
150
79072805 151#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
152 PL_bufptr = s, \
153 PL_last_uni = PL_oldbufptr, \
79072805
LW
154 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
155
9f68db38 156/* grandfather return to old style */
3280af22 157#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 158
ffb4593c
NT
159/*
160 * S_ao
161 *
162 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
163 * into an OP_ANDASSIGN or OP_ORASSIGN
164 */
165
76e3520e 166STATIC int
cea2e8a9 167S_ao(pTHX_ int toketype)
a0d0e21e 168{
3280af22
NIS
169 if (*PL_bufptr == '=') {
170 PL_bufptr++;
a0d0e21e
LW
171 if (toketype == ANDAND)
172 yylval.ival = OP_ANDASSIGN;
173 else if (toketype == OROR)
174 yylval.ival = OP_ORASSIGN;
175 toketype = ASSIGNOP;
176 }
177 return toketype;
178}
179
ffb4593c
NT
180/*
181 * S_no_op
182 * When Perl expects an operator and finds something else, no_op
183 * prints the warning. It always prints "<something> found where
184 * operator expected. It prints "Missing semicolon on previous line?"
185 * if the surprise occurs at the start of the line. "do you need to
186 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
187 * where the compiler doesn't know if foo is a method call or a function.
188 * It prints "Missing operator before end of line" if there's nothing
189 * after the missing operator, or "... before <...>" if there is something
190 * after the missing operator.
191 */
192
76e3520e 193STATIC void
cea2e8a9 194S_no_op(pTHX_ char *what, char *s)
463ee0b2 195{
3280af22
NIS
196 char *oldbp = PL_bufptr;
197 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 198
1189a94a
GS
199 if (!s)
200 s = oldbp;
201 else {
202 assert(s >= oldbp);
203 PL_bufptr = s;
204 }
cea2e8a9 205 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 206 if (is_first)
cea2e8a9 207 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 208 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 209 char *t;
7e2040f0 210 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 211 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 212 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 213 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306
LW
214 }
215 else
cea2e8a9 216 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
3280af22 217 PL_bufptr = oldbp;
8990e307
LW
218}
219
ffb4593c
NT
220/*
221 * S_missingterm
222 * Complain about missing quote/regexp/heredoc terminator.
223 * If it's called with (char *)NULL then it cauterizes the line buffer.
224 * If we're in a delimited string and the delimiter is a control
225 * character, it's reformatted into a two-char sequence like ^C.
226 * This is fatal.
227 */
228
76e3520e 229STATIC void
cea2e8a9 230S_missingterm(pTHX_ char *s)
8990e307
LW
231{
232 char tmpbuf[3];
233 char q;
234 if (s) {
235 char *nl = strrchr(s,'\n');
d2719217 236 if (nl)
8990e307
LW
237 *nl = '\0';
238 }
9d116dd7
JH
239 else if (
240#ifdef EBCDIC
241 iscntrl(PL_multi_close)
242#else
243 PL_multi_close < 32 || PL_multi_close == 127
244#endif
245 ) {
8990e307 246 *tmpbuf = '^';
3280af22 247 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
248 s = "\\n";
249 tmpbuf[2] = '\0';
250 s = tmpbuf;
251 }
252 else {
3280af22 253 *tmpbuf = PL_multi_close;
8990e307
LW
254 tmpbuf[1] = '\0';
255 s = tmpbuf;
256 }
257 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 258 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 259}
79072805 260
ffb4593c
NT
261/*
262 * Perl_deprecate
ffb4593c
NT
263 */
264
79072805 265void
864dbfa3 266Perl_deprecate(pTHX_ char *s)
a0d0e21e 267{
d008e5eb 268 dTHR;
599cee73 269 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 270 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
271}
272
ffb4593c
NT
273/*
274 * depcom
9cbb5ea2 275 * Deprecate a comma-less variable list.
ffb4593c
NT
276 */
277
76e3520e 278STATIC void
cea2e8a9 279S_depcom(pTHX)
a0d0e21e
LW
280{
281 deprecate("comma-less variable list");
282}
283
ffb4593c 284/*
9cbb5ea2
GS
285 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
286 * utf16-to-utf8-reversed.
ffb4593c
NT
287 */
288
c39cd008
GS
289#ifdef PERL_CR_FILTER
290static void
291strip_return(SV *sv)
292{
293 register char *s = SvPVX(sv);
294 register char *e = s + SvCUR(sv);
295 /* outer loop optimized to do nothing if there are no CR-LFs */
296 while (s < e) {
297 if (*s++ == '\r' && *s == '\n') {
298 /* hit a CR-LF, need to copy the rest */
299 register char *d = s - 1;
300 *d++ = *s++;
301 while (s < e) {
302 if (*s == '\r' && s[1] == '\n')
303 s++;
304 *d++ = *s++;
305 }
306 SvCUR(sv) -= s - d;
307 return;
308 }
309 }
310}
a868473f 311
76e3520e 312STATIC I32
c39cd008 313S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 314{
c39cd008
GS
315 I32 count = FILTER_READ(idx+1, sv, maxlen);
316 if (count > 0 && !maxlen)
317 strip_return(sv);
318 return count;
a868473f
NIS
319}
320#endif
321
155aba94 322#if 0
a0ed51b3 323STATIC I32
cea2e8a9 324S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
325{
326 I32 count = FILTER_READ(idx+1, sv, maxlen);
327 if (count) {
dfe13c55
GS
328 U8* tmps;
329 U8* tend;
330 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 331 tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 332 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
333 }
334 return count;
335}
336
337STATIC I32
cea2e8a9 338S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
339{
340 I32 count = FILTER_READ(idx+1, sv, maxlen);
341 if (count) {
dfe13c55
GS
342 U8* tmps;
343 U8* tend;
344 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 345 tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 346 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
347 }
348 return count;
349}
155aba94 350#endif
a868473f 351
ffb4593c
NT
352/*
353 * Perl_lex_start
9cbb5ea2
GS
354 * Initialize variables. Uses the Perl save_stack to save its state (for
355 * recursive calls to the parser).
ffb4593c
NT
356 */
357
a0d0e21e 358void
864dbfa3 359Perl_lex_start(pTHX_ SV *line)
79072805 360{
0f15f207 361 dTHR;
8990e307
LW
362 char *s;
363 STRLEN len;
364
3280af22
NIS
365 SAVEI32(PL_lex_dojoin);
366 SAVEI32(PL_lex_brackets);
3280af22
NIS
367 SAVEI32(PL_lex_casemods);
368 SAVEI32(PL_lex_starts);
369 SAVEI32(PL_lex_state);
7766f137 370 SAVEVPTR(PL_lex_inpat);
3280af22 371 SAVEI32(PL_lex_inwhat);
18b09519
GS
372 if (PL_lex_state == LEX_KNOWNEXT) {
373 I32 toke = PL_nexttoke;
374 while (--toke >= 0) {
375 SAVEI32(PL_nexttype[toke]);
376 SAVEVPTR(PL_nextval[toke]);
377 }
378 SAVEI32(PL_nexttoke);
379 PL_nexttoke = 0;
380 }
57843af0 381 SAVECOPLINE(PL_curcop);
3280af22
NIS
382 SAVEPPTR(PL_bufptr);
383 SAVEPPTR(PL_bufend);
384 SAVEPPTR(PL_oldbufptr);
385 SAVEPPTR(PL_oldoldbufptr);
386 SAVEPPTR(PL_linestart);
387 SAVESPTR(PL_linestr);
388 SAVEPPTR(PL_lex_brackstack);
389 SAVEPPTR(PL_lex_casestack);
c76ac1ee 390 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
391 SAVESPTR(PL_lex_stuff);
392 SAVEI32(PL_lex_defer);
09bef843 393 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 394 SAVESPTR(PL_lex_repl);
bebdddfc
GS
395 SAVEINT(PL_expect);
396 SAVEINT(PL_lex_expect);
3280af22
NIS
397
398 PL_lex_state = LEX_NORMAL;
399 PL_lex_defer = 0;
400 PL_expect = XSTATE;
401 PL_lex_brackets = 0;
3280af22
NIS
402 New(899, PL_lex_brackstack, 120, char);
403 New(899, PL_lex_casestack, 12, char);
404 SAVEFREEPV(PL_lex_brackstack);
405 SAVEFREEPV(PL_lex_casestack);
406 PL_lex_casemods = 0;
407 *PL_lex_casestack = '\0';
408 PL_lex_dojoin = 0;
409 PL_lex_starts = 0;
410 PL_lex_stuff = Nullsv;
411 PL_lex_repl = Nullsv;
412 PL_lex_inpat = 0;
413 PL_lex_inwhat = 0;
09bef843 414 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
415 PL_linestr = line;
416 if (SvREADONLY(PL_linestr))
417 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
418 s = SvPV(PL_linestr, len);
8990e307 419 if (len && s[len-1] != ';') {
3280af22
NIS
420 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
421 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
422 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 423 }
3280af22
NIS
424 SvTEMP_off(PL_linestr);
425 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
426 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
427 SvREFCNT_dec(PL_rs);
79cb57f6 428 PL_rs = newSVpvn("\n", 1);
3280af22 429 PL_rsfp = 0;
79072805 430}
a687059c 431
ffb4593c
NT
432/*
433 * Perl_lex_end
9cbb5ea2
GS
434 * Finalizer for lexing operations. Must be called when the parser is
435 * done with the lexer.
ffb4593c
NT
436 */
437
463ee0b2 438void
864dbfa3 439Perl_lex_end(pTHX)
463ee0b2 440{
3280af22 441 PL_doextract = FALSE;
463ee0b2
LW
442}
443
ffb4593c
NT
444/*
445 * S_incline
446 * This subroutine has nothing to do with tilting, whether at windmills
447 * or pinball tables. Its name is short for "increment line". It
57843af0 448 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 449 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
450 * # line 500 "foo.pm"
451 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
452 */
453
76e3520e 454STATIC void
cea2e8a9 455S_incline(pTHX_ char *s)
463ee0b2 456{
0f15f207 457 dTHR;
463ee0b2
LW
458 char *t;
459 char *n;
73659bf1 460 char *e;
463ee0b2 461 char ch;
463ee0b2 462
57843af0 463 CopLINE_inc(PL_curcop);
463ee0b2
LW
464 if (*s++ != '#')
465 return;
466 while (*s == ' ' || *s == '\t') s++;
73659bf1
GS
467 if (strnEQ(s, "line", 4))
468 s += 4;
469 else
470 return;
471 if (*s == ' ' || *s == '\t')
472 s++;
473 else
474 return;
475 while (*s == ' ' || *s == '\t') s++;
463ee0b2
LW
476 if (!isDIGIT(*s))
477 return;
478 n = s;
479 while (isDIGIT(*s))
480 s++;
481 while (*s == ' ' || *s == '\t')
482 s++;
73659bf1 483 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 484 s++;
73659bf1
GS
485 e = t + 1;
486 }
463ee0b2 487 else {
463ee0b2 488 for (t = s; !isSPACE(*t); t++) ;
73659bf1 489 e = t;
463ee0b2 490 }
73659bf1
GS
491 while (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\f')
492 e++;
493 if (*e != '\n' && *e != '\0')
494 return; /* false alarm */
495
463ee0b2
LW
496 ch = *t;
497 *t = '\0';
498 if (t - s > 0)
57843af0 499 CopFILE_set(PL_curcop, s);
463ee0b2 500 *t = ch;
57843af0 501 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
502}
503
ffb4593c
NT
504/*
505 * S_skipspace
506 * Called to gobble the appropriate amount and type of whitespace.
507 * Skips comments as well.
508 */
509
76e3520e 510STATIC char *
cea2e8a9 511S_skipspace(pTHX_ register char *s)
a687059c 512{
11343788 513 dTHR;
3280af22
NIS
514 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
515 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
463ee0b2
LW
516 s++;
517 return s;
518 }
519 for (;;) {
fd049845 520 STRLEN prevlen;
09bef843
SB
521 SSize_t oldprevlen, oldoldprevlen;
522 SSize_t oldloplen, oldunilen;
60e6418e
GS
523 while (s < PL_bufend && isSPACE(*s)) {
524 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
525 incline(s);
526 }
ffb4593c
NT
527
528 /* comment */
3280af22
NIS
529 if (s < PL_bufend && *s == '#') {
530 while (s < PL_bufend && *s != '\n')
463ee0b2 531 s++;
60e6418e 532 if (s < PL_bufend) {
463ee0b2 533 s++;
60e6418e
GS
534 if (PL_in_eval && !PL_rsfp) {
535 incline(s);
536 continue;
537 }
538 }
463ee0b2 539 }
ffb4593c
NT
540
541 /* only continue to recharge the buffer if we're at the end
542 * of the buffer, we're not reading from a source filter, and
543 * we're in normal lexing mode
544 */
09bef843
SB
545 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
546 PL_lex_state == LEX_FORMLINE)
463ee0b2 547 return s;
ffb4593c
NT
548
549 /* try to recharge the buffer */
9cbb5ea2
GS
550 if ((s = filter_gets(PL_linestr, PL_rsfp,
551 (prevlen = SvCUR(PL_linestr)))) == Nullch)
552 {
553 /* end of file. Add on the -p or -n magic */
3280af22
NIS
554 if (PL_minus_n || PL_minus_p) {
555 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
556 ";}continue{print or die qq(-p destination: $!\\n)" :
557 "");
3280af22
NIS
558 sv_catpv(PL_linestr,";}");
559 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
560 }
561 else
3280af22 562 sv_setpv(PL_linestr,";");
ffb4593c
NT
563
564 /* reset variables for next time we lex */
9cbb5ea2
GS
565 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
566 = SvPVX(PL_linestr);
3280af22 567 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
568
569 /* Close the filehandle. Could be from -P preprocessor,
570 * STDIN, or a regular file. If we were reading code from
571 * STDIN (because the commandline held no -e or filename)
572 * then we don't close it, we reset it so the code can
573 * read from STDIN too.
574 */
575
3280af22
NIS
576 if (PL_preprocess && !PL_in_eval)
577 (void)PerlProc_pclose(PL_rsfp);
578 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
579 PerlIO_clearerr(PL_rsfp);
8990e307 580 else
3280af22
NIS
581 (void)PerlIO_close(PL_rsfp);
582 PL_rsfp = Nullfp;
463ee0b2
LW
583 return s;
584 }
ffb4593c
NT
585
586 /* not at end of file, so we only read another line */
09bef843
SB
587 /* make corresponding updates to old pointers, for yyerror() */
588 oldprevlen = PL_oldbufptr - PL_bufend;
589 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
590 if (PL_last_uni)
591 oldunilen = PL_last_uni - PL_bufend;
592 if (PL_last_lop)
593 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
594 PL_linestart = PL_bufptr = s + prevlen;
595 PL_bufend = s + SvCUR(PL_linestr);
596 s = PL_bufptr;
09bef843
SB
597 PL_oldbufptr = s + oldprevlen;
598 PL_oldoldbufptr = s + oldoldprevlen;
599 if (PL_last_uni)
600 PL_last_uni = s + oldunilen;
601 if (PL_last_lop)
602 PL_last_lop = s + oldloplen;
a0d0e21e 603 incline(s);
ffb4593c
NT
604
605 /* debugger active and we're not compiling the debugger code,
606 * so store the line into the debugger's array of lines
607 */
3280af22 608 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
609 SV *sv = NEWSV(85,0);
610
611 sv_upgrade(sv, SVt_PVMG);
3280af22 612 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 613 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 614 }
463ee0b2 615 }
a687059c 616}
378cc40b 617
ffb4593c
NT
618/*
619 * S_check_uni
620 * Check the unary operators to ensure there's no ambiguity in how they're
621 * used. An ambiguous piece of code would be:
622 * rand + 5
623 * This doesn't mean rand() + 5. Because rand() is a unary operator,
624 * the +5 is its argument.
625 */
626
76e3520e 627STATIC void
cea2e8a9 628S_check_uni(pTHX)
ba106d47 629{
2f3197b3 630 char *s;
a0d0e21e 631 char *t;
0453d815 632 dTHR;
2f3197b3 633
3280af22 634 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 635 return;
3280af22
NIS
636 while (isSPACE(*PL_last_uni))
637 PL_last_uni++;
7e2040f0 638 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 639 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 640 return;
0453d815 641 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 642 char ch = *s;
0453d815
PM
643 *s = '\0';
644 Perl_warner(aTHX_ WARN_AMBIGUOUS,
645 "Warning: Use of \"%s\" without parens is ambiguous",
646 PL_last_uni);
647 *s = ch;
648 }
2f3197b3
LW
649}
650
ffb4593c
NT
651/* workaround to replace the UNI() macro with a function. Only the
652 * hints/uts.sh file mentions this. Other comments elsewhere in the
653 * source indicate Microport Unix might need it too.
654 */
655
ffed7fef
LW
656#ifdef CRIPPLED_CC
657
658#undef UNI
ffed7fef 659#define UNI(f) return uni(f,s)
ffed7fef 660
76e3520e 661STATIC int
cea2e8a9 662S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
663{
664 yylval.ival = f;
3280af22
NIS
665 PL_expect = XTERM;
666 PL_bufptr = s;
8f872242
NIS
667 PL_last_uni = PL_oldbufptr;
668 PL_last_lop_op = f;
ffed7fef
LW
669 if (*s == '(')
670 return FUNC1;
671 s = skipspace(s);
672 if (*s == '(')
673 return FUNC1;
674 else
675 return UNIOP;
676}
677
a0d0e21e
LW
678#endif /* CRIPPLED_CC */
679
ffb4593c
NT
680/*
681 * LOP : macro to build a list operator. Its behaviour has been replaced
682 * with a subroutine, S_lop() for which LOP is just another name.
683 */
684
a0d0e21e
LW
685#define LOP(f,x) return lop(f,x,s)
686
ffb4593c
NT
687/*
688 * S_lop
689 * Build a list operator (or something that might be one). The rules:
690 * - if we have a next token, then it's a list operator [why?]
691 * - if the next thing is an opening paren, then it's a function
692 * - else it's a list operator
693 */
694
76e3520e 695STATIC I32
a0be28da 696S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 697{
0f15f207 698 dTHR;
79072805 699 yylval.ival = f;
35c8bce7 700 CLINE;
3280af22
NIS
701 PL_expect = x;
702 PL_bufptr = s;
703 PL_last_lop = PL_oldbufptr;
704 PL_last_lop_op = f;
705 if (PL_nexttoke)
a0d0e21e 706 return LSTOP;
79072805
LW
707 if (*s == '(')
708 return FUNC;
709 s = skipspace(s);
710 if (*s == '(')
711 return FUNC;
712 else
713 return LSTOP;
714}
715
ffb4593c
NT
716/*
717 * S_force_next
9cbb5ea2 718 * When the lexer realizes it knows the next token (for instance,
ffb4593c 719 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
720 * to know what token to return the next time the lexer is called. Caller
721 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
722 * handles the token correctly.
ffb4593c
NT
723 */
724
76e3520e 725STATIC void
cea2e8a9 726S_force_next(pTHX_ I32 type)
79072805 727{
3280af22
NIS
728 PL_nexttype[PL_nexttoke] = type;
729 PL_nexttoke++;
730 if (PL_lex_state != LEX_KNOWNEXT) {
731 PL_lex_defer = PL_lex_state;
732 PL_lex_expect = PL_expect;
733 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
734 }
735}
736
ffb4593c
NT
737/*
738 * S_force_word
739 * When the lexer knows the next thing is a word (for instance, it has
740 * just seen -> and it knows that the next char is a word char, then
741 * it calls S_force_word to stick the next word into the PL_next lookahead.
742 *
743 * Arguments:
b1b65b59 744 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
745 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
746 * int check_keyword : if true, Perl checks to make sure the word isn't
747 * a keyword (do this if the word is a label, e.g. goto FOO)
748 * int allow_pack : if true, : characters will also be allowed (require,
749 * use, etc. do this)
9cbb5ea2 750 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
751 */
752
76e3520e 753STATIC char *
cea2e8a9 754S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 755{
463ee0b2
LW
756 register char *s;
757 STRLEN len;
758
759 start = skipspace(start);
760 s = start;
7e2040f0 761 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 762 (allow_pack && *s == ':') ||
15f0808c 763 (allow_initial_tick && *s == '\'') )
a0d0e21e 764 {
3280af22
NIS
765 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
766 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
767 return start;
768 if (token == METHOD) {
769 s = skipspace(s);
770 if (*s == '(')
3280af22 771 PL_expect = XTERM;
463ee0b2 772 else {
3280af22 773 PL_expect = XOPERATOR;
463ee0b2 774 }
79072805 775 }
3280af22
NIS
776 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
777 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
778 force_next(token);
779 }
780 return s;
781}
782
ffb4593c
NT
783/*
784 * S_force_ident
9cbb5ea2 785 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
786 * text only contains the "foo" portion. The first argument is a pointer
787 * to the "foo", and the second argument is the type symbol to prefix.
788 * Forces the next token to be a "WORD".
9cbb5ea2 789 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
790 */
791
76e3520e 792STATIC void
cea2e8a9 793S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
794{
795 if (s && *s) {
11343788 796 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 797 PL_nextval[PL_nexttoke].opval = o;
79072805 798 force_next(WORD);
748a9306 799 if (kind) {
e858de61 800 dTHR; /* just for in_eval */
11343788 801 o->op_private = OPpCONST_ENTERED;
55497cff
PP
802 /* XXX see note in pp_entereval() for why we forgo typo
803 warnings if the symbol must be introduced in an eval.
804 GSAR 96-10-12 */
3280af22 805 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
806 kind == '$' ? SVt_PV :
807 kind == '@' ? SVt_PVAV :
808 kind == '%' ? SVt_PVHV :
809 SVt_PVGV
810 );
748a9306 811 }
79072805
LW
812 }
813}
814
1571675a
GS
815NV
816Perl_str_to_version(pTHX_ SV *sv)
817{
818 NV retval = 0.0;
819 NV nshift = 1.0;
820 STRLEN len;
821 char *start = SvPVx(sv,len);
822 bool utf = SvUTF8(sv);
823 char *end = start + len;
824 while (start < end) {
825 I32 skip;
826 UV n;
827 if (utf)
828 n = utf8_to_uv((U8*)start, &skip);
829 else {
830 n = *(U8*)start;
831 skip = 1;
832 }
833 retval += ((NV)n)/nshift;
834 start += skip;
835 nshift *= 1000;
836 }
837 return retval;
838}
839
ffb4593c
NT
840/*
841 * S_force_version
842 * Forces the next token to be a version number.
843 */
844
76e3520e 845STATIC char *
cea2e8a9 846S_force_version(pTHX_ char *s)
89bfa8cd
PP
847{
848 OP *version = Nullop;
44dcb63b 849 char *d;
89bfa8cd
PP
850
851 s = skipspace(s);
852
44dcb63b 853 d = s;
dd629d5b 854 if (*d == 'v')
44dcb63b 855 d++;
44dcb63b 856 if (isDIGIT(*d)) {
a7cb1f99 857 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 858 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 859 SV *ver;
89bfa8cd 860 s = scan_num(s);
89bfa8cd 861 version = yylval.opval;
dd629d5b
GS
862 ver = cSVOPx(version)->op_sv;
863 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 864 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
865 SvNVX(ver) = str_to_version(ver);
866 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 867 }
89bfa8cd
PP
868 }
869 }
870
871 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 872 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd
PP
873 force_next(WORD);
874
875 return (s);
876}
877
ffb4593c
NT
878/*
879 * S_tokeq
880 * Tokenize a quoted string passed in as an SV. It finds the next
881 * chunk, up to end of string or a backslash. It may make a new
882 * SV containing that chunk (if HINT_NEW_STRING is on). It also
883 * turns \\ into \.
884 */
885
76e3520e 886STATIC SV *
cea2e8a9 887S_tokeq(pTHX_ SV *sv)
79072805
LW
888{
889 register char *s;
890 register char *send;
891 register char *d;
b3ac6de7
IZ
892 STRLEN len = 0;
893 SV *pv = sv;
79072805
LW
894
895 if (!SvLEN(sv))
b3ac6de7 896 goto finish;
79072805 897
a0d0e21e 898 s = SvPV_force(sv, len);
748a9306 899 if (SvIVX(sv) == -1)
b3ac6de7 900 goto finish;
463ee0b2 901 send = s + len;
79072805
LW
902 while (s < send && *s != '\\')
903 s++;
904 if (s == send)
b3ac6de7 905 goto finish;
79072805 906 d = s;
3280af22 907 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 908 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
909 while (s < send) {
910 if (*s == '\\') {
a0d0e21e 911 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
912 s++; /* all that, just for this */
913 }
914 *d++ = *s++;
915 }
916 *d = '\0';
463ee0b2 917 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 918 finish:
3280af22 919 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 920 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
921 return sv;
922}
923
ffb4593c
NT
924/*
925 * Now come three functions related to double-quote context,
926 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
927 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
928 * interact with PL_lex_state, and create fake ( ... ) argument lists
929 * to handle functions and concatenation.
930 * They assume that whoever calls them will be setting up a fake
931 * join call, because each subthing puts a ',' after it. This lets
932 * "lower \luPpEr"
933 * become
934 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
935 *
936 * (I'm not sure whether the spurious commas at the end of lcfirst's
937 * arguments and join's arguments are created or not).
938 */
939
940/*
941 * S_sublex_start
942 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
943 *
944 * Pattern matching will set PL_lex_op to the pattern-matching op to
945 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
946 *
947 * OP_CONST and OP_READLINE are easy--just make the new op and return.
948 *
949 * Everything else becomes a FUNC.
950 *
951 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
952 * had an OP_CONST or OP_READLINE). This just sets us up for a
953 * call to S_sublex_push().
954 */
955
76e3520e 956STATIC I32
cea2e8a9 957S_sublex_start(pTHX)
79072805
LW
958{
959 register I32 op_type = yylval.ival;
79072805
LW
960
961 if (op_type == OP_NULL) {
3280af22
NIS
962 yylval.opval = PL_lex_op;
963 PL_lex_op = Nullop;
79072805
LW
964 return THING;
965 }
966 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 967 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
968
969 if (SvTYPE(sv) == SVt_PVIV) {
970 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
971 STRLEN len;
972 char *p;
973 SV *nsv;
974
975 p = SvPV(sv, len);
79cb57f6 976 nsv = newSVpvn(p, len);
b3ac6de7
IZ
977 SvREFCNT_dec(sv);
978 sv = nsv;
979 }
980 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 981 PL_lex_stuff = Nullsv;
79072805
LW
982 return THING;
983 }
984
3280af22
NIS
985 PL_sublex_info.super_state = PL_lex_state;
986 PL_sublex_info.sub_inwhat = op_type;
987 PL_sublex_info.sub_op = PL_lex_op;
988 PL_lex_state = LEX_INTERPPUSH;
55497cff 989
3280af22
NIS
990 PL_expect = XTERM;
991 if (PL_lex_op) {
992 yylval.opval = PL_lex_op;
993 PL_lex_op = Nullop;
55497cff
PP
994 return PMFUNC;
995 }
996 else
997 return FUNC;
998}
999
ffb4593c
NT
1000/*
1001 * S_sublex_push
1002 * Create a new scope to save the lexing state. The scope will be
1003 * ended in S_sublex_done. Returns a '(', starting the function arguments
1004 * to the uc, lc, etc. found before.
1005 * Sets PL_lex_state to LEX_INTERPCONCAT.
1006 */
1007
76e3520e 1008STATIC I32
cea2e8a9 1009S_sublex_push(pTHX)
55497cff 1010{
0f15f207 1011 dTHR;
f46d017c 1012 ENTER;
55497cff 1013
3280af22
NIS
1014 PL_lex_state = PL_sublex_info.super_state;
1015 SAVEI32(PL_lex_dojoin);
1016 SAVEI32(PL_lex_brackets);
3280af22
NIS
1017 SAVEI32(PL_lex_casemods);
1018 SAVEI32(PL_lex_starts);
1019 SAVEI32(PL_lex_state);
7766f137 1020 SAVEVPTR(PL_lex_inpat);
3280af22 1021 SAVEI32(PL_lex_inwhat);
57843af0 1022 SAVECOPLINE(PL_curcop);
3280af22
NIS
1023 SAVEPPTR(PL_bufptr);
1024 SAVEPPTR(PL_oldbufptr);
1025 SAVEPPTR(PL_oldoldbufptr);
1026 SAVEPPTR(PL_linestart);
1027 SAVESPTR(PL_linestr);
1028 SAVEPPTR(PL_lex_brackstack);
1029 SAVEPPTR(PL_lex_casestack);
1030
1031 PL_linestr = PL_lex_stuff;
1032 PL_lex_stuff = Nullsv;
1033
9cbb5ea2
GS
1034 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1035 = SvPVX(PL_linestr);
3280af22
NIS
1036 PL_bufend += SvCUR(PL_linestr);
1037 SAVEFREESV(PL_linestr);
1038
1039 PL_lex_dojoin = FALSE;
1040 PL_lex_brackets = 0;
3280af22
NIS
1041 New(899, PL_lex_brackstack, 120, char);
1042 New(899, PL_lex_casestack, 12, char);
1043 SAVEFREEPV(PL_lex_brackstack);
1044 SAVEFREEPV(PL_lex_casestack);
1045 PL_lex_casemods = 0;
1046 *PL_lex_casestack = '\0';
1047 PL_lex_starts = 0;
1048 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1049 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1050
1051 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1052 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1053 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1054 else
3280af22 1055 PL_lex_inpat = Nullop;
79072805 1056
55497cff 1057 return '(';
79072805
LW
1058}
1059
ffb4593c
NT
1060/*
1061 * S_sublex_done
1062 * Restores lexer state after a S_sublex_push.
1063 */
1064
76e3520e 1065STATIC I32
cea2e8a9 1066S_sublex_done(pTHX)
79072805 1067{
3280af22
NIS
1068 if (!PL_lex_starts++) {
1069 PL_expect = XOPERATOR;
79cb57f6 1070 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1071 return THING;
1072 }
1073
3280af22
NIS
1074 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1075 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1076 return yylex();
79072805
LW
1077 }
1078
ffb4593c 1079 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1080 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1081 PL_linestr = PL_lex_repl;
1082 PL_lex_inpat = 0;
1083 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1084 PL_bufend += SvCUR(PL_linestr);
1085 SAVEFREESV(PL_linestr);
1086 PL_lex_dojoin = FALSE;
1087 PL_lex_brackets = 0;
3280af22
NIS
1088 PL_lex_casemods = 0;
1089 *PL_lex_casestack = '\0';
1090 PL_lex_starts = 0;
25da4f38 1091 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1092 PL_lex_state = LEX_INTERPNORMAL;
1093 PL_lex_starts++;
e9fa98b2
HS
1094 /* we don't clear PL_lex_repl here, so that we can check later
1095 whether this is an evalled subst; that means we rely on the
1096 logic to ensure sublex_done() is called again only via the
1097 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1098 }
e9fa98b2 1099 else {
3280af22 1100 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1101 PL_lex_repl = Nullsv;
1102 }
79072805 1103 return ',';
ffed7fef
LW
1104 }
1105 else {
f46d017c 1106 LEAVE;
3280af22
NIS
1107 PL_bufend = SvPVX(PL_linestr);
1108 PL_bufend += SvCUR(PL_linestr);
1109 PL_expect = XOPERATOR;
09bef843 1110 PL_sublex_info.sub_inwhat = 0;
79072805 1111 return ')';
ffed7fef
LW
1112 }
1113}
1114
02aa26ce
NT
1115/*
1116 scan_const
1117
1118 Extracts a pattern, double-quoted string, or transliteration. This
1119 is terrifying code.
1120
3280af22
NIS
1121 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1122 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1123 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1124
9b599b2a
GS
1125 Returns a pointer to the character scanned up to. Iff this is
1126 advanced from the start pointer supplied (ie if anything was
1127 successfully parsed), will leave an OP for the substring scanned
1128 in yylval. Caller must intuit reason for not parsing further
1129 by looking at the next characters herself.
1130
02aa26ce
NT
1131 In patterns:
1132 backslashes:
1133 double-quoted style: \r and \n
1134 regexp special ones: \D \s
1135 constants: \x3
1136 backrefs: \1 (deprecated in substitution replacements)
1137 case and quoting: \U \Q \E
1138 stops on @ and $, but not for $ as tail anchor
1139
1140 In transliterations:
1141 characters are VERY literal, except for - not at the start or end
1142 of the string, which indicates a range. scan_const expands the
1143 range to the full set of intermediate characters.
1144
1145 In double-quoted strings:
1146 backslashes:
1147 double-quoted style: \r and \n
1148 constants: \x3
1149 backrefs: \1 (deprecated)
1150 case and quoting: \U \Q \E
1151 stops on @ and $
1152
1153 scan_const does *not* construct ops to handle interpolated strings.
1154 It stops processing as soon as it finds an embedded $ or @ variable
1155 and leaves it to the caller to work out what's going on.
1156
1157 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1158
1159 $ in pattern could be $foo or could be tail anchor. Assumption:
1160 it's a tail anchor if $ is the last thing in the string, or if it's
1161 followed by one of ")| \n\t"
1162
1163 \1 (backreferences) are turned into $1
1164
1165 The structure of the code is
1166 while (there's a character to process) {
1167 handle transliteration ranges
1168 skip regexp comments
1169 skip # initiated comments in //x patterns
1170 check for embedded @foo
1171 check for embedded scalars
1172 if (backslash) {
1173 leave intact backslashes from leave (below)
1174 deprecate \1 in strings and sub replacements
1175 handle string-changing backslashes \l \U \Q \E, etc.
1176 switch (what was escaped) {
1177 handle - in a transliteration (becomes a literal -)
1178 handle \132 octal characters
1179 handle 0x15 hex characters
1180 handle \cV (control V)
1181 handle printf backslashes (\f, \r, \n, etc)
1182 } (end switch)
1183 } (end if backslash)
1184 } (end while character to read)
1185
1186*/
1187
76e3520e 1188STATIC char *
cea2e8a9 1189S_scan_const(pTHX_ char *start)
79072805 1190{
3280af22 1191 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1192 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1193 register char *s = start; /* start of the constant */
1194 register char *d = SvPVX(sv); /* destination for copies */
1195 bool dorange = FALSE; /* are we in a translit range? */
7e2040f0 1196 bool has_utf = FALSE; /* embedded \x{} */
02aa26ce 1197 I32 len; /* ? */
012bcf8d
GS
1198 UV uv;
1199
ac2262e3 1200 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1201 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1202 : UTF;
ac2262e3 1203 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1204 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1205 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1206 : UTF;
dff6d3cd 1207 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1208 PL_lex_inpat
4a2d328f 1209 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1210 : "";
79072805
LW
1211
1212 while (s < send || dorange) {
02aa26ce 1213 /* get transliterations out of the way (they're most literal) */
3280af22 1214 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1215 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1216 if (dorange) {
02aa26ce 1217 I32 i; /* current expanded character */
8ada0baa 1218 I32 min; /* first character in range */
02aa26ce
NT
1219 I32 max; /* last character in range */
1220
1221 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1222 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1223 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1224 d -= 2; /* eat the first char and the - */
1225
8ada0baa
JH
1226 min = (U8)*d; /* first char in range */
1227 max = (U8)d[1]; /* last char in range */
1228
1229#ifndef ASCIIish
1230 if ((isLOWER(min) && isLOWER(max)) ||
1231 (isUPPER(min) && isUPPER(max))) {
1232 if (isLOWER(min)) {
1233 for (i = min; i <= max; i++)
1234 if (isLOWER(i))
1235 *d++ = i;
1236 } else {
1237 for (i = min; i <= max; i++)
1238 if (isUPPER(i))
1239 *d++ = i;
1240 }
1241 }
1242 else
1243#endif
1244 for (i = min; i <= max; i++)
1245 *d++ = i;
02aa26ce
NT
1246
1247 /* mark the range as done, and continue */
79072805
LW
1248 dorange = FALSE;
1249 continue;
1250 }
02aa26ce
NT
1251
1252 /* range begins (ignore - as first or last char) */
79072805 1253 else if (*s == '-' && s+1 < send && s != start) {
a0ed51b3 1254 if (utf) {
a176fa2a 1255 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1256 s++;
1257 continue;
1258 }
79072805
LW
1259 dorange = TRUE;
1260 s++;
1261 }
1262 }
02aa26ce
NT
1263
1264 /* if we get here, we're not doing a transliteration */
1265
0f5d15d6
IZ
1266 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1267 except for the last char, which will be done separately. */
3280af22 1268 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1269 if (s[2] == '#') {
1270 while (s < send && *s != ')')
1271 *d++ = *s++;
155aba94
GS
1272 }
1273 else if (s[2] == '{' /* This should match regcomp.c */
1274 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1275 {
cc6b7395 1276 I32 count = 1;
0f5d15d6 1277 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1278 char c;
1279
d9f97599
GS
1280 while (count && (c = *regparse)) {
1281 if (c == '\\' && regparse[1])
1282 regparse++;
cc6b7395
IZ
1283 else if (c == '{')
1284 count++;
1285 else if (c == '}')
1286 count--;
d9f97599 1287 regparse++;
cc6b7395 1288 }
5bdf89e7
IZ
1289 if (*regparse != ')') {
1290 regparse--; /* Leave one char for continuation. */
cc6b7395 1291 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1292 }
0f5d15d6 1293 while (s < regparse)
cc6b7395
IZ
1294 *d++ = *s++;
1295 }
748a9306 1296 }
02aa26ce
NT
1297
1298 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1299 else if (*s == '#' && PL_lex_inpat &&
1300 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1301 while (s+1 < send && *s != '\n')
1302 *d++ = *s++;
1303 }
02aa26ce
NT
1304
1305 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
7e2040f0
GS
1306 else if (*s == '@' && s[1]
1307 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$", s[1])))
79072805 1308 break;
02aa26ce
NT
1309
1310 /* check for embedded scalars. only stop if we're sure it's a
1311 variable.
1312 */
79072805 1313 else if (*s == '$') {
3280af22 1314 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1315 break;
c277df42 1316 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1317 break; /* in regexp, $ might be tail anchor */
1318 }
02aa26ce 1319
a0ed51b3
LW
1320 /* (now in tr/// code again) */
1321
d008e5eb 1322 if (*s & 0x80 && thisutf) {
012bcf8d
GS
1323 (void)utf8_to_uv((U8*)s, &len);
1324 if (len == 1) {
1325 /* illegal UTF8, make it valid */
012bcf8d 1326 char *old_pvx = SvPVX(sv);
bee7c419
GS
1327 /* need space for one extra char (NOTE: SvCUR() not set here) */
1328 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
012bcf8d
GS
1329 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1330 }
1331 else {
1332 while (len--)
1333 *d++ = *s++;
1334 }
1335 has_utf = TRUE;
1336 continue;
a0ed51b3
LW
1337 }
1338
02aa26ce 1339 /* backslashes */
79072805
LW
1340 if (*s == '\\' && s+1 < send) {
1341 s++;
02aa26ce
NT
1342
1343 /* some backslashes we leave behind */
c9f97d15 1344 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1345 *d++ = '\\';
1346 *d++ = *s++;
1347 continue;
1348 }
02aa26ce
NT
1349
1350 /* deprecate \1 in strings and substitution replacements */
3280af22 1351 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1352 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1353 {
d008e5eb 1354 dTHR; /* only for ckWARN */
599cee73 1355 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1356 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1357 *--s = '$';
1358 break;
1359 }
02aa26ce
NT
1360
1361 /* string-change backslash escapes */
3280af22 1362 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1363 --s;
1364 break;
1365 }
02aa26ce
NT
1366
1367 /* if we get here, it's either a quoted -, or a digit */
79072805 1368 switch (*s) {
02aa26ce
NT
1369
1370 /* quoted - in transliterations */
79072805 1371 case '-':
3280af22 1372 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1373 *d++ = *s++;
1374 continue;
1375 }
1376 /* FALL THROUGH */
1377 default:
11b8faa4
JH
1378 {
1379 dTHR;
e476b1b5
GS
1380 if (ckWARN(WARN_MISC) && isALPHA(*s))
1381 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1382 "Unrecognized escape \\%c passed through",
1383 *s);
1384 /* default action is to copy the quoted character */
1385 *d++ = *s++;
1386 continue;
1387 }
02aa26ce
NT
1388
1389 /* \132 indicates an octal constant */
79072805
LW
1390 case '0': case '1': case '2': case '3':
1391 case '4': case '5': case '6': case '7':
012bcf8d 1392 uv = (UV)scan_oct(s, 3, &len);
79072805 1393 s += len;
012bcf8d 1394 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1395
1396 /* \x24 indicates a hex constant */
79072805 1397 case 'x':
a0ed51b3
LW
1398 ++s;
1399 if (*s == '{') {
1400 char* e = strchr(s, '}');
adaeee49 1401 if (!e) {
a0ed51b3 1402 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1403 e = s;
1404 }
012bcf8d
GS
1405 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1406 s = e + 1;
a0ed51b3
LW
1407 }
1408 else {
012bcf8d
GS
1409 uv = (UV)scan_hex(s, 2, &len);
1410 s += len;
1411 }
1412
1413 NUM_ESCAPE_INSERT:
1414 /* Insert oct or hex escaped character.
1415 * There will always enough room in sv since such escapes will
1416 * be longer than any utf8 sequence they can end up as
1417 */
1418 if (uv > 127) {
1419 if (!thisutf && !has_utf && uv > 255) {
1420 /* might need to recode whatever we have accumulated so far
1421 * if it contains any hibit chars
1422 */
1423 int hicount = 0;
1424 char *c;
1425 for (c = SvPVX(sv); c < d; c++) {
1426 if (*c & 0x80)
1427 hicount++;
1428 }
1429 if (hicount) {
1430 char *old_pvx = SvPVX(sv);
1431 char *src, *dst;
1432 d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx);
1433
1434 src = d - 1;
1435 d += hicount;
1436 dst = d - 1;
1437
1438 while (src < dst) {
1439 if (*src & 0x80) {
1440 dst--;
1441 uv_to_utf8((U8*)dst, (U8)*src--);
1442 dst--;
1443 }
1444 else {
1445 *dst-- = *src--;
1446 }
1447 }
1448 }
1449 }
1450
1451 if (thisutf || uv > 255) {
1452 d = (char*)uv_to_utf8((U8*)d, uv);
7e2040f0 1453 has_utf = TRUE;
012bcf8d 1454 }
a0ed51b3 1455 else {
012bcf8d 1456 *d++ = (char)uv;
a0ed51b3 1457 }
012bcf8d
GS
1458 }
1459 else {
1460 *d++ = (char)uv;
a0ed51b3 1461 }
79072805 1462 continue;
02aa26ce 1463
4a2d328f
IZ
1464 /* \N{latin small letter a} is a named character */
1465 case 'N':
423cee85
JH
1466 ++s;
1467 if (*s == '{') {
1468 char* e = strchr(s, '}');
155aba94 1469 SV *res;
423cee85
JH
1470 STRLEN len;
1471 char *str;
423cee85
JH
1472
1473 if (!e) {
5777a3f7 1474 yyerror("Missing right brace on \\N{}");
423cee85
JH
1475 e = s - 1;
1476 goto cont_scan;
1477 }
1478 res = newSVpvn(s + 1, e - s - 1);
1479 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1480 res, Nullsv, "\\N{...}" );
423cee85 1481 str = SvPV(res,len);
e1992b6d
GS
1482 if (len > 1)
1483 has_utf = TRUE;
423cee85
JH
1484 if (len > e - s + 4) {
1485 char *odest = SvPVX(sv);
1486
1487 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1488 d = SvPVX(sv) + (d - odest);
1489 }
1490 Copy(str, d, len, char);
1491 d += len;
1492 SvREFCNT_dec(res);
1493 cont_scan:
1494 s = e + 1;
1495 }
1496 else
5777a3f7 1497 yyerror("Missing braces on \\N{}");
423cee85
JH
1498 continue;
1499
02aa26ce 1500 /* \c is a control character */
79072805
LW
1501 case 'c':
1502 s++;
9d116dd7
JH
1503#ifdef EBCDIC
1504 *d = *s++;
1505 if (isLOWER(*d))
1506 *d = toUPPER(*d);
1507 *d++ = toCTRL(*d);
1508#else
bbce6d69
PP
1509 len = *s++;
1510 *d++ = toCTRL(len);
9d116dd7 1511#endif
79072805 1512 continue;
02aa26ce
NT
1513
1514 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1515 case 'b':
1516 *d++ = '\b';
1517 break;
1518 case 'n':
1519 *d++ = '\n';
1520 break;
1521 case 'r':
1522 *d++ = '\r';
1523 break;
1524 case 'f':
1525 *d++ = '\f';
1526 break;
1527 case 't':
1528 *d++ = '\t';
1529 break;
34a3fe2a
PP
1530#ifdef EBCDIC
1531 case 'e':
1532 *d++ = '\047'; /* CP 1047 */
1533 break;
1534 case 'a':
1535 *d++ = '\057'; /* CP 1047 */
1536 break;
1537#else
79072805
LW
1538 case 'e':
1539 *d++ = '\033';
1540 break;
1541 case 'a':
1542 *d++ = '\007';
1543 break;
34a3fe2a 1544#endif
02aa26ce
NT
1545 } /* end switch */
1546
79072805
LW
1547 s++;
1548 continue;
02aa26ce
NT
1549 } /* end if (backslash) */
1550
79072805 1551 *d++ = *s++;
02aa26ce
NT
1552 } /* while loop to process each character */
1553
1554 /* terminate the string and set up the sv */
79072805 1555 *d = '\0';
463ee0b2 1556 SvCUR_set(sv, d - SvPVX(sv));
79072805 1557 SvPOK_on(sv);
7e2040f0
GS
1558 if (has_utf)
1559 SvUTF8_on(sv);
79072805 1560
02aa26ce 1561 /* shrink the sv if we allocated more than we used */
79072805
LW
1562 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1563 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1564 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1565 }
02aa26ce 1566
9b599b2a 1567 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1568 if (s > PL_bufptr) {
1569 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1570 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1571 sv, Nullsv,
3280af22 1572 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1573 ? "tr"
3280af22 1574 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1575 ? "s"
1576 : "qq")));
79072805 1577 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1578 } else
8990e307 1579 SvREFCNT_dec(sv);
79072805
LW
1580 return s;
1581}
1582
ffb4593c
NT
1583/* S_intuit_more
1584 * Returns TRUE if there's more to the expression (e.g., a subscript),
1585 * FALSE otherwise.
ffb4593c
NT
1586 *
1587 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1588 *
1589 * ->[ and ->{ return TRUE
1590 * { and [ outside a pattern are always subscripts, so return TRUE
1591 * if we're outside a pattern and it's not { or [, then return FALSE
1592 * if we're in a pattern and the first char is a {
1593 * {4,5} (any digits around the comma) returns FALSE
1594 * if we're in a pattern and the first char is a [
1595 * [] returns FALSE
1596 * [SOMETHING] has a funky algorithm to decide whether it's a
1597 * character class or not. It has to deal with things like
1598 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1599 * anything else returns TRUE
1600 */
1601
9cbb5ea2
GS
1602/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1603
76e3520e 1604STATIC int
cea2e8a9 1605S_intuit_more(pTHX_ register char *s)
79072805 1606{
3280af22 1607 if (PL_lex_brackets)
79072805
LW
1608 return TRUE;
1609 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1610 return TRUE;
1611 if (*s != '{' && *s != '[')
1612 return FALSE;
3280af22 1613 if (!PL_lex_inpat)
79072805
LW
1614 return TRUE;
1615
1616 /* In a pattern, so maybe we have {n,m}. */
1617 if (*s == '{') {
1618 s++;
1619 if (!isDIGIT(*s))
1620 return TRUE;
1621 while (isDIGIT(*s))
1622 s++;
1623 if (*s == ',')
1624 s++;
1625 while (isDIGIT(*s))
1626 s++;
1627 if (*s == '}')
1628 return FALSE;
1629 return TRUE;
1630
1631 }
1632
1633 /* On the other hand, maybe we have a character class */
1634
1635 s++;
1636 if (*s == ']' || *s == '^')
1637 return FALSE;
1638 else {
ffb4593c 1639 /* this is terrifying, and it works */
79072805
LW
1640 int weight = 2; /* let's weigh the evidence */
1641 char seen[256];
f27ffc4a 1642 unsigned char un_char = 255, last_un_char;
93a17b20 1643 char *send = strchr(s,']');
3280af22 1644 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1645
1646 if (!send) /* has to be an expression */
1647 return TRUE;
1648
1649 Zero(seen,256,char);
1650 if (*s == '$')
1651 weight -= 3;
1652 else if (isDIGIT(*s)) {
1653 if (s[1] != ']') {
1654 if (isDIGIT(s[1]) && s[2] == ']')
1655 weight -= 10;
1656 }
1657 else
1658 weight -= 100;
1659 }
1660 for (; s < send; s++) {
1661 last_un_char = un_char;
1662 un_char = (unsigned char)*s;
1663 switch (*s) {
1664 case '@':
1665 case '&':
1666 case '$':
1667 weight -= seen[un_char] * 10;
7e2040f0 1668 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1669 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1670 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1671 weight -= 100;
1672 else
1673 weight -= 10;
1674 }
1675 else if (*s == '$' && s[1] &&
93a17b20
LW
1676 strchr("[#!%*<>()-=",s[1])) {
1677 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1678 weight -= 10;
1679 else
1680 weight -= 1;
1681 }
1682 break;
1683 case '\\':
1684 un_char = 254;
1685 if (s[1]) {
93a17b20 1686 if (strchr("wds]",s[1]))
79072805
LW
1687 weight += 100;
1688 else if (seen['\''] || seen['"'])
1689 weight += 1;
93a17b20 1690 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1691 weight += 40;
1692 else if (isDIGIT(s[1])) {
1693 weight += 40;
1694 while (s[1] && isDIGIT(s[1]))
1695 s++;
1696 }
1697 }
1698 else
1699 weight += 100;
1700 break;
1701 case '-':
1702 if (s[1] == '\\')
1703 weight += 50;
93a17b20 1704 if (strchr("aA01! ",last_un_char))
79072805 1705 weight += 30;
93a17b20 1706 if (strchr("zZ79~",s[1]))
79072805 1707 weight += 30;
f27ffc4a
GS
1708 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1709 weight -= 5; /* cope with negative subscript */
79072805
LW
1710 break;
1711 default:
93a17b20 1712 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1713 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1714 char *d = tmpbuf;
1715 while (isALPHA(*s))
1716 *d++ = *s++;
1717 *d = '\0';
1718 if (keyword(tmpbuf, d - tmpbuf))
1719 weight -= 150;
1720 }
1721 if (un_char == last_un_char + 1)
1722 weight += 5;
1723 weight -= seen[un_char];
1724 break;
1725 }
1726 seen[un_char]++;
1727 }
1728 if (weight >= 0) /* probably a character class */
1729 return FALSE;
1730 }
1731
1732 return TRUE;
1733}
ffed7fef 1734
ffb4593c
NT
1735/*
1736 * S_intuit_method
1737 *
1738 * Does all the checking to disambiguate
1739 * foo bar
1740 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1741 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1742 *
1743 * First argument is the stuff after the first token, e.g. "bar".
1744 *
1745 * Not a method if bar is a filehandle.
1746 * Not a method if foo is a subroutine prototyped to take a filehandle.
1747 * Not a method if it's really "Foo $bar"
1748 * Method if it's "foo $bar"
1749 * Not a method if it's really "print foo $bar"
1750 * Method if it's really "foo package::" (interpreted as package->foo)
1751 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1752 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1753 * =>
1754 */
1755
76e3520e 1756STATIC int
cea2e8a9 1757S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1758{
1759 char *s = start + (*start == '$');
3280af22 1760 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1761 STRLEN len;
1762 GV* indirgv;
1763
1764 if (gv) {
b6c543e3 1765 CV *cv;
a0d0e21e
LW
1766 if (GvIO(gv))
1767 return 0;
b6c543e3
IZ
1768 if ((cv = GvCVu(gv))) {
1769 char *proto = SvPVX(cv);
1770 if (proto) {
1771 if (*proto == ';')
1772 proto++;
1773 if (*proto == '*')
1774 return 0;
1775 }
1776 } else
a0d0e21e
LW
1777 gv = 0;
1778 }
8903cb82 1779 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1780 /* start is the beginning of the possible filehandle/object,
1781 * and s is the end of it
1782 * tmpbuf is a copy of it
1783 */
1784
a0d0e21e 1785 if (*start == '$') {
3280af22 1786 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1787 return 0;
1788 s = skipspace(s);
3280af22
NIS
1789 PL_bufptr = start;
1790 PL_expect = XREF;
a0d0e21e
LW
1791 return *s == '(' ? FUNCMETH : METHOD;
1792 }
1793 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1794 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1795 len -= 2;
1796 tmpbuf[len] = '\0';
1797 goto bare_package;
1798 }
1799 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1800 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1801 return 0;
1802 /* filehandle or package name makes it a method */
89bfa8cd 1803 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1804 s = skipspace(s);
3280af22 1805 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1806 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1807 bare_package:
3280af22 1808 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1809 newSVpvn(tmpbuf,len));
3280af22
NIS
1810 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1811 PL_expect = XTERM;
a0d0e21e 1812 force_next(WORD);
3280af22 1813 PL_bufptr = s;
a0d0e21e
LW
1814 return *s == '(' ? FUNCMETH : METHOD;
1815 }
1816 }
1817 return 0;
1818}
1819
ffb4593c
NT
1820/*
1821 * S_incl_perldb
1822 * Return a string of Perl code to load the debugger. If PERL5DB
1823 * is set, it will return the contents of that, otherwise a
1824 * compile-time require of perl5db.pl.
1825 */
1826
76e3520e 1827STATIC char*
cea2e8a9 1828S_incl_perldb(pTHX)
a0d0e21e 1829{
3280af22 1830 if (PL_perldb) {
76e3520e 1831 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1832
1833 if (pdb)
1834 return pdb;
61bb5906 1835 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1836 return "BEGIN { require 'perl5db.pl' }";
1837 }
1838 return "";
1839}
1840
1841
16d20bd9
AD
1842/* Encoded script support. filter_add() effectively inserts a
1843 * 'pre-processing' function into the current source input stream.
1844 * Note that the filter function only applies to the current source file
1845 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1846 *
1847 * The datasv parameter (which may be NULL) can be used to pass
1848 * private data to this instance of the filter. The filter function
1849 * can recover the SV using the FILTER_DATA macro and use it to
1850 * store private buffers and state information.
1851 *
1852 * The supplied datasv parameter is upgraded to a PVIO type
e0c19803
GS
1853 * and the IoDIRP field is used to store the function pointer,
1854 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1855 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1856 * private use must be set using malloc'd pointers.
1857 */
16d20bd9
AD
1858
1859SV *
864dbfa3 1860Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1861{
f4c556ac
GS
1862 if (!funcp)
1863 return Nullsv;
1864
3280af22
NIS
1865 if (!PL_rsfp_filters)
1866 PL_rsfp_filters = newAV();
16d20bd9 1867 if (!datasv)
8c52afec 1868 datasv = NEWSV(255,0);
16d20bd9 1869 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1870 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
16d20bd9 1871 IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
e0c19803 1872 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1873 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1874 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1875 av_unshift(PL_rsfp_filters, 1);
1876 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1877 return(datasv);
1878}
1879
1880
1881/* Delete most recently added instance of this filter function. */
a0d0e21e 1882void
864dbfa3 1883Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1884{
e0c19803 1885 SV *datasv;
f4c556ac 1886 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1887 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1888 return;
1889 /* if filter is on top of stack (usual case) just pop it off */
e0c19803
GS
1890 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
1891 if (IoDIRP(datasv) == (DIR*)funcp) {
1892 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
1893 IoDIRP(datasv) = (DIR*)NULL;
3280af22 1894 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1895
16d20bd9
AD
1896 return;
1897 }
1898 /* we need to search for the correct entry and clear it */
cea2e8a9 1899 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1900}
1901
1902
1903/* Invoke the n'th filter function for the current rsfp. */
1904I32
864dbfa3 1905Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1906
1907
1908 /* 0 = read one text line */
a0d0e21e 1909{
16d20bd9
AD
1910 filter_t funcp;
1911 SV *datasv = NULL;
e50aee73 1912
3280af22 1913 if (!PL_rsfp_filters)
16d20bd9 1914 return -1;
3280af22 1915 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1916 /* Provide a default input filter to make life easy. */
1917 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1918 DEBUG_P(PerlIO_printf(Perl_debug_log,
1919 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1920 if (maxlen) {
1921 /* Want a block */
1922 int len ;
1923 int old_len = SvCUR(buf_sv) ;
1924
1925 /* ensure buf_sv is large enough */
1926 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1927 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1928 if (PerlIO_error(PL_rsfp))
37120919
AD
1929 return -1; /* error */
1930 else
1931 return 0 ; /* end of file */
1932 }
16d20bd9
AD
1933 SvCUR_set(buf_sv, old_len + len) ;
1934 } else {
1935 /* Want a line */
3280af22
NIS
1936 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1937 if (PerlIO_error(PL_rsfp))
37120919
AD
1938 return -1; /* error */
1939 else
1940 return 0 ; /* end of file */
1941 }
16d20bd9
AD
1942 }
1943 return SvCUR(buf_sv);
1944 }
1945 /* Skip this filter slot if filter has been deleted */
3280af22 1946 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1947 DEBUG_P(PerlIO_printf(Perl_debug_log,
1948 "filter_read %d: skipped (filter deleted)\n",
1949 idx));
16d20bd9
AD
1950 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1951 }
1952 /* Get function pointer hidden within datasv */
1953 funcp = (filter_t)IoDIRP(datasv);
f4c556ac
GS
1954 DEBUG_P(PerlIO_printf(Perl_debug_log,
1955 "filter_read %d: via function %p (%s)\n",
1956 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1957 /* Call function. The function is expected to */
1958 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1959 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1960 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1961}
1962
76e3520e 1963STATIC char *
cea2e8a9 1964S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1965{
c39cd008 1966#ifdef PERL_CR_FILTER
3280af22 1967 if (!PL_rsfp_filters) {
c39cd008 1968 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
1969 }
1970#endif
3280af22 1971 if (PL_rsfp_filters) {
16d20bd9 1972
55497cff
PP
1973 if (!append)
1974 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1975 if (FILTER_READ(0, sv, 0) > 0)
1976 return ( SvPVX(sv) ) ;
1977 else
1978 return Nullch ;
1979 }
9d116dd7 1980 else
fd049845 1981 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1982}
1983
1984
748a9306
LW
1985#ifdef DEBUGGING
1986 static char* exp_name[] =
09bef843
SB
1987 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
1988 "ATTRTERM", "TERMBLOCK"
1989 };
748a9306 1990#endif
463ee0b2 1991
02aa26ce
NT
1992/*
1993 yylex
1994
1995 Works out what to call the token just pulled out of the input
1996 stream. The yacc parser takes care of taking the ops we return and
1997 stitching them into a tree.
1998
1999 Returns:
2000 PRIVATEREF
2001
2002 Structure:
2003 if read an identifier
2004 if we're in a my declaration
2005 croak if they tried to say my($foo::bar)
2006 build the ops for a my() declaration
2007 if it's an access to a my() variable
2008 are we in a sort block?
2009 croak if my($a); $a <=> $b
2010 build ops for access to a my() variable
2011 if in a dq string, and they've said @foo and we can't find @foo
2012 croak
2013 build ops for a bareword
2014 if we already built the token before, use it.
2015*/
2016
864dbfa3
GS
2017int
2018#ifdef USE_PURE_BISON
cea2e8a9 2019Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 2020#else
cea2e8a9 2021Perl_yylex(pTHX)
864dbfa3 2022#endif
378cc40b 2023{
11343788 2024 dTHR;
79072805 2025 register char *s;
378cc40b 2026 register char *d;
79072805 2027 register I32 tmp;
463ee0b2 2028 STRLEN len;
161b471a
NIS
2029 GV *gv = Nullgv;
2030 GV **gvp = 0;
a687059c 2031
a1a0e61e
TD
2032#ifdef USE_PURE_BISON
2033 yylval_pointer = lvalp;
2034 yychar_pointer = lcharp;
2035#endif
2036
02aa26ce 2037 /* check if there's an identifier for us to look at */
3280af22 2038 if (PL_pending_ident) {
02aa26ce 2039 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2040 char pit = PL_pending_ident;
2041 PL_pending_ident = 0;
bbce6d69 2042
02aa26ce
NT
2043 /* if we're in a my(), we can't allow dynamics here.
2044 $foo'bar has already been turned into $foo::bar, so
2045 just check for colons.
2046
2047 if it's a legal name, the OP is a PADANY.
2048 */
3280af22 2049 if (PL_in_my) {
77ca0c92 2050 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2051 if (strchr(PL_tokenbuf,':'))
2052 yyerror(Perl_form(aTHX_ "No package name allowed for "
2053 "variable %s in \"our\"",
2054 PL_tokenbuf));
77ca0c92
LW
2055 tmp = pad_allocmy(PL_tokenbuf);
2056 }
2057 else {
2058 if (strchr(PL_tokenbuf,':'))
2059 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2060
77ca0c92
LW
2061 yylval.opval = newOP(OP_PADANY, 0);
2062 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2063 return PRIVATEREF;
2064 }
bbce6d69
PP
2065 }
2066
02aa26ce
NT
2067 /*
2068 build the ops for accesses to a my() variable.
2069
2070 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2071 then used in a comparison. This catches most, but not
2072 all cases. For instance, it catches
2073 sort { my($a); $a <=> $b }
2074 but not
2075 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2076 (although why you'd do that is anyone's guess).
2077 */
2078
3280af22 2079 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2080#ifdef USE_THREADS
54b9620d 2081 /* Check for single character per-thread SVs */
3280af22
NIS
2082 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2083 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2084 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2085 {
2faa37cc 2086 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2087 yylval.opval->op_targ = tmp;
2088 return PRIVATEREF;
2089 }
2090#endif /* USE_THREADS */
3280af22 2091 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2092 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2093 /* might be an "our" variable" */
f472eb5c 2094 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2095 /* build ops for a bareword */
f472eb5c
GS
2096 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2097 sv_catpvn(sym, "::", 2);
2098 sv_catpv(sym, PL_tokenbuf+1);
2099 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2100 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2101 gv_fetchpv(SvPVX(sym),
77ca0c92 2102 (PL_in_eval
f472eb5c
GS
2103 ? (GV_ADDMULTI | GV_ADDINEVAL)
2104 : TRUE
77ca0c92
LW
2105 ),
2106 ((PL_tokenbuf[0] == '$') ? SVt_PV
2107 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2108 : SVt_PVHV));
2109 return WORD;
2110 }
2111
02aa26ce 2112 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2113 if (PL_last_lop_op == OP_SORT &&
2114 PL_tokenbuf[0] == '$' &&
2115 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2116 && !PL_tokenbuf[2])
bbce6d69 2117 {
3280af22
NIS
2118 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2119 d < PL_bufend && *d != '\n';
a863c7d1
MB
2120 d++)
2121 {
2122 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2123 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2124 PL_tokenbuf);
a863c7d1 2125 }
bbce6d69
PP
2126 }
2127 }
bbce6d69 2128
a863c7d1
MB
2129 yylval.opval = newOP(OP_PADANY, 0);
2130 yylval.opval->op_targ = tmp;
2131 return PRIVATEREF;
2132 }
bbce6d69
PP
2133 }
2134
02aa26ce
NT
2135 /*
2136 Whine if they've said @foo in a doublequoted string,
2137 and @foo isn't a variable we can find in the symbol
2138 table.
2139 */
3280af22
NIS
2140 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2141 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
2142 if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
cea2e8a9 2143 yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s",
3280af22 2144 PL_tokenbuf, PL_tokenbuf));
bbce6d69
PP
2145 }
2146
02aa26ce 2147 /* build ops for a bareword */
3280af22 2148 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2149 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2150 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2151 ((PL_tokenbuf[0] == '$') ? SVt_PV
2152 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2153 : SVt_PVHV));
2154 return WORD;
2155 }
2156
02aa26ce
NT
2157 /* no identifier pending identification */
2158
3280af22 2159 switch (PL_lex_state) {
79072805
LW
2160#ifdef COMMENTARY
2161 case LEX_NORMAL: /* Some compilers will produce faster */
2162 case LEX_INTERPNORMAL: /* code if we comment these out. */
2163 break;
2164#endif
2165
09bef843 2166 /* when we've already built the next token, just pull it out of the queue */
79072805 2167 case LEX_KNOWNEXT:
3280af22
NIS
2168 PL_nexttoke--;
2169 yylval = PL_nextval[PL_nexttoke];
2170 if (!PL_nexttoke) {
2171 PL_lex_state = PL_lex_defer;
2172 PL_expect = PL_lex_expect;
2173 PL_lex_defer = LEX_NORMAL;
463ee0b2 2174 }
3280af22 2175 return(PL_nexttype[PL_nexttoke]);
79072805 2176
02aa26ce 2177 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2178 when we get here, PL_bufptr is at the \
02aa26ce 2179 */
79072805
LW
2180 case LEX_INTERPCASEMOD:
2181#ifdef DEBUGGING
3280af22 2182 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2183 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2184#endif
02aa26ce 2185 /* handle \E or end of string */
3280af22 2186 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2187 char oldmod;
02aa26ce
NT
2188
2189 /* if at a \E */
3280af22
NIS
2190 if (PL_lex_casemods) {
2191 oldmod = PL_lex_casestack[--PL_lex_casemods];
2192 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2193
3280af22
NIS
2194 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2195 PL_bufptr += 2;
2196 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2197 }
79072805
LW
2198 return ')';
2199 }
3280af22
NIS
2200 if (PL_bufptr != PL_bufend)
2201 PL_bufptr += 2;
2202 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2203 return yylex();
79072805
LW
2204 }
2205 else {
3280af22 2206 s = PL_bufptr + 1;
79072805
LW
2207 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2208 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2209 if (strchr("LU", *s) &&
3280af22 2210 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2211 {
3280af22 2212 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2213 return ')';
2214 }
3280af22
NIS
2215 if (PL_lex_casemods > 10) {
2216 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2217 if (newlb != PL_lex_casestack) {
a0d0e21e 2218 SAVEFREEPV(newlb);
3280af22 2219 PL_lex_casestack = newlb;
a0d0e21e
LW
2220 }
2221 }
3280af22
NIS
2222 PL_lex_casestack[PL_lex_casemods++] = *s;
2223 PL_lex_casestack[PL_lex_casemods] = '\0';
2224 PL_lex_state = LEX_INTERPCONCAT;
2225 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2226 force_next('(');
2227 if (*s == 'l')
3280af22 2228 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2229 else if (*s == 'u')
3280af22 2230 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2231 else if (*s == 'L')
3280af22 2232 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2233 else if (*s == 'U')
3280af22 2234 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2235 else if (*s == 'Q')
3280af22 2236 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2237 else
cea2e8a9 2238 Perl_croak(aTHX_ "panic: yylex");
3280af22 2239 PL_bufptr = s + 1;
79072805 2240 force_next(FUNC);
3280af22
NIS
2241 if (PL_lex_starts) {
2242 s = PL_bufptr;
2243 PL_lex_starts = 0;
79072805
LW
2244 Aop(OP_CONCAT);
2245 }
2246 else
cea2e8a9 2247 return yylex();
79072805
LW
2248 }
2249
55497cff
PP
2250 case LEX_INTERPPUSH:
2251 return sublex_push();
2252
79072805 2253 case LEX_INTERPSTART:
3280af22 2254 if (PL_bufptr == PL_bufend)
79072805 2255 return sublex_done();
3280af22
NIS
2256 PL_expect = XTERM;
2257 PL_lex_dojoin = (*PL_bufptr == '@');
2258 PL_lex_state = LEX_INTERPNORMAL;
2259 if (PL_lex_dojoin) {
2260 PL_nextval[PL_nexttoke].ival = 0;
79072805 2261 force_next(',');
554b3eca 2262#ifdef USE_THREADS
533c011a
NIS
2263 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2264 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2265 force_next(PRIVATEREF);
2266#else
a0d0e21e 2267 force_ident("\"", '$');
554b3eca 2268#endif /* USE_THREADS */
3280af22 2269 PL_nextval[PL_nexttoke].ival = 0;
79072805 2270 force_next('$');
3280af22 2271 PL_nextval[PL_nexttoke].ival = 0;
79072805 2272 force_next('(');
3280af22 2273 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2274 force_next(FUNC);
2275 }
3280af22
NIS
2276 if (PL_lex_starts++) {
2277 s = PL_bufptr;
79072805
LW
2278 Aop(OP_CONCAT);
2279 }
cea2e8a9 2280 return yylex();
79072805
LW
2281
2282 case LEX_INTERPENDMAYBE:
3280af22
NIS
2283 if (intuit_more(PL_bufptr)) {
2284 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2285 break;
2286 }
2287 /* FALL THROUGH */
2288
2289 case LEX_INTERPEND:
3280af22
NIS
2290 if (PL_lex_dojoin) {
2291 PL_lex_dojoin = FALSE;
2292 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2293 return ')';
2294 }
43a16006 2295 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2296 && SvEVALED(PL_lex_repl))
43a16006 2297 {
e9fa98b2 2298 if (PL_bufptr != PL_bufend)
cea2e8a9 2299 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2300 PL_lex_repl = Nullsv;
2301 }
79072805
LW
2302 /* FALLTHROUGH */
2303 case LEX_INTERPCONCAT:
2304#ifdef DEBUGGING
3280af22 2305 if (PL_lex_brackets)
cea2e8a9 2306 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2307#endif
3280af22 2308 if (PL_bufptr == PL_bufend)
79072805
LW
2309 return sublex_done();
2310
3280af22
NIS
2311 if (SvIVX(PL_linestr) == '\'') {
2312 SV *sv = newSVsv(PL_linestr);
2313 if (!PL_lex_inpat)
76e3520e 2314 sv = tokeq(sv);
3280af22 2315 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2316 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2317 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2318 s = PL_bufend;
79072805
LW
2319 }
2320 else {
3280af22 2321 s = scan_const(PL_bufptr);
79072805 2322 if (*s == '\\')
3280af22 2323 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2324 else
3280af22 2325 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2326 }
2327
3280af22
NIS
2328 if (s != PL_bufptr) {
2329 PL_nextval[PL_nexttoke] = yylval;
2330 PL_expect = XTERM;
79072805 2331 force_next(THING);
3280af22 2332 if (PL_lex_starts++)
79072805
LW
2333 Aop(OP_CONCAT);
2334 else {
3280af22 2335 PL_bufptr = s;
cea2e8a9 2336 return yylex();
79072805
LW
2337 }
2338 }
2339
cea2e8a9 2340 return yylex();
a0d0e21e 2341 case LEX_FORMLINE:
3280af22
NIS
2342 PL_lex_state = LEX_NORMAL;
2343 s = scan_formline(PL_bufptr);
2344 if (!PL_lex_formbrack)
a0d0e21e
LW
2345 goto rightbracket;
2346 OPERATOR(';');
79072805
LW
2347 }
2348
3280af22
NIS
2349 s = PL_bufptr;
2350 PL_oldoldbufptr = PL_oldbufptr;
2351 PL_oldbufptr = s;
79072805 2352 DEBUG_p( {
bf49b057
GS
2353 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2354 exp_name[PL_expect], s);
79072805 2355 } )
463ee0b2
LW
2356
2357 retry:
378cc40b
LW
2358 switch (*s) {
2359 default:
7e2040f0 2360 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2361 goto keylookup;
cea2e8a9 2362 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2363 case 4:
2364 case 26:
2365 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2366 case 0:
3280af22
NIS
2367 if (!PL_rsfp) {
2368 PL_last_uni = 0;
2369 PL_last_lop = 0;
2370 if (PL_lex_brackets)
d98d5fff 2371 yyerror("Missing right curly or square bracket");
79072805 2372 TOKEN(0);
463ee0b2 2373 }
3280af22 2374 if (s++ < PL_bufend)
a687059c 2375 goto retry; /* ignore stray nulls */
3280af22
NIS
2376 PL_last_uni = 0;
2377 PL_last_lop = 0;
2378 if (!PL_in_eval && !PL_preambled) {
2379 PL_preambled = TRUE;
2380 sv_setpv(PL_linestr,incl_perldb());
2381 if (SvCUR(PL_linestr))
2382 sv_catpv(PL_linestr,";");
2383 if (PL_preambleav){
2384 while(AvFILLp(PL_preambleav) >= 0) {
2385 SV *tmpsv = av_shift(PL_preambleav);
2386 sv_catsv(PL_linestr, tmpsv);
2387 sv_catpv(PL_linestr, ";");
91b7def8
PP
2388 sv_free(tmpsv);
2389 }
3280af22
NIS
2390 sv_free((SV*)PL_preambleav);
2391 PL_preambleav = NULL;
91b7def8 2392 }
3280af22
NIS
2393 if (PL_minus_n || PL_minus_p) {
2394 sv_catpv(PL_linestr, "LINE: while (<>) {");
2395 if (PL_minus_l)
2396 sv_catpv(PL_linestr,"chomp;");
2397 if (PL_minus_a) {
8fd239a7
CS
2398 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2399 if (gv)
2400 GvIMPORTED_AV_on(gv);
3280af22
NIS
2401 if (PL_minus_F) {
2402 if (strchr("/'\"", *PL_splitstr)
2403 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2404 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2405 else {
2406 char delim;
2407 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2408 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2409 delim = *s;
cea2e8a9 2410 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2411 "q" + (delim == '\''), delim);
3280af22 2412 for (s = PL_splitstr; *s; s++) {
54310121 2413 if (*s == '\\')
3280af22
NIS
2414 sv_catpvn(PL_linestr, "\\", 1);
2415 sv_catpvn(PL_linestr, s, 1);
54310121 2416 }
cea2e8a9 2417 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2418 }
2304df62
AD
2419 }
2420 else
3280af22 2421 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2422 }
79072805 2423 }
3280af22
NIS
2424 sv_catpv(PL_linestr, "\n");
2425 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2426 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2427 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2428 SV *sv = NEWSV(85,0);
2429
2430 sv_upgrade(sv, SVt_PVMG);
3280af22 2431 sv_setsv(sv,PL_linestr);
57843af0 2432 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2433 }
79072805 2434 goto retry;
a687059c 2435 }
e929a76b 2436 do {
3280af22 2437 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
e929a76b 2438 fake_eof:
3280af22
NIS
2439 if (PL_rsfp) {
2440 if (PL_preprocess && !PL_in_eval)
2441 (void)PerlProc_pclose(PL_rsfp);
2442 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2443 PerlIO_clearerr(PL_rsfp);
395c3793 2444 else
3280af22
NIS
2445 (void)PerlIO_close(PL_rsfp);
2446 PL_rsfp = Nullfp;
4a9ae47a 2447 PL_doextract = FALSE;
395c3793 2448 }
3280af22
NIS
2449 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2450 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2451 sv_catpv(PL_linestr,";}");
2452 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2453 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2454 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2455 goto retry;
2456 }
3280af22
NIS
2457 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2458 sv_setpv(PL_linestr,"");
79072805 2459 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
378cc40b 2460 }
3280af22 2461 if (PL_doextract) {
a0d0e21e 2462 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2463 PL_doextract = FALSE;
a0d0e21e
LW
2464
2465 /* Incest with pod. */
2466 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2467 sv_setpv(PL_linestr, "");
2468 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2469 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2470 PL_doextract = FALSE;
a0d0e21e
LW
2471 }
2472 }
463ee0b2 2473 incline(s);
3280af22
NIS
2474 } while (PL_doextract);
2475 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2476 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2477 SV *sv = NEWSV(85,0);
a687059c 2478
93a17b20 2479 sv_upgrade(sv, SVt_PVMG);
3280af22 2480 sv_setsv(sv,PL_linestr);
57843af0 2481 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2482 }
3280af22 2483 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2484 if (CopLINE(PL_curcop) == 1) {
3280af22 2485 while (s < PL_bufend && isSPACE(*s))
79072805 2486 s++;
a0d0e21e 2487 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2488 s++;
44a8e56a 2489 d = Nullch;
3280af22 2490 if (!PL_in_eval) {
44a8e56a
PP
2491 if (*s == '#' && *(s+1) == '!')
2492 d = s + 2;
2493#ifdef ALTERNATE_SHEBANG
2494 else {
2495 static char as[] = ALTERNATE_SHEBANG;
2496 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2497 d = s + (sizeof(as) - 1);
2498 }
2499#endif /* ALTERNATE_SHEBANG */
2500 }
2501 if (d) {
b8378b72 2502 char *ipath;
774d564b 2503 char *ipathend;
b8378b72 2504
774d564b 2505 while (isSPACE(*d))
b8378b72
CS
2506 d++;
2507 ipath = d;
774d564b
PP
2508 while (*d && !isSPACE(*d))
2509 d++;
2510 ipathend = d;
2511
2512#ifdef ARG_ZERO_IS_SCRIPT
2513 if (ipathend > ipath) {
2514 /*
2515 * HP-UX (at least) sets argv[0] to the script name,
2516 * which makes $^X incorrect. And Digital UNIX and Linux,
2517 * at least, set argv[0] to the basename of the Perl
2518 * interpreter. So, having found "#!", we'll set it right.
2519 */
2520 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2521 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2522 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2523 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2524 SvSETMAGIC(x);
2525 }
774d564b 2526 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2527 }
774d564b 2528#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2529
2530 /*
2531 * Look for options.
2532 */
748a9306 2533 d = instr(s,"perl -");
84e30d1a 2534 if (!d) {
748a9306 2535 d = instr(s,"perl");
84e30d1a
GS
2536#if defined(DOSISH)
2537 /* avoid getting into infinite loops when shebang
2538 * line contains "Perl" rather than "perl" */
2539 if (!d) {
2540 for (d = ipathend-4; d >= ipath; --d) {
2541 if ((*d == 'p' || *d == 'P')
2542 && !ibcmp(d, "perl", 4))
2543 {
2544 break;
2545 }
2546 }
2547 if (d < ipath)
2548 d = Nullch;
2549 }
2550#endif
2551 }
44a8e56a
PP
2552#ifdef ALTERNATE_SHEBANG
2553 /*
2554 * If the ALTERNATE_SHEBANG on this system starts with a
2555 * character that can be part of a Perl expression, then if
2556 * we see it but not "perl", we're probably looking at the
2557 * start of Perl code, not a request to hand off to some
2558 * other interpreter. Similarly, if "perl" is there, but
2559 * not in the first 'word' of the line, we assume the line
2560 * contains the start of the Perl program.
44a8e56a
PP
2561 */
2562 if (d && *s != '#') {
774d564b 2563 char *c = ipath;
44a8e56a
PP
2564 while (*c && !strchr("; \t\r\n\f\v#", *c))
2565 c++;
2566 if (c < d)
2567 d = Nullch; /* "perl" not in first word; ignore */
2568 else
2569 *s = '#'; /* Don't try to parse shebang line */
2570 }
774d564b 2571#endif /* ALTERNATE_SHEBANG */
748a9306 2572 if (!d &&
44a8e56a 2573 *s == '#' &&
774d564b 2574 ipathend > ipath &&
3280af22 2575 !PL_minus_c &&
748a9306 2576 !instr(s,"indir") &&
3280af22 2577 instr(PL_origargv[0],"perl"))
748a9306 2578 {
9f68db38 2579 char **newargv;
9f68db38 2580
774d564b
PP
2581 *ipathend = '\0';
2582 s = ipathend + 1;
3280af22 2583 while (s < PL_bufend && isSPACE(*s))
9f68db38 2584 s++;
3280af22
NIS
2585 if (s < PL_bufend) {
2586 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2587 newargv[1] = s;
3280af22 2588 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2589 s++;
2590 *s = '\0';
3280af22 2591 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2592 }
2593 else
3280af22 2594 newargv = PL_origargv;
774d564b 2595 newargv[0] = ipath;
80252599 2596 PerlProc_execv(ipath, newargv);
cea2e8a9 2597 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2598 }
748a9306 2599 if (d) {
3280af22
NIS
2600 U32 oldpdb = PL_perldb;
2601 bool oldn = PL_minus_n;
2602 bool oldp = PL_minus_p;
748a9306
LW
2603
2604 while (*d && !isSPACE(*d)) d++;
89bfa8cd 2605 while (*d == ' ' || *d == '\t') d++;
748a9306
LW
2606
2607 if (*d++ == '-') {
8cc95fdb
PP
2608 do {
2609 if (*d == 'M' || *d == 'm') {
2610 char *m = d;
2611 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2612 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2613 (int)(d - m), m);
2614 }
2615 d = moreswitches(d);
2616 } while (d);
155aba94
GS
2617 if ((PERLDB_LINE && !oldpdb) ||
2618 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2619 /* if we have already added "LINE: while (<>) {",
2620 we must not do it again */
748a9306 2621 {
3280af22
NIS
2622 sv_setpv(PL_linestr, "");
2623 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2624 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2625 PL_preambled = FALSE;
84902520 2626 if (PERLDB_LINE)
3280af22 2627 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2628 goto retry;
2629 }
a0d0e21e 2630 }
79072805 2631 }
9f68db38 2632 }
79072805 2633 }
3280af22
NIS
2634 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2635 PL_bufptr = s;
2636 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2637 return yylex();
ae986130 2638 }
378cc40b 2639 goto retry;
4fdae800 2640 case '\r':
6a27c188 2641#ifdef PERL_STRICT_CR
cea2e8a9
GS
2642 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2643 Perl_croak(aTHX_
54310121 2644 "(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2645#endif
4fdae800 2646 case ' ': case '\t': case '\f': case 013:
378cc40b
LW
2647 s++;
2648 goto retry;
378cc40b 2649 case '#':
e929a76b 2650 case '\n':
3280af22
NIS
2651 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2652 d = PL_bufend;
a687059c 2653 while (s < d && *s != '\n')
378cc40b 2654 s++;
0f85fab0 2655 if (s < d)
378cc40b 2656 s++;
463ee0b2 2657 incline(s);
3280af22
NIS
2658 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2659 PL_bufptr = s;
2660 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2661 return yylex();
a687059c 2662 }
378cc40b 2663 }
a687059c 2664 else {
378cc40b 2665 *s = '\0';
3280af22 2666 PL_bufend = s;
a687059c 2667 }
378cc40b
LW
2668 goto retry;
2669 case '-':
79072805 2670 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2671 s++;
3280af22 2672 PL_bufptr = s;
748a9306
LW
2673 tmp = *s++;
2674
3280af22 2675 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306
LW
2676 s++;
2677
2678 if (strnEQ(s,"=>",2)) {
3280af22 2679 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2680 OPERATOR('-'); /* unary minus */
2681 }
3280af22
NIS
2682 PL_last_uni = PL_oldbufptr;
2683 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2684 switch (tmp) {
79072805
LW
2685 case 'r': FTST(OP_FTEREAD);
2686 case 'w': FTST(OP_FTEWRITE);
2687 case 'x': FTST(OP_FTEEXEC);
2688 case 'o': FTST(OP_FTEOWNED);
2689 case 'R': FTST(OP_FTRREAD);
2690 case 'W': FTST(OP_FTRWRITE);
2691 case 'X': FTST(OP_FTREXEC);
2692 case 'O': FTST(OP_FTROWNED);
2693 case 'e': FTST(OP_FTIS);
2694 case 'z': FTST(OP_FTZERO);
2695 case 's': FTST(OP_FTSIZE);
2696 case 'f': FTST(OP_FTFILE);
2697 case 'd': FTST(OP_FTDIR);
2698 case 'l': FTST(OP_FTLINK);
2699 case 'p': FTST(OP_FTPIPE);
2700 case 'S': FTST(OP_FTSOCK);
2701 case 'u': FTST(OP_FTSUID);
2702 case 'g': FTST(OP_FTSGID);
2703 case 'k': FTST(OP_FTSVTX);
2704 case 'b': FTST(OP_FTBLK);
2705 case 'c': FTST(OP_FTCHR);
2706 case 't': FTST(OP_FTTTY);
2707 case 'T': FTST(OP_FTTEXT);
2708 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2709 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2710 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2711 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2712 default:
cea2e8a9 2713 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2714 break;
2715 }
2716 }
a687059c
LW
2717 tmp = *s++;
2718 if (*s == tmp) {
2719 s++;
3280af22 2720 if (PL_expect == XOPERATOR)
79072805
LW
2721 TERM(POSTDEC);
2722 else
2723 OPERATOR(PREDEC);
2724 }
2725 else if (*s == '>') {
2726 s++;
2727 s = skipspace(s);
7e2040f0 2728 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2729 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2730 TOKEN(ARROW);
79072805 2731 }
748a9306
LW
2732 else if (*s == '$')
2733 OPERATOR(ARROW);
463ee0b2 2734 else
748a9306 2735 TERM(ARROW);
a687059c 2736 }
3280af22 2737 if (PL_expect == XOPERATOR)
79072805
LW
2738 Aop(OP_SUBTRACT);
2739 else {
3280af22 2740 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2741 check_uni();
79072805 2742 OPERATOR('-'); /* unary minus */
2f3197b3 2743 }
79072805 2744
378cc40b 2745 case '+':
a687059c
LW
2746 tmp = *s++;
2747 if (*s == tmp) {
378cc40b 2748 s++;
3280af22 2749 if (PL_expect == XOPERATOR)
79072805
LW
2750 TERM(POSTINC);
2751 else
2752 OPERATOR(PREINC);
378cc40b 2753 }
3280af22 2754 if (PL_expect == XOPERATOR)
79072805
LW
2755 Aop(OP_ADD);
2756 else {
3280af22 2757 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2758 check_uni();
a687059c 2759 OPERATOR('+');
2f3197b3 2760 }
a687059c 2761
378cc40b 2762 case '*':
3280af22
NIS
2763 if (PL_expect != XOPERATOR) {
2764 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2765 PL_expect = XOPERATOR;
2766 force_ident(PL_tokenbuf, '*');
2767 if (!*PL_tokenbuf)
a0d0e21e 2768 PREREF('*');
79072805 2769 TERM('*');
a687059c 2770 }
79072805
LW
2771 s++;
2772 if (*s == '*') {
a687059c 2773 s++;
79072805 2774 PWop(OP_POW);
a687059c 2775 }
79072805
LW
2776 Mop(OP_MULTIPLY);
2777
378cc40b 2778 case '%':
3280af22 2779 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2780 ++s;
2781 Mop(OP_MODULO);
a687059c 2782 }
3280af22
NIS
2783 PL_tokenbuf[0] = '%';
2784 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2785 if (!PL_tokenbuf[1]) {
2786 if (s == PL_bufend)
bbce6d69
PP
2787 yyerror("Final % should be \\% or %name");
2788 PREREF('%');
a687059c 2789 }
3280af22 2790 PL_pending_ident = '%';
bbce6d69 2791 TERM('%');
a687059c 2792
378cc40b 2793 case '^':
79072805 2794 s++;
a0d0e21e 2795 BOop(OP_BIT_XOR);
79072805 2796 case '[':
3280af22 2797 PL_lex_brackets++;
79072805 2798 /* FALL THROUGH */
378cc40b 2799 case '~':
378cc40b 2800 case ',':
378cc40b
LW
2801 tmp = *s++;
2802 OPERATOR(tmp);
a0d0e21e
LW
2803 case ':':
2804 if (s[1] == ':') {
2805 len = 0;
2806 goto just_a_word;
2807 }
2808 s++;
09bef843
SB
2809 switch (PL_expect) {
2810 OP *attrs;
2811 case XOPERATOR:
2812 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2813 break;
2814 PL_bufptr = s; /* update in case we back off */
2815 goto grabattrs;
2816 case XATTRBLOCK:
2817 PL_expect = XBLOCK;
2818 goto grabattrs;
2819 case XATTRTERM:
2820 PL_expect = XTERMBLOCK;
2821 grabattrs:
2822 s = skipspace(s);
2823 attrs = Nullop;
7e2040f0 2824 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2825 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2826 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2827 if (tmp < 0) tmp = -tmp;
2828 switch (tmp) {
2829 case KEY_or:
2830 case KEY_and:
2831 case KEY_for:
2832 case KEY_unless:
2833 case KEY_if:
2834 case KEY_while:
2835 case KEY_until:
2836 goto got_attrs;
2837 default:
2838 break;
2839 }
2840 }
09bef843
SB
2841 if (*d == '(') {
2842 d = scan_str(d,TRUE,TRUE);
2843 if (!d) {
2844 if (PL_lex_stuff) {
2845 SvREFCNT_dec(PL_lex_stuff);
2846 PL_lex_stuff = Nullsv;
2847 }
2848 /* MUST advance bufptr here to avoid bogus
2849 "at end of line" context messages from yyerror().
2850 */
2851 PL_bufptr = s + len;
2852 yyerror("Unterminated attribute parameter in attribute list");
2853 if (attrs)
2854 op_free(attrs);
2855 return 0; /* EOF indicator */
2856 }
2857 }
2858 if (PL_lex_stuff) {
2859 SV *sv = newSVpvn(s, len);
2860 sv_catsv(sv, PL_lex_stuff);
2861 attrs = append_elem(OP_LIST, attrs,
2862 newSVOP(OP_CONST, 0, sv));
2863 SvREFCNT_dec(PL_lex_stuff);
2864 PL_lex_stuff = Nullsv;
2865 }
2866 else {
2867 attrs = append_elem(OP_LIST, attrs,
2868 newSVOP(OP_CONST, 0,
2869 newSVpvn(s, len)));
2870 }
2871 s = skipspace(d);
0120eecf 2872 if (*s == ':' && s[1] != ':')
09bef843 2873 s = skipspace(s+1);
0120eecf
GS
2874 else if (s == d)
2875 break; /* require real whitespace or :'s */
09bef843 2876 }
f9829d6b
GS
2877 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
2878 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
2879 char q = ((*s == '\'') ? '"' : '\'');
2880 /* If here for an expression, and parsed no attrs, back off. */
2881 if (tmp == '=' && !attrs) {
2882 s = PL_bufptr;
2883 break;
2884 }
2885 /* MUST advance bufptr here to avoid bogus "at end of line"
2886 context messages from yyerror().
2887 */
2888 PL_bufptr = s;
2889 if (!*s)
2890 yyerror("Unterminated attribute list");
2891 else
2892 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2893 q, *s, q));
2894 if (attrs)
2895 op_free(attrs);
2896 OPERATOR(':');
2897 }
f9829d6b 2898 got_attrs:
09bef843
SB
2899 if (attrs) {
2900 PL_nextval[PL_nexttoke].opval = attrs;
2901 force_next(THING);
2902 }
2903 TOKEN(COLONATTR);
2904 }
a0d0e21e 2905 OPERATOR(':');
8990e307
LW
2906 case '(':
2907 s++;
3280af22
NIS
2908 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2909 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2910 else
3280af22 2911 PL_expect = XTERM;
a0d0e21e 2912 TOKEN('(');
378cc40b 2913 case ';':
57843af0
GS
2914 if (CopLINE(PL_curcop) < PL_copline)
2915 PL_copline = CopLINE(PL_curcop);
378cc40b
LW
2916 tmp = *s++;
2917 OPERATOR(tmp);
2918 case ')':
378cc40b 2919 tmp = *s++;
16d20bd9
AD
2920 s = skipspace(s);
2921 if (*s == '{')
2922 PREBLOCK(tmp);
378cc40b 2923 TERM(tmp);
79072805
LW
2924 case ']':
2925 s++;
3280af22 2926 if (PL_lex_brackets <= 0)
d98d5fff 2927 yyerror("Unmatched right square bracket");
463ee0b2 2928 else
3280af22
NIS
2929 --PL_lex_brackets;
2930 if (PL_lex_state == LEX_INTERPNORMAL) {
2931 if (PL_lex_brackets == 0) {
a0d0e21e 2932 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 2933 PL_lex_state = LEX_INTERPEND;
79072805
LW
2934 }
2935 }
4633a7c4 2936 TERM(']');
79072805
LW
2937 case '{':
2938 leftbracket:
79072805 2939 s++;
3280af22
NIS
2940 if (PL_lex_brackets > 100) {
2941 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
2942 if (newlb != PL_lex_brackstack) {
8990e307 2943 SAVEFREEPV(newlb);
3280af22 2944 PL_lex_brackstack = newlb;
8990e307
LW
2945 }
2946 }
3280af22 2947 switch (PL_expect) {
a0d0e21e 2948 case XTERM:
3280af22 2949 if (PL_lex_formbrack) {
a0d0e21e
LW
2950 s--;
2951 PRETERMBLOCK(DO);
2952 }
3280af22
NIS
2953 if (PL_oldoldbufptr == PL_last_lop)
2954 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2955 else
3280af22 2956 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 2957 OPERATOR(HASHBRACK);
a0d0e21e 2958 case XOPERATOR:
3280af22 2959 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306 2960 s++;
44a8e56a 2961 d = s;
3280af22
NIS
2962 PL_tokenbuf[0] = '\0';
2963 if (d < PL_bufend && *d == '-') {
2964 PL_tokenbuf[0] = '-';
44a8e56a 2965 d++;
3280af22 2966 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
44a8e56a
PP
2967 d++;
2968 }
7e2040f0 2969 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 2970 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 2971 FALSE, &len);
3280af22 2972 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
748a9306
LW
2973 d++;
2974 if (*d == '}') {
3280af22 2975 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
2976 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
2977 if (minus)
2978 force_next('-');
748a9306
LW
2979 }
2980 }
2981 /* FALL THROUGH */
09bef843 2982 case XATTRBLOCK:
748a9306 2983 case XBLOCK:
3280af22
NIS
2984 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
2985 PL_expect = XSTATE;
a0d0e21e 2986 break;
09bef843 2987 case XATTRTERM:
a0d0e21e 2988 case XTERMBLOCK:
3280af22
NIS
2989 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
2990 PL_expect = XSTATE;
a0d0e21e
LW
2991 break;
2992 default: {
2993 char *t;
3280af22
NIS
2994 if (PL_oldoldbufptr == PL_last_lop)
2995 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2996 else
3280af22 2997 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 2998 s = skipspace(s);
09ecc4b6 2999 if (*s == '}')
a0d0e21e 3000 OPERATOR(HASHBRACK);
b8a4b1be
GS
3001 /* This hack serves to disambiguate a pair of curlies
3002 * as being a block or an anon hash. Normally, expectation
3003 * determines that, but in cases where we're not in a
3004 * position to expect anything in particular (like inside
3005 * eval"") we have to resolve the ambiguity. This code
3006 * covers the case where the first term in the curlies is a
3007 * quoted string. Most other cases need to be explicitly
3008 * disambiguated by prepending a `+' before the opening
3009 * curly in order to force resolution as an anon hash.
3010 *
3011 * XXX should probably propagate the outer expectation
3012 * into eval"" to rely less on this hack, but that could
3013 * potentially break current behavior of eval"".
3014 * GSAR 97-07-21
3015 */
3016 t = s;
3017 if (*s == '\'' || *s == '"' || *s == '`') {
3018 /* common case: get past first string, handling escapes */
3280af22 3019 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3020 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3021 t++;
3022 t++;
a0d0e21e 3023 }
b8a4b1be 3024 else if (*s == 'q') {
3280af22 3025 if (++t < PL_bufend
b8a4b1be 3026 && (!isALNUM(*t)
3280af22 3027 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3028 && !isALNUM(*t))))
3029 {
b8a4b1be
GS
3030 char *tmps;
3031 char open, close, term;
3032 I32 brackets = 1;
3033
3280af22 3034 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3035 t++;
3036 term = *t;
3037 open = term;
3038 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3039 term = tmps[5];
3040 close = term;
3041 if (open == close)
3280af22
NIS
3042 for (t++; t < PL_bufend; t++) {
3043 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3044 t++;
6d07e5e9 3045 else if (*t == open)
b8a4b1be
GS
3046 break;
3047 }
3048 else
3280af22
NIS
3049 for (t++; t < PL_bufend; t++) {
3050 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3051 t++;
6d07e5e9 3052 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3053 break;
3054 else if (*t == open)
3055 brackets++;
3056 }
3057 }
3058 t++;
a0d0e21e 3059 }
7e2040f0 3060 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3061 t += UTF8SKIP(t);
7e2040f0 3062 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3063 t += UTF8SKIP(t);
a0d0e21e 3064 }
3280af22 3065 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3066 t++;
b8a4b1be
GS
3067 /* if comma follows first term, call it an anon hash */
3068 /* XXX it could be a comma expression with loop modifiers */
3280af22 3069 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3070 || (*t == '=' && t[1] == '>')))
a0d0e21e 3071 OPERATOR(HASHBRACK);
3280af22 3072 if (PL_expect == XREF)
4e4e412b 3073 PL_expect = XTERM;
a0d0e21e 3074 else {
3280af22
NIS
3075 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3076 PL_expect = XSTATE;
a0d0e21e 3077 }
8990e307 3078 }
a0d0e21e 3079 break;
463ee0b2 3080 }
57843af0 3081 yylval.ival = CopLINE(PL_curcop);
79072805 3082 if (isSPACE(*s) || *s == '#')
3280af22 3083 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3084 TOKEN('{');
378cc40b 3085 case '}':
79072805
LW
3086 rightbracket:
3087 s++;
3280af22 3088 if (PL_lex_brackets <= 0)
d98d5fff 3089 yyerror("Unmatched right curly bracket");
463ee0b2 3090 else
3280af22
NIS
3091 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3092 if (PL_lex_brackets < PL_lex_formbrack)
3093 PL_lex_formbrack = 0;
3094 if (PL_lex_state == LEX_INTERPNORMAL) {
3095 if (PL_lex_brackets == 0) {
9059aa12
LW
3096 if (PL_expect & XFAKEBRACK) {
3097 PL_expect &= XENUMMASK;
3280af22
NIS
3098 PL_lex_state = LEX_INTERPEND;
3099 PL_bufptr = s;
cea2e8a9 3100 return yylex(); /* ignore fake brackets */
79072805 3101 }
fa83b5b6 3102 if (*s == '-' && s[1] == '>')
3280af22 3103 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3104 else if (*s != '[' && *s != '{')
3280af22 3105 PL_lex_state = LEX_INTERPEND;
79072805
LW
3106 }
3107 }
9059aa12
LW
3108 if (PL_expect & XFAKEBRACK) {
3109 PL_expect &= XENUMMASK;
3280af22 3110 PL_bufptr = s;
cea2e8a9 3111 return yylex(); /* ignore fake brackets */
748a9306 3112 }
79072805
LW
3113 force_next('}');
3114 TOKEN(';');
378cc40b
LW
3115 case '&':
3116 s++;
3117 tmp = *s++;
3118 if (tmp == '&')
a0d0e21e 3119 AOPERATOR(ANDAND);
378cc40b 3120 s--;
3280af22 3121 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3122 if (ckWARN(WARN_SEMICOLON)
3123 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3124 {
57843af0 3125 CopLINE_dec(PL_curcop);
cea2e8a9 3126 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3127 CopLINE_inc(PL_curcop);
463ee0b2 3128 }
79072805 3129 BAop(OP_BIT_AND);
463ee0b2 3130 }
79072805 3131
3280af22
NIS
3132 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3133 if (*PL_tokenbuf) {
3134 PL_expect = XOPERATOR;
3135 force_ident(PL_tokenbuf, '&');
463ee0b2 3136 }
79072805
LW
3137 else
3138 PREREF('&');
c07a80fd 3139 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3140 TERM('&');
3141
378cc40b
LW
3142 case '|':
3143 s++;
3144 tmp = *s++;
3145 if (tmp == '|')
a0d0e21e 3146 AOPERATOR(OROR);
378cc40b 3147 s--;
79072805 3148 BOop(OP_BIT_OR);
378cc40b
LW
3149 case '=':
3150 s++;
3151 tmp = *s++;
3152 if (tmp == '=')
79072805
LW
3153 Eop(OP_EQ);
3154 if (tmp == '>')
3155 OPERATOR(',');
378cc40b 3156 if (tmp == '~')
79072805 3157 PMop(OP_MATCH);
599cee73 3158 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3159 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3160 s--;
3280af22
NIS
3161 if (PL_expect == XSTATE && isALPHA(tmp) &&
3162 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3163 {
3280af22
NIS
3164 if (PL_in_eval && !PL_rsfp) {
3165 d = PL_bufend;
a5f75d66
AD
3166 while (s < d) {
3167 if (*s++ == '\n') {
3168 incline(s);
3169 if (strnEQ(s,"=cut",4)) {
3170 s = strchr(s,'\n');
3171 if (s)
3172 s++;
3173 else
3174 s = d;
3175 incline(s);
3176 goto retry;
3177 }
3178 }
3179 }
3180 goto retry;
3181 }
3280af22
NIS
3182 s = PL_bufend;
3183 PL_doextract = TRUE;
a0d0e21e
LW
3184 goto retry;
3185 }
3280af22 3186 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3187 char *t;
51882d45 3188#ifdef PERL_STRICT_CR
a0d0e21e 3189 for (t = s; *t == ' ' || *t == '\t'; t++) ;
51882d45
GS
3190#else
3191 for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
3192#endif
a0d0e21e
LW
3193 if (*t == '\n' || *t == '#') {
3194 s--;
3280af22 3195 PL_expect = XBLOCK;
a0d0e21e
LW
3196 goto leftbracket;
3197 }
79072805 3198 }
a0d0e21e
LW
3199 yylval.ival = 0;
3200 OPERATOR(ASSIGNOP);
378cc40b
LW
3201 case '!':
3202 s++;
3203 tmp = *s++;
3204 if (tmp == '=')
79072805 3205 Eop(OP_NE);
378cc40b 3206 if (tmp == '~')
79072805 3207 PMop(OP_NOT);
378cc40b
LW
3208 s--;
3209 OPERATOR('!');
3210 case '<':
3280af22 3211 if (PL_expect != XOPERATOR) {
93a17b20 3212 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3213 check_uni();
79072805
LW
3214 if (s[1] == '<')
3215 s = scan_heredoc(s);
3216 else
3217 s = scan_inputsymbol(s);
3218 TERM(sublex_start());
378cc40b
LW
3219 }
3220 s++;
3221 tmp = *s++;
3222 if (tmp == '<')
79072805 3223 SHop(OP_LEFT_SHIFT);
395c3793
LW
3224 if (tmp == '=') {
3225 tmp = *s++;
3226 if (tmp == '>')
79072805 3227 Eop(OP_NCMP);
395c3793 3228 s--;
79072805 3229 Rop(OP_LE);
395c3793 3230 }
378cc40b 3231 s--;
79072805 3232 Rop(OP_LT);
378cc40b
LW
3233 case '>':
3234 s++;
3235 tmp = *s++;
3236 if (tmp == '>')
79072805 3237 SHop(OP_RIGHT_SHIFT);
378cc40b 3238 if (tmp == '=')
79072805 3239 Rop(OP_GE);
378cc40b 3240 s--;
79072805 3241 Rop(OP_GT);
378cc40b
LW
3242
3243 case '$':
bbce6d69
PP
3244 CLINE;
3245
3280af22
NIS
3246 if (PL_expect == XOPERATOR) {
3247 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3248 PL_expect = XTERM;
a0d0e21e 3249 depcom();
bbce6d69 3250 return ','; /* grandfather non-comma-format format */
a0d0e21e 3251 }
8990e307 3252 }
a0d0e21e 3253
7e2040f0 3254 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3255 PL_tokenbuf[0] = '@';
376b8730
SM
3256 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3257 sizeof PL_tokenbuf - 1, FALSE);
3258 if (PL_expect == XOPERATOR)
3259 no_op("Array length", s);
3280af22 3260 if (!PL_tokenbuf[1])
a0d0e21e 3261 PREREF(DOLSHARP);
3280af22
NIS
3262 PL_expect = XOPERATOR;
3263 PL_pending_ident = '#';
463ee0b2 3264 TOKEN(DOLSHARP);
79072805 3265 }
bbce6d69 3266
3280af22 3267 PL_tokenbuf[0] = '$';
376b8730
SM
3268 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3269 sizeof PL_tokenbuf - 1, FALSE);
3270 if (PL_expect == XOPERATOR)
3271 no_op("Scalar", s);
3280af22
NIS
3272 if (!PL_tokenbuf[1]) {
3273 if (s == PL_bufend)
bbce6d69
PP
3274 yyerror("Final $ should be \\$ or $name");
3275 PREREF('$');
8990e307 3276 }
a0d0e21e 3277
bbce6d69 3278 /* This kludge not intended to be bulletproof. */
3280af22 3279 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3280 yylval.opval = newSVOP(OP_CONST, 0,
3280af22 3281 newSViv((IV)PL_compiling.cop_arybase));
bbce6d69
PP
3282 yylval.opval->op_private = OPpCONST_ARYBASE;
3283 TERM(THING);
3284 }
3285
ff68c719 3286 d = s;
69d2bceb 3287 tmp = (I32)*s;
3280af22 3288 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3289 s = skipspace(s);
3290
3280af22 3291 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3292 char *t;
3293 if (*s == '[') {
3280af22 3294 PL_tokenbuf[0] = '@';
599cee73 3295 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3296 for(t = s + 1;
7e2040f0 3297 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3298 t++) ;
a0d0e21e 3299 if (*t++ == ',') {
3280af22
NIS
3300 PL_bufptr = skipspace(PL_bufptr);
3301 while (t < PL_bufend && *t != ']')
bbce6d69 3302 t++;
cea2e8a9 3303 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3304 "Multidimensional syntax %.*s not supported",
3305 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3306 }
3307 }
bbce6d69
PP
3308 }
3309 else if (*s == '{') {
3280af22 3310 PL_tokenbuf[0] = '%';
599cee73 3311 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3312 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3313 {
3280af22 3314 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3315 STRLEN len;
3316 for (t++; isSPACE(*t); t++) ;
7e2040f0 3317 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3318 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3319 for (; isSPACE(*t); t++) ;
864dbfa3 3320 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3321 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3322 "You need to quote \"%s\"", tmpbuf);
748a9306 3323 }
93a17b20
LW
3324 }
3325 }
2f3197b3 3326 }
bbce6d69 3327
3280af22 3328 PL_expect = XOPERATOR;
69d2bceb 3329 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3330 bool islop = (PL_last_lop == PL_oldoldbufptr);
3331 if (!islop || PL_last_lop_op == OP_GREPSTART)
3332 PL_expect = XOPERATOR;
bbce6d69 3333 else if (strchr("$@\"'`q", *s))
3280af22 3334 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3335 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3336 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3337 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3338 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3339 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3340 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3341 /* binary operators exclude handle interpretations */
3342 switch (tmp) {
3343 case -KEY_x:
3344 case -KEY_eq:
3345 case -KEY_ne:
3346 case -KEY_gt:
3347 case -KEY_lt:
3348 case -KEY_ge:
3349 case -KEY_le:
3350 case -KEY_cmp:
3351 break;
3352 default:
3280af22 3353 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3354 break;
3355 }
3356 }
68dc0745
PP
3357 else {
3358 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3359 if (gv && GvCVu(gv))
3280af22 3360 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3361 }
93a17b20 3362 }
bbce6d69 3363 else if (isDIGIT(*s))
3280af22 3364 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3365 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3366 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3367 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3368 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3369 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3370 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3371 }
3280af22 3372 PL_pending_ident = '$';
79072805 3373 TOKEN('$');
378cc40b
LW
3374
3375 case '@':
3280af22 3376 if (PL_expect == XOPERATOR)
bbce6d69 3377 no_op("Array", s);
3280af22
NIS
3378 PL_tokenbuf[0] = '@';
3379 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3380 if (!PL_tokenbuf[1]) {
3381 if (s == PL_bufend)
bbce6d69
PP
3382 yyerror("Final @ should be \\@ or @name");
3383 PREREF('@');
3384 }
3280af22 3385 if (PL_lex_state == LEX_NORMAL)
ff68c719 3386 s = skipspace(s);
3280af22 3387 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3388 if (*s == '{')
3280af22 3389 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3390
3391 /* Warn about @ where they meant $. */
599cee73 3392 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3393 if (*s == '[' || *s == '{') {
3394 char *t = s + 1;
7e2040f0 3395 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3396 t++;
3397 if (*t == '}' || *t == ']') {
3398 t++;
3280af22 3399 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3400 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3401 "Scalar value %.*s better written as $%.*s",
3280af22 3402 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3403 }
93a17b20
LW
3404 }
3405 }
463ee0b2 3406 }
3280af22 3407 PL_pending_ident = '@';
79072805 3408 TERM('@');
378cc40b
LW
3409
3410 case '/': /* may either be division or pattern */
3411 case '?': /* may either be conditional or pattern */
3280af22 3412 if (PL_expect != XOPERATOR) {
c277df42 3413 /* Disable warning on "study /blah/" */
3280af22
NIS
3414 if (PL_oldoldbufptr == PL_last_uni
3415 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3416 || memNE(PL_last_uni, "study", 5)
3417 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3418 check_uni();
8782bef2 3419 s = scan_pat(s,OP_MATCH);
79072805 3420 TERM(sublex_start());
378cc40b
LW
3421 }
3422 tmp = *s++;
a687059c 3423 if (tmp == '/')
79072805 3424 Mop(OP_DIVIDE);
378cc40b
LW
3425 OPERATOR(tmp);
3426
3427 case '.':
51882d45
GS
3428 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3429#ifdef PERL_STRICT_CR
3430 && s[1] == '\n'
3431#else
3432 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3433#endif
3434 && (s == PL_linestart || s[-1] == '\n') )
3435 {
3280af22
NIS
3436 PL_lex_formbrack = 0;
3437 PL_expect = XSTATE;
79072805
LW
3438 goto rightbracket;
3439 }
3280af22 3440 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3441 tmp = *s++;
a687059c
LW
3442 if (*s == tmp) {
3443 s++;
2f3197b3
LW
3444 if (*s == tmp) {
3445 s++;
79072805 3446 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3447 }
3448 else
79072805 3449 yylval.ival = 0;
378cc40b 3450 OPERATOR(DOTDOT);
a687059c 3451 }
3280af22 3452 if (PL_expect != XOPERATOR)
2f3197b3 3453 check_uni();
79072805 3454 Aop(OP_CONCAT);
378cc40b
LW
3455 }
3456 /* FALL THROUGH */
3457 case '0': case '1': case '2': case '3': case '4':
3458 case '5': case '6': case '7': case '8': case '9':
79072805 3459 s = scan_num(s);
3280af22 3460 if (PL_expect == XOPERATOR)
8990e307 3461 no_op("Number",s);
79072805
LW
3462 TERM(THING);
3463
3464 case '\'':
09bef843 3465 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3466 if (PL_expect == XOPERATOR) {
3467 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3468 PL_expect = XTERM;
a0d0e21e
LW
3469 depcom();
3470 return ','; /* grandfather non-comma-format format */
3471 }
463ee0b2 3472 else
8990e307 3473 no_op("String",s);
463ee0b2 3474 }
79072805 3475 if (!s)
85e6fe83 3476 missingterm((char*)0);
79072805
LW
3477 yylval.ival = OP_CONST;
3478 TERM(sublex_start());
3479
3480 case '"':
09bef843 3481 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3482 if (PL_expect == XOPERATOR) {
3483 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3484 PL_expect = XTERM;
a0d0e21e
LW
3485 depcom();
3486 return ','; /* grandfather non-comma-format format */
3487 }
463ee0b2 3488 else
8990e307 3489 no_op("String",s);
463ee0b2 3490 }
79072805 3491 if (!s)
85e6fe83 3492 missingterm((char*)0);
4633a7c4 3493 yylval.ival = OP_CONST;
3280af22 3494 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3495 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3496 yylval.ival = OP_STRINGIFY;
3497 break;
3498 }
3499 }
79072805
LW
3500 TERM(sublex_start());
3501
3502 case '`':
09bef843 3503 s = scan_str(s,FALSE,FALSE);
3280af22 3504 if (PL_expect == XOPERATOR)
8990e307 3505 no_op("Backticks",s);
79072805 3506 if (!s)
85e6fe83 3507 missingterm((char*)0);
79072805
LW
3508 yylval.ival = OP_BACKTICK;
3509 set_csh();
3510 TERM(sublex_start());
3511
3512 case '\\':
3513 s++;
599cee73 3514 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3515 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3516 *s, *s);
3280af22 3517 if (PL_expect == XOPERATOR)
8990e307 3518 no_op("Backslash",s);
79072805
LW
3519 OPERATOR(REFGEN);
3520
a7cb1f99 3521 case 'v':
e526c9e6 3522 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
a7cb1f99
GS
3523 char *start = s;
3524 start++;
3525 start++;
dd629d5b 3526 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
3527 start++;
3528 if (*start == '.' && isDIGIT(start[1])) {
3529 s = scan_num(s);
3530 TERM(THING);
3531 }
e526c9e6
GS
3532 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
3533 else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF)) {
3534 char c = *start;
3535 GV *gv;
3536 *start = '\0';
3537 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3538 *start = c;
3539 if (!gv) {
3540 s = scan_num(s);
3541 TERM(THING);
3542 }
3543 }
a7cb1f99
GS
3544 }
3545 goto keylookup;
79072805 3546 case 'x':
3280af22 3547 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3548 s++;
3549 Mop(OP_REPEAT);
2f3197b3 3550 }
79072805
LW
3551 goto keylookup;
3552
378cc40b 3553 case '_':
79072805
LW
3554 case 'a': case 'A':
3555 case 'b': case 'B':
3556 case 'c': case 'C':
3557 case 'd': case 'D':
3558 case 'e': case 'E':
3559 case 'f': case 'F':
3560 case 'g': case 'G':
3561 case 'h': case 'H':
3562 case 'i': case 'I':
3563 case 'j': case 'J':
3564 case 'k': case 'K':
3565 case 'l': case 'L':
3566 case 'm': case 'M':
3567 case 'n': case 'N':
3568 case 'o': case 'O':
3569 case 'p': case 'P':
3570 case 'q': case 'Q':
3571 case 'r': case 'R':
3572 case 's': case 'S':
3573 case 't': case 'T':
3574 case 'u': case 'U':
a7cb1f99 3575 case 'V':
79072805
LW
3576 case 'w': case 'W':
3577 case 'X':
3578 case 'y': case 'Y':
3579 case 'z': case 'Z':
3580
49dc05e3 3581 keylookup: {
161b471a
NIS
3582 gv = Nullgv;
3583 gvp = 0;
49dc05e3 3584
3280af22
NIS
3585 PL_bufptr = s;
3586 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01
PP
3587
3588 /* Some keywords can be followed by any delimiter, including ':' */
155aba94
GS
3589 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
3590 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3591 (PL_tokenbuf[0] == 'q' &&
3592 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01
PP
3593
3594 /* x::* is just a word, unless x is "CORE" */
3280af22 3595 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3596 goto just_a_word;
3597
3643fb5f