This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
new perldelta
[perl5.git] / toke.c
... / ...
CommitLineData
1/* toke.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
15 */
16
17/*
18 * This file is the lexer for Perl. It's closely linked to the
19 * parser, perly.y.
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
24/*
25=head1 Lexer interface
26This is the lower layer of the Perl parser, managing characters and tokens.
27
28=for apidoc AmnU|yy_parser *|PL_parser
29
30Pointer to a structure encapsulating the state of the parsing operation
31currently in progress. The pointer can be locally changed to perform
32a nested parse without interfering with the state of an outer parse.
33Individual members of C<PL_parser> have their own documentation.
34
35=cut
36*/
37
38#include "EXTERN.h"
39#define PERL_IN_TOKE_C
40#include "perl.h"
41#include "invlist_inline.h"
42
43#define new_constant(a,b,c,d,e,f,g, h) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
45
46#define pl_yylval (PL_parser->yylval)
47
48/* XXX temporary backwards compatibility */
49#define PL_lex_brackets (PL_parser->lex_brackets)
50#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
51#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
52#define PL_lex_brackstack (PL_parser->lex_brackstack)
53#define PL_lex_casemods (PL_parser->lex_casemods)
54#define PL_lex_casestack (PL_parser->lex_casestack)
55#define PL_lex_dojoin (PL_parser->lex_dojoin)
56#define PL_lex_formbrack (PL_parser->lex_formbrack)
57#define PL_lex_inpat (PL_parser->lex_inpat)
58#define PL_lex_inwhat (PL_parser->lex_inwhat)
59#define PL_lex_op (PL_parser->lex_op)
60#define PL_lex_repl (PL_parser->lex_repl)
61#define PL_lex_starts (PL_parser->lex_starts)
62#define PL_lex_stuff (PL_parser->lex_stuff)
63#define PL_multi_start (PL_parser->multi_start)
64#define PL_multi_open (PL_parser->multi_open)
65#define PL_multi_close (PL_parser->multi_close)
66#define PL_preambled (PL_parser->preambled)
67#define PL_linestr (PL_parser->linestr)
68#define PL_expect (PL_parser->expect)
69#define PL_copline (PL_parser->copline)
70#define PL_bufptr (PL_parser->bufptr)
71#define PL_oldbufptr (PL_parser->oldbufptr)
72#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
73#define PL_linestart (PL_parser->linestart)
74#define PL_bufend (PL_parser->bufend)
75#define PL_last_uni (PL_parser->last_uni)
76#define PL_last_lop (PL_parser->last_lop)
77#define PL_last_lop_op (PL_parser->last_lop_op)
78#define PL_lex_state (PL_parser->lex_state)
79#define PL_rsfp (PL_parser->rsfp)
80#define PL_rsfp_filters (PL_parser->rsfp_filters)
81#define PL_in_my (PL_parser->in_my)
82#define PL_in_my_stash (PL_parser->in_my_stash)
83#define PL_tokenbuf (PL_parser->tokenbuf)
84#define PL_multi_end (PL_parser->multi_end)
85#define PL_error_count (PL_parser->error_count)
86
87# define PL_nexttoke (PL_parser->nexttoke)
88# define PL_nexttype (PL_parser->nexttype)
89# define PL_nextval (PL_parser->nextval)
90
91
92#define SvEVALED(sv) \
93 (SvTYPE(sv) >= SVt_PVNV \
94 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
95
96static const char ident_too_long[] = "Identifier too long";
97static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'";
98
99# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
100
101#define XENUMMASK 0x3f
102#define XFAKEEOF 0x40
103#define XFAKEBRACK 0x80
104
105#ifdef USE_UTF8_SCRIPTS
106# define UTF cBOOL(!IN_BYTES)
107#else
108# define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
109#endif
110
111/* The maximum number of characters preceding the unrecognized one to display */
112#define UNRECOGNIZED_PRECEDE_COUNT 10
113
114/* In variables named $^X, these are the legal values for X.
115 * 1999-02-27 mjd-perl-patch@plover.com */
116#define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
117
118/* Non-identifier plugin infix operators are allowed any printing character
119 * except spaces, digits, or identifier chars
120 */
121#define isPLUGINFIX(c) (c && !isSPACE(c) && !isDIGIT(c) && !isALPHA(c))
122/* Plugin infix operators may not begin with a quote symbol */
123#define isPLUGINFIX_FIRST(c) (isPLUGINFIX(c) && c != '"' && c != '\'')
124
125#define PLUGINFIX_IS_ENABLED UNLIKELY(PL_infix_plugin != &Perl_infix_plugin_standard)
126
127#define SPACE_OR_TAB(c) isBLANK_A(c)
128
129#define HEXFP_PEEK(s) \
130 (((s[0] == '.') && \
131 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
132 isALPHA_FOLD_EQ(s[0], 'p'))
133
134/* LEX_* are values for PL_lex_state, the state of the lexer.
135 * They are arranged oddly so that the guard on the switch statement
136 * can get by with a single comparison (if the compiler is smart enough).
137 *
138 * These values refer to the various states within a sublex parse,
139 * i.e. within a double quotish string
140 */
141
142/* #define LEX_NOTPARSING 11 is done in perl.h. */
143
144#define LEX_NORMAL 10 /* normal code (ie not within "...") */
145#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
146#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
147#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
148#define LEX_INTERPSTART 6 /* expecting the start of a $var */
149
150 /* at end of code, eg "$x" followed by: */
151#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
152#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
153
154#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
155 string or after \E, $foo, etc */
156#define LEX_INTERPCONST 2 /* NOT USED */
157#define LEX_FORMLINE 1 /* expecting a format line */
158
159/* returned to yyl_try() to request it to retry the parse loop, expected to only
160 be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
161 can also return it.
162
163 yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
164 other token values are 258 or higher (see perly.h), so -1 should be
165 a safe value here.
166*/
167#define YYL_RETRY (-1)
168
169#ifdef DEBUGGING
170static const char* const lex_state_names[] = {
171 "KNOWNEXT",
172 "FORMLINE",
173 "INTERPCONST",
174 "INTERPCONCAT",
175 "INTERPENDMAYBE",
176 "INTERPEND",
177 "INTERPSTART",
178 "INTERPPUSH",
179 "INTERPCASEMOD",
180 "INTERPNORMAL",
181 "NORMAL"
182};
183#endif
184
185#include "keywords.h"
186
187/* CLINE is a macro that ensures PL_copline has a sane value */
188
189#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
190
191/*
192 * Convenience functions to return different tokens and prime the
193 * lexer for the next token. They all take an argument.
194 *
195 * TOKEN : generic token (used for '(', DOLSHARP, etc)
196 * OPERATOR : generic operator
197 * AOPERATOR : assignment operator
198 * PREBLOCK : beginning the block after an if, while, foreach, ...
199 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
200 * PREREF : *EXPR where EXPR is not a simple identifier
201 * TERM : expression term
202 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
203 * LOOPX : loop exiting command (goto, last, dump, etc)
204 * FTST : file test operator
205 * FUN0 : zero-argument function
206 * FUN0OP : zero-argument function, with its op created in this file
207 * FUN1 : not used, except for not, which isn't a UNIOP
208 * BOop : bitwise or or xor
209 * BAop : bitwise and
210 * BCop : bitwise complement
211 * SHop : shift operator
212 * PWop : power operator
213 * PMop : pattern-matching operator
214 * Aop : addition-level operator
215 * AopNOASSIGN : addition-level operator that is never part of .=
216 * Mop : multiplication-level operator
217 * ChEop : chaining equality-testing operator
218 * NCEop : non-chaining comparison operator at equality precedence
219 * ChRop : chaining relational operator <= != gt
220 * NCRop : non-chaining relational operator isa
221 *
222 * Also see LOP and lop() below.
223 */
224
225#ifdef DEBUGGING /* Serve -DT. */
226# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
227#else
228# define REPORT(retval) (retval)
229#endif
230
231#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
232#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
233#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
234#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
235#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
236#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
237#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
238#define PHASERBLOCK(f) return (pl_yylval.ival=f, PL_expect = XBLOCK, PL_bufptr = s, REPORT((int)PHASER))
239#define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
240#define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
241 pl_yylval.ival=f, \
242 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
243 REPORT((int)LOOPEX))
244#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
245#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
246#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
247#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
248#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
249#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
250#define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
251 REPORT(PERLY_TILDE)
252#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
253#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
254#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
255#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
256#define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
257#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
258#define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
259#define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
260#define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
261#define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
262
263/* This bit of chicanery makes a unary function followed by
264 * a parenthesis into a function with one argument, highest precedence.
265 * The UNIDOR macro is for unary functions that can be followed by the //
266 * operator (such as C<shift // 0>).
267 */
268#define UNI3(f,x,have_x) { \
269 pl_yylval.ival = f; \
270 if (have_x) PL_expect = x; \
271 PL_bufptr = s; \
272 PL_last_uni = PL_oldbufptr; \
273 PL_last_lop_op = (f) < 0 ? -(f) : (f); \
274 if (*s == '(') \
275 return REPORT( (int)FUNC1 ); \
276 s = skipspace(s); \
277 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
278 }
279#define UNI(f) UNI3(f,XTERM,1)
280#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
281#define UNIPROTO(f,optional) { \
282 if (optional) PL_last_uni = PL_oldbufptr; \
283 OPERATOR(f); \
284 }
285
286#define UNIBRACK(f) UNI3(f,0,0)
287
288/* return has special case parsing.
289 *
290 * List operators have low precedence. Functions have high precedence.
291 * Every built in, *except return*, if written with () around its arguments, is
292 * parsed as a function. Hence every other list built in:
293 *
294 * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9
295 * 429
296 * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5
297 * 639
298 * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()'
299 * Useless use of a constant (2) in void context at -e line 1.
300 * Useless use of a constant (4) in void context at -e line 1.
301 *
302 * $
303 *
304 * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a
305 * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string.
306 *
307 * Whereas return:
308 *
309 * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()'
310 * 2
311 * 4
312 * 9
313 * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()'
314 * Useless use of a constant (2) in void context at -e line 1.
315 * Useless use of a constant (4) in void context at -e line 1.
316 * 9
317 * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()'
318 * Useless use of a constant (2) in void context at -e line 1.
319 * Useless use of a constant (4) in void context at -e line 1.
320 * 9
321 * $
322 *
323 * and:
324 * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()'
325 * 2
326 * 4
327 * 6
328 *
329 * This last example is what we expect, but it's clearly inconsistent with how
330 * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently
331 * followed.
332 *
333 *
334 * Perl 3 attempted to be consistent:
335 *
336 * The rules are more consistent about where parens are needed and
337 * where they are not. In particular, unary operators and list operators now
338 * behave like functions if they're called like functions.
339 *
340 * However, the behaviour for return was reverted to the "old" parsing with
341 * patches 9-12:
342 *
343 * The construct
344 * return (1,2,3);
345 * did not do what was expected, since return was swallowing the
346 * parens in order to consider itself a function. The solution,
347 * since return never wants any trailing expression such as
348 * return (1,2,3) + 2;
349 * is to simply make return an exception to the paren-makes-a-function
350 * rule, and treat it the way it always was, so that it doesn't
351 * strip the parens.
352 *
353 * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with
354 * LOP(OP_RETURN, XTERM);
355 *
356 * and constructs such as
357 *
358 * return (Internals::V())[2]
359 *
360 * turn into syntax errors
361 */
362
363#define OLDLOP(f) \
364 do { \
365 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
366 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
367 pl_yylval.ival = (f); \
368 PL_expect = XTERM; \
369 PL_bufptr = s; \
370 return (int)LSTOP; \
371 } while(0)
372
373#define COPLINE_INC_WITH_HERELINES \
374 STMT_START { \
375 CopLINE_inc(PL_curcop); \
376 if (PL_parser->herelines) \
377 CopLINE(PL_curcop) += PL_parser->herelines, \
378 PL_parser->herelines = 0; \
379 } STMT_END
380/* Called after scan_str to update CopLINE(PL_curcop), but only when there
381 * is no sublex_push to follow. */
382#define COPLINE_SET_FROM_MULTI_END \
383 STMT_START { \
384 CopLINE_set(PL_curcop, PL_multi_end); \
385 if (PL_multi_end != PL_multi_start) \
386 PL_parser->herelines = 0; \
387 } STMT_END
388
389
390/* A file-local structure for passing around information about subroutines and
391 * related definable words */
392struct code {
393 SV *sv;
394 CV *cv;
395 GV *gv, **gvp;
396 OP *rv2cv_op;
397 PADOFFSET off;
398 bool lex;
399};
400
401static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
402
403#ifdef DEBUGGING
404
405/* how to interpret the pl_yylval associated with the token */
406enum token_type {
407 TOKENTYPE_NONE,
408 TOKENTYPE_IVAL,
409 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
410 TOKENTYPE_PVAL,
411 TOKENTYPE_OPVAL
412};
413
414#define DEBUG_TOKEN(Type, Name) \
415 { Name, TOKENTYPE_##Type, #Name }
416
417static struct debug_tokens {
418 const int token;
419 enum token_type type;
420 const char *name;
421} const debug_tokens[] =
422{
423 DEBUG_TOKEN (OPNUM, ADDOP),
424 DEBUG_TOKEN (NONE, ANDAND),
425 DEBUG_TOKEN (NONE, ANDOP),
426 DEBUG_TOKEN (NONE, ARROW),
427 DEBUG_TOKEN (OPNUM, ASSIGNOP),
428 DEBUG_TOKEN (OPNUM, BITANDOP),
429 DEBUG_TOKEN (OPNUM, BITOROP),
430 DEBUG_TOKEN (OPNUM, CHEQOP),
431 DEBUG_TOKEN (OPNUM, CHRELOP),
432 DEBUG_TOKEN (NONE, COLONATTR),
433 DEBUG_TOKEN (NONE, DOLSHARP),
434 DEBUG_TOKEN (NONE, DORDOR),
435 DEBUG_TOKEN (IVAL, DOTDOT),
436 DEBUG_TOKEN (NONE, FORMLBRACK),
437 DEBUG_TOKEN (NONE, FORMRBRACK),
438 DEBUG_TOKEN (OPNUM, FUNC),
439 DEBUG_TOKEN (OPNUM, FUNC0),
440 DEBUG_TOKEN (OPVAL, FUNC0OP),
441 DEBUG_TOKEN (OPVAL, FUNC0SUB),
442 DEBUG_TOKEN (OPNUM, FUNC1),
443 DEBUG_TOKEN (NONE, HASHBRACK),
444 DEBUG_TOKEN (IVAL, KW_CATCH),
445 DEBUG_TOKEN (IVAL, KW_CLASS),
446 DEBUG_TOKEN (IVAL, KW_CONTINUE),
447 DEBUG_TOKEN (IVAL, KW_DEFAULT),
448 DEBUG_TOKEN (IVAL, KW_DO),
449 DEBUG_TOKEN (IVAL, KW_ELSE),
450 DEBUG_TOKEN (IVAL, KW_ELSIF),
451 DEBUG_TOKEN (IVAL, KW_FIELD),
452 DEBUG_TOKEN (IVAL, KW_GIVEN),
453 DEBUG_TOKEN (IVAL, KW_FOR),
454 DEBUG_TOKEN (IVAL, KW_FORMAT),
455 DEBUG_TOKEN (IVAL, KW_IF),
456 DEBUG_TOKEN (IVAL, KW_LOCAL),
457 DEBUG_TOKEN (IVAL, KW_METHOD_anon),
458 DEBUG_TOKEN (IVAL, KW_METHOD_named),
459 DEBUG_TOKEN (IVAL, KW_MY),
460 DEBUG_TOKEN (IVAL, KW_PACKAGE),
461 DEBUG_TOKEN (IVAL, KW_REQUIRE),
462 DEBUG_TOKEN (IVAL, KW_SUB_anon),
463 DEBUG_TOKEN (IVAL, KW_SUB_anon_sig),
464 DEBUG_TOKEN (IVAL, KW_SUB_named),
465 DEBUG_TOKEN (IVAL, KW_SUB_named_sig),
466 DEBUG_TOKEN (IVAL, KW_TRY),
467 DEBUG_TOKEN (IVAL, KW_USE_or_NO),
468 DEBUG_TOKEN (IVAL, KW_UNLESS),
469 DEBUG_TOKEN (IVAL, KW_UNTIL),
470 DEBUG_TOKEN (IVAL, KW_WHEN),
471 DEBUG_TOKEN (IVAL, KW_WHILE),
472 DEBUG_TOKEN (OPVAL, LABEL),
473 DEBUG_TOKEN (OPNUM, LOOPEX),
474 DEBUG_TOKEN (OPNUM, LSTOP),
475 DEBUG_TOKEN (OPVAL, LSTOPSUB),
476 DEBUG_TOKEN (OPNUM, MATCHOP),
477 DEBUG_TOKEN (OPVAL, METHCALL),
478 DEBUG_TOKEN (OPVAL, METHCALL0),
479 DEBUG_TOKEN (OPNUM, MULOP),
480 DEBUG_TOKEN (OPNUM, NCEQOP),
481 DEBUG_TOKEN (OPNUM, NCRELOP),
482 DEBUG_TOKEN (NONE, NOAMP),
483 DEBUG_TOKEN (NONE, NOTOP),
484 DEBUG_TOKEN (IVAL, OROP),
485 DEBUG_TOKEN (NONE, OROR),
486 DEBUG_TOKEN (IVAL, PERLY_AMPERSAND),
487 DEBUG_TOKEN (IVAL, PERLY_BRACE_CLOSE),
488 DEBUG_TOKEN (IVAL, PERLY_BRACE_OPEN),
489 DEBUG_TOKEN (IVAL, PERLY_BRACKET_CLOSE),
490 DEBUG_TOKEN (IVAL, PERLY_BRACKET_OPEN),
491 DEBUG_TOKEN (IVAL, PERLY_COLON),
492 DEBUG_TOKEN (IVAL, PERLY_COMMA),
493 DEBUG_TOKEN (IVAL, PERLY_DOT),
494 DEBUG_TOKEN (IVAL, PERLY_EQUAL_SIGN),
495 DEBUG_TOKEN (IVAL, PERLY_EXCLAMATION_MARK),
496 DEBUG_TOKEN (IVAL, PERLY_MINUS),
497 DEBUG_TOKEN (IVAL, PERLY_PAREN_OPEN),
498 DEBUG_TOKEN (IVAL, PERLY_PERCENT_SIGN),
499 DEBUG_TOKEN (IVAL, PERLY_PLUS),
500 DEBUG_TOKEN (IVAL, PERLY_QUESTION_MARK),
501 DEBUG_TOKEN (IVAL, PERLY_SEMICOLON),
502 DEBUG_TOKEN (IVAL, PERLY_SLASH),
503 DEBUG_TOKEN (IVAL, PERLY_SNAIL),
504 DEBUG_TOKEN (IVAL, PERLY_STAR),
505 DEBUG_TOKEN (IVAL, PERLY_TILDE),
506 DEBUG_TOKEN (OPVAL, PLUGEXPR),
507 DEBUG_TOKEN (OPVAL, PLUGSTMT),
508 DEBUG_TOKEN (PVAL, PLUGIN_ADD_OP),
509 DEBUG_TOKEN (PVAL, PLUGIN_ASSIGN_OP),
510 DEBUG_TOKEN (PVAL, PLUGIN_HIGH_OP),
511 DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_AND_OP),
512 DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_OR_OP),
513 DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_AND_LOW_OP),
514 DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_OR_LOW_OP),
515 DEBUG_TOKEN (PVAL, PLUGIN_LOW_OP),
516 DEBUG_TOKEN (PVAL, PLUGIN_MUL_OP),
517 DEBUG_TOKEN (PVAL, PLUGIN_POW_OP),
518 DEBUG_TOKEN (PVAL, PLUGIN_REL_OP),
519 DEBUG_TOKEN (OPVAL, PMFUNC),
520 DEBUG_TOKEN (NONE, POSTJOIN),
521 DEBUG_TOKEN (NONE, POSTDEC),
522 DEBUG_TOKEN (NONE, POSTINC),
523 DEBUG_TOKEN (OPNUM, POWOP),
524 DEBUG_TOKEN (NONE, PREDEC),
525 DEBUG_TOKEN (NONE, PREINC),
526 DEBUG_TOKEN (OPVAL, PRIVATEREF),
527 DEBUG_TOKEN (OPVAL, QWLIST),
528 DEBUG_TOKEN (NONE, REFGEN),
529 DEBUG_TOKEN (OPNUM, SHIFTOP),
530 DEBUG_TOKEN (NONE, SUBLEXEND),
531 DEBUG_TOKEN (NONE, SUBLEXSTART),
532 DEBUG_TOKEN (OPVAL, THING),
533 DEBUG_TOKEN (NONE, UMINUS),
534 DEBUG_TOKEN (OPNUM, UNIOP),
535 DEBUG_TOKEN (OPVAL, UNIOPSUB),
536 DEBUG_TOKEN (OPVAL, BAREWORD),
537 DEBUG_TOKEN (IVAL, YADAYADA),
538 { 0, TOKENTYPE_NONE, NULL }
539};
540
541#undef DEBUG_TOKEN
542
543/* dump the returned token in rv, plus any optional arg in pl_yylval */
544
545STATIC int
546S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
547{
548 PERL_ARGS_ASSERT_TOKEREPORT;
549
550 if (DEBUG_T_TEST) {
551 const char *name = NULL;
552 enum token_type type = TOKENTYPE_NONE;
553 const struct debug_tokens *p;
554 SV* const report = newSVpvs("<== ");
555
556 for (p = debug_tokens; p->token; p++) {
557 if (p->token == (int)rv) {
558 name = p->name;
559 type = p->type;
560 break;
561 }
562 }
563 if (name)
564 Perl_sv_catpv(aTHX_ report, name);
565 else if (isGRAPH(rv))
566 {
567 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
568 if ((char)rv == 'p')
569 sv_catpvs(report, " (pending identifier)");
570 }
571 else if (!rv)
572 sv_catpvs(report, "EOF");
573 else
574 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
575 switch (type) {
576 case TOKENTYPE_NONE:
577 break;
578 case TOKENTYPE_IVAL:
579 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
580 break;
581 case TOKENTYPE_OPNUM:
582 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
583 PL_op_name[lvalp->ival]);
584 break;
585 case TOKENTYPE_PVAL:
586 Perl_sv_catpvf(aTHX_ report, "(pval=%p)", lvalp->pval);
587 break;
588 case TOKENTYPE_OPVAL:
589 if (lvalp->opval) {
590 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
591 PL_op_name[lvalp->opval->op_type]);
592 if (lvalp->opval->op_type == OP_CONST) {
593 Perl_sv_catpvf(aTHX_ report, " %s",
594 SvPEEK(cSVOPx_sv(lvalp->opval)));
595 }
596
597 }
598 else
599 sv_catpvs(report, "(opval=null)");
600 break;
601 }
602 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
603 };
604 return (int)rv;
605}
606
607
608/* print the buffer with suitable escapes */
609
610STATIC void
611S_printbuf(pTHX_ const char *const fmt, const char *const s)
612{
613 SV* const tmp = newSVpvs("");
614
615 PERL_ARGS_ASSERT_PRINTBUF;
616
617 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
618 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
619 GCC_DIAG_RESTORE_STMT;
620 SvREFCNT_dec(tmp);
621}
622
623#endif
624
625/*
626 * S_ao
627 *
628 * This subroutine looks for an '=' next to the operator that has just been
629 * parsed and turns it into an ASSIGNOP if it finds one.
630 */
631
632STATIC int
633S_ao(pTHX_ int toketype)
634{
635 if (*PL_bufptr == '=') {
636 PL_bufptr++;
637
638 switch (toketype) {
639 case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break;
640 case OROR: pl_yylval.ival = OP_ORASSIGN; break;
641 case DORDOR: pl_yylval.ival = OP_DORASSIGN; break;
642 }
643
644 toketype = ASSIGNOP;
645 }
646 return REPORT(toketype);
647}
648
649/*
650 * S_no_op
651 * When Perl expects an operator and finds something else, no_op
652 * prints the warning. It always prints "<something> found where
653 * operator expected. It prints "Missing semicolon on previous line?"
654 * if the surprise occurs at the start of the line. "do you need to
655 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
656 * where the compiler doesn't know if foo is a method call or a function.
657 * It prints "Missing operator before end of line" if there's nothing
658 * after the missing operator, or "... before <...>" if there is something
659 * after the missing operator.
660 *
661 * PL_bufptr is expected to point to the start of the thing that was found,
662 * and s after the next token or partial token.
663 */
664
665STATIC void
666S_no_op(pTHX_ const char *const what, char *s)
667{
668 char * const oldbp = PL_bufptr;
669 const bool is_first = (PL_oldbufptr == PL_linestart);
670 SV *message = sv_2mortal( newSVpvf(
671 PERL_DIAG_WARN_SYNTAX("%s found where operator expected"),
672 what
673 ) );
674
675 PERL_ARGS_ASSERT_NO_OP;
676
677 if (!s)
678 s = oldbp;
679 else
680 PL_bufptr = s;
681
682 if (ckWARN_d(WARN_SYNTAX)) {
683 bool has_more = FALSE;
684 if (is_first) {
685 has_more = TRUE;
686 sv_catpvs(message,
687 " (Missing semicolon on previous line?)");
688 }
689 else if (PL_oldoldbufptr) {
690 /* yyerror (via yywarn) would do this itself, so we should too */
691 const char *t;
692 for (t = PL_oldoldbufptr;
693 t < PL_bufptr && isSPACE(*t);
694 t += UTF ? UTF8SKIP(t) : 1)
695 {
696 NOOP;
697 }
698 /* see if we can identify the cause of the warning */
699 if (isIDFIRST_lazy_if_safe(t,PL_bufend,UTF))
700 {
701 const char *t_start= t;
702 for ( ;
703 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
704 t += UTF ? UTF8SKIP(t) : 1)
705 {
706 NOOP;
707 }
708 if (t < PL_bufptr && isSPACE(*t)) {
709 has_more = TRUE;
710 sv_catpvf( message,
711 " (Do you need to predeclare \"%" UTF8f "\"?)",
712 UTF8fARG(UTF, t - t_start, t_start));
713 }
714 }
715 }
716 if (!has_more) {
717 const char *t= oldbp;
718 assert(s >= oldbp);
719 while (t < s && isSPACE(*t)) {
720 t += UTF ? UTF8SKIP(t) : 1;
721 }
722
723 sv_catpvf(message,
724 " (Missing operator before \"%" UTF8f "\"?)",
725 UTF8fARG(UTF, s - t, t));
726 }
727 }
728 yywarn(SvPV_nolen(message), UTF ? SVf_UTF8 : 0);
729 PL_bufptr = oldbp;
730}
731
732/*
733 * S_missingterm
734 * Complain about missing quote/regexp/heredoc terminator.
735 * If it's called with NULL then it cauterizes the line buffer.
736 * If we're in a delimited string and the delimiter is a control
737 * character, it's reformatted into a two-char sequence like ^C.
738 * This is fatal.
739 */
740
741STATIC void
742S_missingterm(pTHX_ char *s, STRLEN len)
743{
744 char tmpbuf[UTF8_MAXBYTES + 1];
745 char q;
746 bool uni = FALSE;
747 if (s) {
748 char * const nl = (char *) my_memrchr(s, '\n', len);
749 if (nl) {
750 *nl = '\0';
751 len = nl - s;
752 }
753 uni = UTF;
754 }
755 else if (PL_multi_close < 32) {
756 *tmpbuf = '^';
757 tmpbuf[1] = (char)toCTRL(PL_multi_close);
758 tmpbuf[2] = '\0';
759 s = tmpbuf;
760 len = 2;
761 }
762 else {
763 if (! UTF && LIKELY(PL_multi_close < 256)) {
764 *tmpbuf = (char)PL_multi_close;
765 tmpbuf[1] = '\0';
766 len = 1;
767 }
768 else {
769 char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
770 *end = '\0';
771 len = end - tmpbuf;
772 uni = TRUE;
773 }
774 s = tmpbuf;
775 }
776 q = memchr(s, '"', len) ? '\'' : '"';
777 Perl_croak(aTHX_ "Can't find string terminator %c%" UTF8f "%c"
778 " anywhere before EOF", q, UTF8fARG(uni, len, s), q);
779}
780
781#include "feature.h"
782
783/*
784 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
785 * utf16-to-utf8-reversed.
786 */
787
788#ifdef PERL_CR_FILTER
789static void
790strip_return(SV *sv)
791{
792 const char *s = SvPVX_const(sv);
793 const char * const e = s + SvCUR(sv);
794
795 PERL_ARGS_ASSERT_STRIP_RETURN;
796
797 /* outer loop optimized to do nothing if there are no CR-LFs */
798 while (s < e) {
799 if (*s++ == '\r' && *s == '\n') {
800 /* hit a CR-LF, need to copy the rest */
801 char *d = s - 1;
802 *d++ = *s++;
803 while (s < e) {
804 if (*s == '\r' && s[1] == '\n')
805 s++;
806 *d++ = *s++;
807 }
808 SvCUR(sv) -= s - d;
809 return;
810 }
811 }
812}
813
814STATIC I32
815S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
816{
817 const I32 count = FILTER_READ(idx+1, sv, maxlen);
818 if (count > 0 && !maxlen)
819 strip_return(sv);
820 return count;
821}
822#endif
823
824/*
825=for apidoc lex_start
826
827Creates and initialises a new lexer/parser state object, supplying
828a context in which to lex and parse from a new source of Perl code.
829A pointer to the new state object is placed in L</PL_parser>. An entry
830is made on the save stack so that upon unwinding, the new state object
831will be destroyed and the former value of L</PL_parser> will be restored.
832Nothing else need be done to clean up the parsing context.
833
834The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
835non-null, provides a string (in SV form) containing code to be parsed.
836A copy of the string is made, so subsequent modification of C<line>
837does not affect parsing. C<rsfp>, if non-null, provides an input stream
838from which code will be read to be parsed. If both are non-null, the
839code in C<line> comes first and must consist of complete lines of input,
840and C<rsfp> supplies the remainder of the source.
841
842The C<flags> parameter is reserved for future use. Currently it is only
843used by perl internally, so extensions should always pass zero.
844
845=cut
846*/
847
848/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
849 can share filters with the current parser.
850 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
851 caller, hence isn't owned by the parser, so shouldn't be closed on parser
852 destruction. This is used to handle the case of defaulting to reading the
853 script from the standard input because no filename was given on the command
854 line (without getting confused by situation where STDIN has been closed, so
855 the script handle is opened on fd 0) */
856
857void
858Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
859{
860 const char *s = NULL;
861 yy_parser *parser, *oparser;
862
863 if (flags && flags & ~LEX_START_FLAGS)
864 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
865
866 /* create and initialise a parser */
867
868 Newxz(parser, 1, yy_parser);
869 parser->old_parser = oparser = PL_parser;
870 PL_parser = parser;
871
872 parser->stack = NULL;
873 parser->stack_max1 = NULL;
874 parser->ps = NULL;
875
876 /* on scope exit, free this parser and restore any outer one */
877 SAVEPARSER(parser);
878 parser->saved_curcop = PL_curcop;
879
880 /* initialise lexer state */
881
882 parser->nexttoke = 0;
883 parser->error_count = oparser ? oparser->error_count : 0;
884 parser->copline = parser->preambling = NOLINE;
885 parser->lex_state = LEX_NORMAL;
886 parser->expect = XSTATE;
887 parser->rsfp = rsfp;
888 parser->recheck_utf8_validity = TRUE;
889 parser->rsfp_filters =
890 !(flags & LEX_START_SAME_FILTER) || !oparser
891 ? NULL
892 : MUTABLE_AV(SvREFCNT_inc(
893 oparser->rsfp_filters
894 ? oparser->rsfp_filters
895 : (oparser->rsfp_filters = newAV())
896 ));
897
898 Newx(parser->lex_brackstack, 120, char);
899 Newx(parser->lex_casestack, 12, char);
900 *parser->lex_casestack = '\0';
901 Newxz(parser->lex_shared, 1, LEXSHARED);
902
903 if (line) {
904 Size_t len;
905 const U8* first_bad_char_loc;
906
907 s = SvPV_const(line, len);
908
909 if ( SvUTF8(line)
910 && UNLIKELY(! is_utf8_string_loc((U8 *) s,
911 SvCUR(line),
912 &first_bad_char_loc)))
913 {
914 _force_out_malformed_utf8_message(first_bad_char_loc,
915 (U8 *) s + SvCUR(line),
916 0,
917 1 /* 1 means die */ );
918 NOT_REACHED; /* NOTREACHED */
919 }
920
921 parser->linestr = flags & LEX_START_COPIED
922 ? SvREFCNT_inc_simple_NN(line)
923 : newSVpvn_flags(s, len, SvUTF8(line));
924 if (!rsfp)
925 sv_catpvs(parser->linestr, "\n;");
926 } else {
927 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
928 }
929
930 parser->oldoldbufptr =
931 parser->oldbufptr =
932 parser->bufptr =
933 parser->linestart = SvPVX(parser->linestr);
934 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
935 parser->last_lop = parser->last_uni = NULL;
936
937 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
938 |LEX_DONT_CLOSE_RSFP));
939 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
940 |LEX_DONT_CLOSE_RSFP));
941
942 parser->in_pod = parser->filtered = 0;
943}
944
945
946/* delete a parser object */
947
948void
949Perl_parser_free(pTHX_ const yy_parser *parser)
950{
951 PERL_ARGS_ASSERT_PARSER_FREE;
952
953 PL_curcop = parser->saved_curcop;
954 SvREFCNT_dec(parser->linestr);
955
956 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
957 PerlIO_clearerr(parser->rsfp);
958 else if (parser->rsfp && (!parser->old_parser
959 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
960 PerlIO_close(parser->rsfp);
961 SvREFCNT_dec(parser->rsfp_filters);
962 SvREFCNT_dec(parser->lex_stuff);
963 SvREFCNT_dec(parser->lex_sub_repl);
964
965 Safefree(parser->lex_brackstack);
966 Safefree(parser->lex_casestack);
967 Safefree(parser->lex_shared);
968 PL_parser = parser->old_parser;
969 Safefree(parser);
970}
971
972void
973Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
974{
975 I32 nexttoke = parser->nexttoke;
976 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
977 while (nexttoke--) {
978 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
979 && parser->nextval[nexttoke].opval
980 && parser->nextval[nexttoke].opval->op_slabbed
981 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
982 op_free(parser->nextval[nexttoke].opval);
983 parser->nextval[nexttoke].opval = NULL;
984 }
985 }
986}
987
988
989/*
990=for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
991
992Buffer scalar containing the chunk currently under consideration of the
993text currently being lexed. This is always a plain string scalar (for
994which C<SvPOK> is true). It is not intended to be used as a scalar by
995normal scalar means; instead refer to the buffer directly by the pointer
996variables described below.
997
998The lexer maintains various C<char*> pointers to things in the
999C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
1000reallocated, all of these pointers must be updated. Don't attempt to
1001do this manually, but rather use L</lex_grow_linestr> if you need to
1002reallocate the buffer.
1003
1004The content of the text chunk in the buffer is commonly exactly one
1005complete line of input, up to and including a newline terminator,
1006but there are situations where it is otherwise. The octets of the
1007buffer may be intended to be interpreted as either UTF-8 or Latin-1.
1008The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
1009flag on this scalar, which may disagree with it.
1010
1011For direct examination of the buffer, the variable
1012L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
1013lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
1014of these pointers is usually preferable to examination of the scalar
1015through normal scalar means.
1016
1017=for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
1018
1019Direct pointer to the end of the chunk of text currently being lexed, the
1020end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
1021+ SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
1022always located at the end of the buffer, and does not count as part of
1023the buffer's contents.
1024
1025=for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
1026
1027Points to the current position of lexing inside the lexer buffer.
1028Characters around this point may be freely examined, within
1029the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
1030L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
1031interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
1032
1033Lexing code (whether in the Perl core or not) moves this pointer past
1034the characters that it consumes. It is also expected to perform some
1035bookkeeping whenever a newline character is consumed. This movement
1036can be more conveniently performed by the function L</lex_read_to>,
1037which handles newlines appropriately.
1038
1039Interpretation of the buffer's octets can be abstracted out by
1040using the slightly higher-level functions L</lex_peek_unichar> and
1041L</lex_read_unichar>.
1042
1043=for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
1044
1045Points to the start of the current line inside the lexer buffer.
1046This is useful for indicating at which column an error occurred, and
1047not much else. This must be updated by any lexing code that consumes
1048a newline; the function L</lex_read_to> handles this detail.
1049
1050=cut
1051*/
1052
1053/*
1054=for apidoc lex_bufutf8
1055
1056Indicates whether the octets in the lexer buffer
1057(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
1058of Unicode characters. If not, they should be interpreted as Latin-1
1059characters. This is analogous to the C<SvUTF8> flag for scalars.
1060
1061In UTF-8 mode, it is not guaranteed that the lexer buffer actually
1062contains valid UTF-8. Lexing code must be robust in the face of invalid
1063encoding.
1064
1065The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
1066is significant, but not the whole story regarding the input character
1067encoding. Normally, when a file is being read, the scalar contains octets
1068and its C<SvUTF8> flag is off, but the octets should be interpreted as
1069UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
1070however, the scalar may have the C<SvUTF8> flag on, and in this case its
1071octets should be interpreted as UTF-8 unless the C<use bytes> pragma
1072is in effect. This logic may change in the future; use this function
1073instead of implementing the logic yourself.
1074
1075=cut
1076*/
1077
1078bool
1079Perl_lex_bufutf8(pTHX)
1080{
1081 return UTF;
1082}
1083
1084/*
1085=for apidoc lex_grow_linestr
1086
1087Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
1088at least C<len> octets (including terminating C<NUL>). Returns a
1089pointer to the reallocated buffer. This is necessary before making
1090any direct modification of the buffer that would increase its length.
1091L</lex_stuff_pvn> provides a more convenient way to insert text into
1092the buffer.
1093
1094Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
1095this function updates all of the lexer's variables that point directly
1096into the buffer.
1097
1098=cut
1099*/
1100
1101char *
1102Perl_lex_grow_linestr(pTHX_ STRLEN len)
1103{
1104 SV *linestr;
1105 char *buf;
1106 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1107 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
1108 bool current;
1109
1110 linestr = PL_parser->linestr;
1111 buf = SvPVX(linestr);
1112 if (len <= SvLEN(linestr))
1113 return buf;
1114
1115 /* Is the lex_shared linestr SV the same as the current linestr SV?
1116 * Only in this case does re_eval_start need adjusting, since it
1117 * points within lex_shared->ls_linestr's buffer */
1118 current = ( !PL_parser->lex_shared->ls_linestr
1119 || linestr == PL_parser->lex_shared->ls_linestr);
1120
1121 bufend_pos = PL_parser->bufend - buf;
1122 bufptr_pos = PL_parser->bufptr - buf;
1123 oldbufptr_pos = PL_parser->oldbufptr - buf;
1124 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1125 linestart_pos = PL_parser->linestart - buf;
1126 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1127 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1128 re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
1129 PL_parser->lex_shared->re_eval_start - buf : 0;
1130
1131 buf = sv_grow(linestr, len);
1132
1133 PL_parser->bufend = buf + bufend_pos;
1134 PL_parser->bufptr = buf + bufptr_pos;
1135 PL_parser->oldbufptr = buf + oldbufptr_pos;
1136 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1137 PL_parser->linestart = buf + linestart_pos;
1138 if (PL_parser->last_uni)
1139 PL_parser->last_uni = buf + last_uni_pos;
1140 if (PL_parser->last_lop)
1141 PL_parser->last_lop = buf + last_lop_pos;
1142 if (current && PL_parser->lex_shared->re_eval_start)
1143 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
1144 return buf;
1145}
1146
1147/*
1148=for apidoc lex_stuff_pvn
1149
1150Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1151immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1152reallocating the buffer if necessary. This means that lexing code that
1153runs later will see the characters as if they had appeared in the input.
1154It is not recommended to do this as part of normal parsing, and most
1155uses of this facility run the risk of the inserted characters being
1156interpreted in an unintended manner.
1157
1158The string to be inserted is represented by C<len> octets starting
1159at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1160according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1161The characters are recoded for the lexer buffer, according to how the
1162buffer is currently being interpreted (L</lex_bufutf8>). If a string
1163to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1164function is more convenient.
1165
1166=for apidoc Amnh||LEX_STUFF_UTF8
1167
1168=cut
1169*/
1170
1171void
1172Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1173{
1174 char *bufptr;
1175 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1176 if (flags & ~(LEX_STUFF_UTF8))
1177 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1178 if (UTF) {
1179 if (flags & LEX_STUFF_UTF8) {
1180 goto plain_copy;
1181 } else {
1182 STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1183 (U8 *) pv + len);
1184 const char *p, *e = pv+len;;
1185 if (!highhalf)
1186 goto plain_copy;
1187 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1188 bufptr = PL_parser->bufptr;
1189 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1190 SvCUR_set(PL_parser->linestr,
1191 SvCUR(PL_parser->linestr) + len+highhalf);
1192 PL_parser->bufend += len+highhalf;
1193 for (p = pv; p != e; p++) {
1194 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1195 }
1196 }
1197 } else {
1198 if (flags & LEX_STUFF_UTF8) {
1199 STRLEN highhalf = 0;
1200 const char *p, *e = pv+len;
1201 for (p = pv; p != e; p++) {
1202 U8 c = (U8)*p;
1203 if (UTF8_IS_ABOVE_LATIN1(c)) {
1204 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1205 "non-Latin-1 character into Latin-1 input");
1206 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1207 p++;
1208 highhalf++;
1209 } else assert(UTF8_IS_INVARIANT(c));
1210 }
1211 if (!highhalf)
1212 goto plain_copy;
1213 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1214 bufptr = PL_parser->bufptr;
1215 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1216 SvCUR_set(PL_parser->linestr,
1217 SvCUR(PL_parser->linestr) + len-highhalf);
1218 PL_parser->bufend += len-highhalf;
1219 p = pv;
1220 while (p < e) {
1221 if (UTF8_IS_INVARIANT(*p)) {
1222 *bufptr++ = *p;
1223 p++;
1224 }
1225 else {
1226 assert(p < e -1 );
1227 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1228 p += 2;
1229 }
1230 }
1231 } else {
1232 plain_copy:
1233 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1234 bufptr = PL_parser->bufptr;
1235 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1236 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1237 PL_parser->bufend += len;
1238 Copy(pv, bufptr, len, char);
1239 }
1240 }
1241}
1242
1243/*
1244=for apidoc lex_stuff_pv
1245
1246Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1247immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1248reallocating the buffer if necessary. This means that lexing code that
1249runs later will see the characters as if they had appeared in the input.
1250It is not recommended to do this as part of normal parsing, and most
1251uses of this facility run the risk of the inserted characters being
1252interpreted in an unintended manner.
1253
1254The string to be inserted is represented by octets starting at C<pv>
1255and continuing to the first nul. These octets are interpreted as either
1256UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1257in C<flags>. The characters are recoded for the lexer buffer, according
1258to how the buffer is currently being interpreted (L</lex_bufutf8>).
1259If it is not convenient to nul-terminate a string to be inserted, the
1260L</lex_stuff_pvn> function is more appropriate.
1261
1262=cut
1263*/
1264
1265void
1266Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1267{
1268 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1269 lex_stuff_pvn(pv, strlen(pv), flags);
1270}
1271
1272/*
1273=for apidoc lex_stuff_sv
1274
1275Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1276immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1277reallocating the buffer if necessary. This means that lexing code that
1278runs later will see the characters as if they had appeared in the input.
1279It is not recommended to do this as part of normal parsing, and most
1280uses of this facility run the risk of the inserted characters being
1281interpreted in an unintended manner.
1282
1283The string to be inserted is the string value of C<sv>. The characters
1284are recoded for the lexer buffer, according to how the buffer is currently
1285being interpreted (L</lex_bufutf8>). If a string to be inserted is
1286not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1287need to construct a scalar.
1288
1289=cut
1290*/
1291
1292void
1293Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1294{
1295 char *pv;
1296 STRLEN len;
1297 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1298 if (flags)
1299 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1300 pv = SvPV(sv, len);
1301 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1302}
1303
1304/*
1305=for apidoc lex_unstuff
1306
1307Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1308C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
1309This hides the discarded text from any lexing code that runs later,
1310as if the text had never appeared.
1311
1312This is not the normal way to consume lexed text. For that, use
1313L</lex_read_to>.
1314
1315=cut
1316*/
1317
1318void
1319Perl_lex_unstuff(pTHX_ char *ptr)
1320{
1321 char *buf, *bufend;
1322 STRLEN unstuff_len;
1323 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1324 buf = PL_parser->bufptr;
1325 if (ptr < buf)
1326 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1327 if (ptr == buf)
1328 return;
1329 bufend = PL_parser->bufend;
1330 if (ptr > bufend)
1331 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1332 unstuff_len = ptr - buf;
1333 Move(ptr, buf, bufend+1-ptr, char);
1334 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1335 PL_parser->bufend = bufend - unstuff_len;
1336}
1337
1338/*
1339=for apidoc lex_read_to
1340
1341Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1342to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1343performing the correct bookkeeping whenever a newline character is passed.
1344This is the normal way to consume lexed text.
1345
1346Interpretation of the buffer's octets can be abstracted out by
1347using the slightly higher-level functions L</lex_peek_unichar> and
1348L</lex_read_unichar>.
1349
1350=cut
1351*/
1352
1353void
1354Perl_lex_read_to(pTHX_ char *ptr)
1355{
1356 char *s;
1357 PERL_ARGS_ASSERT_LEX_READ_TO;
1358 s = PL_parser->bufptr;
1359 if (ptr < s || ptr > PL_parser->bufend)
1360 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1361 for (; s != ptr; s++)
1362 if (*s == '\n') {
1363 COPLINE_INC_WITH_HERELINES;
1364 PL_parser->linestart = s+1;
1365 }
1366 PL_parser->bufptr = ptr;
1367}
1368
1369/*
1370=for apidoc lex_discard_to
1371
1372Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1373up to C<ptr>. The remaining content of the buffer will be moved, and
1374all pointers into the buffer updated appropriately. C<ptr> must not
1375be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1376it is not permitted to discard text that has yet to be lexed.
1377
1378Normally it is not necessarily to do this directly, because it suffices to
1379use the implicit discarding behaviour of L</lex_next_chunk> and things
1380based on it. However, if a token stretches across multiple lines,
1381and the lexing code has kept multiple lines of text in the buffer for
1382that purpose, then after completion of the token it would be wise to
1383explicitly discard the now-unneeded earlier lines, to avoid future
1384multi-line tokens growing the buffer without bound.
1385
1386=cut
1387*/
1388
1389void
1390Perl_lex_discard_to(pTHX_ char *ptr)
1391{
1392 char *buf;
1393 STRLEN discard_len;
1394 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1395 buf = SvPVX(PL_parser->linestr);
1396 if (ptr < buf)
1397 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1398 if (ptr == buf)
1399 return;
1400 if (ptr > PL_parser->bufptr)
1401 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1402 discard_len = ptr - buf;
1403 if (PL_parser->oldbufptr < ptr)
1404 PL_parser->oldbufptr = ptr;
1405 if (PL_parser->oldoldbufptr < ptr)
1406 PL_parser->oldoldbufptr = ptr;
1407 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1408 PL_parser->last_uni = NULL;
1409 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1410 PL_parser->last_lop = NULL;
1411 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1412 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1413 PL_parser->bufend -= discard_len;
1414 PL_parser->bufptr -= discard_len;
1415 PL_parser->oldbufptr -= discard_len;
1416 PL_parser->oldoldbufptr -= discard_len;
1417 if (PL_parser->last_uni)
1418 PL_parser->last_uni -= discard_len;
1419 if (PL_parser->last_lop)
1420 PL_parser->last_lop -= discard_len;
1421}
1422
1423void
1424Perl_notify_parser_that_changed_to_utf8(pTHX)
1425{
1426 /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1427 * off to on. At compile time, this has the effect of entering a 'use
1428 * utf8' section. This means that any input was not previously checked for
1429 * UTF-8 (because it was off), but now we do need to check it, or our
1430 * assumptions about the input being sane could be wrong, and we could
1431 * segfault. This routine just sets a flag so that the next time we look
1432 * at the input we do the well-formed UTF-8 check. If we aren't in the
1433 * proper phase, there may not be a parser object, but if there is, setting
1434 * the flag is harmless */
1435
1436 if (PL_parser) {
1437 PL_parser->recheck_utf8_validity = TRUE;
1438 }
1439}
1440
1441/*
1442=for apidoc lex_next_chunk
1443
1444Reads in the next chunk of text to be lexed, appending it to
1445L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1446looked to the end of the current chunk and wants to know more. It is
1447usual, but not necessary, for lexing to have consumed the entirety of
1448the current chunk at this time.
1449
1450If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1451chunk (i.e., the current chunk has been entirely consumed), normally the
1452current chunk will be discarded at the same time that the new chunk is
1453read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1454will not be discarded. If the current chunk has not been entirely
1455consumed, then it will not be discarded regardless of the flag.
1456
1457Returns true if some new text was added to the buffer, or false if the
1458buffer has reached the end of the input text.
1459
1460=for apidoc Amnh||LEX_KEEP_PREVIOUS
1461
1462=cut
1463*/
1464
1465#define LEX_FAKE_EOF 0x80000000
1466#define LEX_NO_TERM 0x40000000 /* here-doc */
1467
1468bool
1469Perl_lex_next_chunk(pTHX_ U32 flags)
1470{
1471 SV *linestr;
1472 char *buf;
1473 STRLEN old_bufend_pos, new_bufend_pos;
1474 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1475 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1476 bool got_some_for_debugger = 0;
1477 bool got_some;
1478
1479 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1480 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1481 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1482 return FALSE;
1483 linestr = PL_parser->linestr;
1484 buf = SvPVX(linestr);
1485 if (!(flags & LEX_KEEP_PREVIOUS)
1486 && PL_parser->bufptr == PL_parser->bufend)
1487 {
1488 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1489 linestart_pos = 0;
1490 if (PL_parser->last_uni != PL_parser->bufend)
1491 PL_parser->last_uni = NULL;
1492 if (PL_parser->last_lop != PL_parser->bufend)
1493 PL_parser->last_lop = NULL;
1494 last_uni_pos = last_lop_pos = 0;
1495 *buf = 0;
1496 SvCUR_set(linestr, 0);
1497 } else {
1498 old_bufend_pos = PL_parser->bufend - buf;
1499 bufptr_pos = PL_parser->bufptr - buf;
1500 oldbufptr_pos = PL_parser->oldbufptr - buf;
1501 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1502 linestart_pos = PL_parser->linestart - buf;
1503 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1504 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1505 }
1506 if (flags & LEX_FAKE_EOF) {
1507 goto eof;
1508 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1509 got_some = 0;
1510 } else if (filter_gets(linestr, old_bufend_pos)) {
1511 got_some = 1;
1512 got_some_for_debugger = 1;
1513 } else if (flags & LEX_NO_TERM) {
1514 got_some = 0;
1515 } else {
1516 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1517 SvPVCLEAR(linestr);
1518 eof:
1519 /* End of real input. Close filehandle (unless it was STDIN),
1520 * then add implicit termination.
1521 */
1522 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1523 PerlIO_clearerr(PL_parser->rsfp);
1524 else if (PL_parser->rsfp)
1525 (void)PerlIO_close(PL_parser->rsfp);
1526 PL_parser->rsfp = NULL;
1527 PL_parser->in_pod = PL_parser->filtered = 0;
1528 if (!PL_in_eval && PL_minus_p) {
1529 sv_catpvs(linestr,
1530 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1531 PL_minus_n = PL_minus_p = 0;
1532 } else if (!PL_in_eval && PL_minus_n) {
1533 sv_catpvs(linestr, /*{*/";}");
1534 PL_minus_n = 0;
1535 } else
1536 sv_catpvs(linestr, ";");
1537 got_some = 1;
1538 }
1539 buf = SvPVX(linestr);
1540 new_bufend_pos = SvCUR(linestr);
1541 PL_parser->bufend = buf + new_bufend_pos;
1542 PL_parser->bufptr = buf + bufptr_pos;
1543
1544 if (UTF) {
1545 const U8* first_bad_char_loc;
1546 if (UNLIKELY(! is_utf8_string_loc(
1547 (U8 *) PL_parser->bufptr,
1548 PL_parser->bufend - PL_parser->bufptr,
1549 &first_bad_char_loc)))
1550 {
1551 _force_out_malformed_utf8_message(first_bad_char_loc,
1552 (U8 *) PL_parser->bufend,
1553 0,
1554 1 /* 1 means die */ );
1555 NOT_REACHED; /* NOTREACHED */
1556 }
1557 }
1558
1559 PL_parser->oldbufptr = buf + oldbufptr_pos;
1560 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1561 PL_parser->linestart = buf + linestart_pos;
1562 if (PL_parser->last_uni)
1563 PL_parser->last_uni = buf + last_uni_pos;
1564 if (PL_parser->last_lop)
1565 PL_parser->last_lop = buf + last_lop_pos;
1566 if (PL_parser->preambling != NOLINE) {
1567 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1568 PL_parser->preambling = NOLINE;
1569 }
1570 if ( got_some_for_debugger
1571 && PERLDB_LINE_OR_SAVESRC
1572 && PL_curstash != PL_debstash)
1573 {
1574 /* debugger active and we're not compiling the debugger code,
1575 * so store the line into the debugger's array of lines
1576 */
1577 update_debugger_info(NULL, buf+old_bufend_pos,
1578 new_bufend_pos-old_bufend_pos);
1579 }
1580 return got_some;
1581}
1582
1583/*
1584=for apidoc lex_peek_unichar
1585
1586Looks ahead one (Unicode) character in the text currently being lexed.
1587Returns the codepoint (unsigned integer value) of the next character,
1588or -1 if lexing has reached the end of the input text. To consume the
1589peeked character, use L</lex_read_unichar>.
1590
1591If the next character is in (or extends into) the next chunk of input
1592text, the next chunk will be read in. Normally the current chunk will be
1593discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1594bit set, then the current chunk will not be discarded.
1595
1596If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1597is encountered, an exception is generated.
1598
1599=cut
1600*/
1601
1602I32
1603Perl_lex_peek_unichar(pTHX_ U32 flags)
1604{
1605 char *s, *bufend;
1606 if (flags & ~(LEX_KEEP_PREVIOUS))
1607 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1608 s = PL_parser->bufptr;
1609 bufend = PL_parser->bufend;
1610 if (UTF) {
1611 U8 head;
1612 I32 unichar;
1613 STRLEN len, retlen;
1614 if (s == bufend) {
1615 if (!lex_next_chunk(flags))
1616 return -1;
1617 s = PL_parser->bufptr;
1618 bufend = PL_parser->bufend;
1619 }
1620 head = (U8)*s;
1621 if (UTF8_IS_INVARIANT(head))
1622 return head;
1623 if (UTF8_IS_START(head)) {
1624 len = UTF8SKIP(&head);
1625 while ((STRLEN)(bufend-s) < len) {
1626 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1627 break;
1628 s = PL_parser->bufptr;
1629 bufend = PL_parser->bufend;
1630 }
1631 }
1632 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1633 if (retlen == (STRLEN)-1) {
1634 _force_out_malformed_utf8_message((U8 *) s,
1635 (U8 *) bufend,
1636 0,
1637 1 /* 1 means die */ );
1638 NOT_REACHED; /* NOTREACHED */
1639 }
1640 return unichar;
1641 } else {
1642 if (s == bufend) {
1643 if (!lex_next_chunk(flags))
1644 return -1;
1645 s = PL_parser->bufptr;
1646 }
1647 return (U8)*s;
1648 }
1649}
1650
1651/*
1652=for apidoc lex_read_unichar
1653
1654Reads the next (Unicode) character in the text currently being lexed.
1655Returns the codepoint (unsigned integer value) of the character read,
1656and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1657if lexing has reached the end of the input text. To non-destructively
1658examine the next character, use L</lex_peek_unichar> instead.
1659
1660If the next character is in (or extends into) the next chunk of input
1661text, the next chunk will be read in. Normally the current chunk will be
1662discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1663bit set, then the current chunk will not be discarded.
1664
1665If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1666is encountered, an exception is generated.
1667
1668=cut
1669*/
1670
1671I32
1672Perl_lex_read_unichar(pTHX_ U32 flags)
1673{
1674 I32 c;
1675 if (flags & ~(LEX_KEEP_PREVIOUS))
1676 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1677 c = lex_peek_unichar(flags);
1678 if (c != -1) {
1679 if (c == '\n')
1680 COPLINE_INC_WITH_HERELINES;
1681 if (UTF)
1682 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1683 else
1684 ++(PL_parser->bufptr);
1685 }
1686 return c;
1687}
1688
1689/*
1690=for apidoc lex_read_space
1691
1692Reads optional spaces, in Perl style, in the text currently being
1693lexed. The spaces may include ordinary whitespace characters and
1694Perl-style comments. C<#line> directives are processed if encountered.
1695L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1696at a non-space character (or the end of the input text).
1697
1698If spaces extend into the next chunk of input text, the next chunk will
1699be read in. Normally the current chunk will be discarded at the same
1700time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1701chunk will not be discarded.
1702
1703=cut
1704*/
1705
1706#define LEX_NO_INCLINE 0x40000000
1707#define LEX_NO_NEXT_CHUNK 0x80000000
1708
1709void
1710Perl_lex_read_space(pTHX_ U32 flags)
1711{
1712 char *s, *bufend;
1713 const bool can_incline = !(flags & LEX_NO_INCLINE);
1714 bool need_incline = 0;
1715 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1716 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1717 s = PL_parser->bufptr;
1718 bufend = PL_parser->bufend;
1719 while (1) {
1720 char c = *s;
1721 if (c == '#') {
1722 do {
1723 c = *++s;
1724 } while (!(c == '\n' || (c == 0 && s == bufend)));
1725 } else if (c == '\n') {
1726 s++;
1727 if (can_incline) {
1728 PL_parser->linestart = s;
1729 if (s == bufend)
1730 need_incline = 1;
1731 else
1732 incline(s, bufend);
1733 }
1734 } else if (isSPACE(c)) {
1735 s++;
1736 } else if (c == 0 && s == bufend) {
1737 bool got_more;
1738 line_t l;
1739 if (flags & LEX_NO_NEXT_CHUNK)
1740 break;
1741 PL_parser->bufptr = s;
1742 l = CopLINE(PL_curcop);
1743 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1744 got_more = lex_next_chunk(flags);
1745 CopLINE_set(PL_curcop, l);
1746 s = PL_parser->bufptr;
1747 bufend = PL_parser->bufend;
1748 if (!got_more)
1749 break;
1750 if (can_incline && need_incline && PL_parser->rsfp) {
1751 incline(s, bufend);
1752 need_incline = 0;
1753 }
1754 } else if (!c) {
1755 s++;
1756 } else {
1757 break;
1758 }
1759 }
1760 PL_parser->bufptr = s;
1761}
1762
1763/*
1764
1765=for apidoc validate_proto
1766
1767This function performs syntax checking on a prototype, C<proto>.
1768If C<warn> is true, any illegal characters or mismatched brackets
1769will trigger illegalproto warnings, declaring that they were
1770detected in the prototype for C<name>.
1771
1772The return value is C<true> if this is a valid prototype, and
1773C<false> if it is not, regardless of whether C<warn> was C<true> or
1774C<false>.
1775
1776Note that C<NULL> is a valid C<proto> and will always return C<true>.
1777
1778=cut
1779
1780 */
1781
1782bool
1783Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1784{
1785 STRLEN len, origlen;
1786 char *p;
1787 bool bad_proto = FALSE;
1788 bool in_brackets = FALSE;
1789 bool after_slash = FALSE;
1790 char greedy_proto = ' ';
1791 bool proto_after_greedy_proto = FALSE;
1792 bool must_be_last = FALSE;
1793 bool underscore = FALSE;
1794 bool bad_proto_after_underscore = FALSE;
1795
1796 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1797
1798 if (!proto)
1799 return TRUE;
1800
1801 p = SvPV(proto, len);
1802 origlen = len;
1803 for (; len--; p++) {
1804 if (!isSPACE(*p)) {
1805 if (must_be_last)
1806 proto_after_greedy_proto = TRUE;
1807 if (underscore) {
1808 if (!memCHRs(";@%", *p))
1809 bad_proto_after_underscore = TRUE;
1810 underscore = FALSE;
1811 }
1812 if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1813 bad_proto = TRUE;
1814 }
1815 else {
1816 if (*p == '[')
1817 in_brackets = TRUE;
1818 else if (*p == ']')
1819 in_brackets = FALSE;
1820 else if ((*p == '@' || *p == '%')
1821 && !after_slash
1822 && !in_brackets )
1823 {
1824 must_be_last = TRUE;
1825 greedy_proto = *p;
1826 }
1827 else if (*p == '_')
1828 underscore = TRUE;
1829 }
1830 if (*p == '\\')
1831 after_slash = TRUE;
1832 else
1833 after_slash = FALSE;
1834 }
1835 }
1836
1837 if (warn) {
1838 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1839 p -= origlen;
1840 p = SvUTF8(proto)
1841 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1842 origlen, UNI_DISPLAY_ISPRINT)
1843 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1844
1845 if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1846 SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1847 sv_catpvs(name2, "::");
1848 sv_catsv(name2, (SV *)name);
1849 name = name2;
1850 }
1851
1852 if (proto_after_greedy_proto)
1853 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1854 "Prototype after '%c' for %" SVf " : %s",
1855 greedy_proto, SVfARG(name), p);
1856 if (in_brackets)
1857 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1858 "Missing ']' in prototype for %" SVf " : %s",
1859 SVfARG(name), p);
1860 if (bad_proto)
1861 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1862 "Illegal character in prototype for %" SVf " : %s",
1863 SVfARG(name), p);
1864 if (bad_proto_after_underscore)
1865 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1866 "Illegal character after '_' in prototype for %" SVf " : %s",
1867 SVfARG(name), p);
1868 }
1869
1870 return (! (proto_after_greedy_proto || bad_proto) );
1871}
1872
1873/*
1874 * S_incline
1875 * This subroutine has nothing to do with tilting, whether at windmills
1876 * or pinball tables. Its name is short for "increment line". It
1877 * increments the current line number in CopLINE(PL_curcop) and checks
1878 * to see whether the line starts with a comment of the form
1879 * # line 500 "foo.pm"
1880 * If so, it sets the current line number and file to the values in the comment.
1881 */
1882
1883STATIC void
1884S_incline(pTHX_ const char *s, const char *end)
1885{
1886 const char *t;
1887 const char *n;
1888 const char *e;
1889 line_t line_num;
1890 UV uv;
1891
1892 PERL_ARGS_ASSERT_INCLINE;
1893
1894 assert(end >= s);
1895
1896 COPLINE_INC_WITH_HERELINES;
1897 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1898 && s+1 == PL_bufend && *s == ';') {
1899 /* fake newline in string eval */
1900 CopLINE_dec(PL_curcop);
1901 return;
1902 }
1903 if (*s++ != '#')
1904 return;
1905 while (SPACE_OR_TAB(*s))
1906 s++;
1907 if (memBEGINs(s, (STRLEN) (end - s), "line"))
1908 s += sizeof("line") - 1;
1909 else
1910 return;
1911 if (SPACE_OR_TAB(*s))
1912 s++;
1913 else
1914 return;
1915 while (SPACE_OR_TAB(*s))
1916 s++;
1917 if (!isDIGIT(*s))
1918 return;
1919
1920 n = s;
1921 while (isDIGIT(*s))
1922 s++;
1923 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1924 return;
1925 while (SPACE_OR_TAB(*s))
1926 s++;
1927 if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1928 s++;
1929 e = t + 1;
1930 }
1931 else {
1932 t = s;
1933 while (*t && !isSPACE(*t))
1934 t++;
1935 e = t;
1936 }
1937 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1938 e++;
1939 if (*e != '\n' && *e != '\0')
1940 return; /* false alarm */
1941
1942 if (!grok_atoUV(n, &uv, &e))
1943 return;
1944 line_num = ((line_t)uv) - 1;
1945
1946 if (t - s > 0) {
1947 const STRLEN len = t - s;
1948
1949 if (!PL_rsfp && !PL_parser->filtered) {
1950 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1951 * to *{"::_<newfilename"} */
1952 /* However, the long form of evals is only turned on by the
1953 debugger - usually they're "(eval %lu)" */
1954 GV * const cfgv = CopFILEGV(PL_curcop);
1955 if (cfgv) {
1956 char smallbuf[128];
1957 STRLEN tmplen2 = len;
1958 char *tmpbuf2;
1959 GV *gv2;
1960
1961 if (tmplen2 + 2 <= sizeof smallbuf)
1962 tmpbuf2 = smallbuf;
1963 else
1964 Newx(tmpbuf2, tmplen2 + 2, char);
1965
1966 tmpbuf2[0] = '_';
1967 tmpbuf2[1] = '<';
1968
1969 memcpy(tmpbuf2 + 2, s, tmplen2);
1970 tmplen2 += 2;
1971
1972 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1973 if (!isGV(gv2)) {
1974 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1975 /* adjust ${"::_<newfilename"} to store the new file name */
1976 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1977 /* The line number may differ. If that is the case,
1978 alias the saved lines that are in the array.
1979 Otherwise alias the whole array. */
1980 if (CopLINE(PL_curcop) == line_num) {
1981 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1982 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1983 }
1984 else if (GvAV(cfgv)) {
1985 AV * const av = GvAV(cfgv);
1986 const line_t start = CopLINE(PL_curcop)+1;
1987 SSize_t items = AvFILLp(av) - start;
1988 if (items > 0) {
1989 AV * const av2 = GvAVn(gv2);
1990 SV **svp = AvARRAY(av) + start;
1991 Size_t l = line_num+1;
1992 while (items-- && l < SSize_t_MAX && l == (line_t)l)
1993 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1994 }
1995 }
1996 }
1997
1998 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1999 }
2000 }
2001 CopFILE_free(PL_curcop);
2002 CopFILE_setn(PL_curcop, s, len);
2003 }
2004 CopLINE_set(PL_curcop, line_num);
2005}
2006
2007STATIC void
2008S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
2009{
2010 AV *av = CopFILEAVx(PL_curcop);
2011 if (av) {
2012 SV * sv;
2013 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
2014 else {
2015 sv = *av_fetch(av, 0, 1);
2016 SvUPGRADE(sv, SVt_PVMG);
2017 }
2018 if (!SvPOK(sv)) SvPVCLEAR(sv);
2019 if (orig_sv)
2020 sv_catsv(sv, orig_sv);
2021 else
2022 sv_catpvn(sv, buf, len);
2023 if (!SvIOK(sv)) {
2024 (void)SvIOK_on(sv);
2025 SvIV_set(sv, 0);
2026 }
2027 if (PL_parser->preambling == NOLINE)
2028 av_store(av, CopLINE(PL_curcop), sv);
2029 }
2030}
2031
2032/*
2033 * skipspace
2034 * Called to gobble the appropriate amount and type of whitespace.
2035 * Skips comments as well.
2036 * Returns the next character after the whitespace that is skipped.
2037 *
2038 * peekspace
2039 * Same thing, but look ahead without incrementing line numbers or
2040 * adjusting PL_linestart.
2041 */
2042
2043#define skipspace(s) skipspace_flags(s, 0)
2044#define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
2045
2046char *
2047Perl_skipspace_flags(pTHX_ char *s, U32 flags)
2048{
2049 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
2050 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2051 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
2052 s++;
2053 } else {
2054 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
2055 PL_bufptr = s;
2056 lex_read_space(flags | LEX_KEEP_PREVIOUS |
2057 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
2058 LEX_NO_NEXT_CHUNK : 0));
2059 s = PL_bufptr;
2060 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
2061 if (PL_linestart > PL_bufptr)
2062 PL_bufptr = PL_linestart;
2063 return s;
2064 }
2065 return s;
2066}
2067
2068/*
2069 * S_check_uni
2070 * Check the unary operators to ensure there's no ambiguity in how they're
2071 * used. An ambiguous piece of code would be:
2072 * rand + 5
2073 * This doesn't mean rand() + 5. Because rand() is a unary operator,
2074 * the +5 is its argument.
2075 */
2076
2077STATIC void
2078S_check_uni(pTHX)
2079{
2080 const char *s;
2081
2082 if (PL_oldoldbufptr != PL_last_uni)
2083 return;
2084 while (isSPACE(*PL_last_uni))
2085 PL_last_uni++;
2086 s = PL_last_uni;
2087 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
2088 s += UTF ? UTF8SKIP(s) : 1;
2089 if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
2090 return;
2091
2092 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2093 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
2094 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
2095}
2096
2097/*
2098 * LOP : macro to build a list operator. Its behaviour has been replaced
2099 * with a subroutine, S_lop() for which LOP is just another name.
2100 */
2101
2102#define LOP(f,x) return lop(f,x,s)
2103
2104/*
2105 * S_lop
2106 * Build a list operator (or something that might be one). The rules:
2107 * - if we have a next token, then it's a list operator (no parens) for
2108 * which the next token has already been parsed; e.g.,
2109 * sort foo @args
2110 * sort foo (@args)
2111 * - if the next thing is an opening paren, then it's a function
2112 * - else it's a list operator
2113 */
2114
2115STATIC I32
2116S_lop(pTHX_ I32 f, U8 x, char *s)
2117{
2118 PERL_ARGS_ASSERT_LOP;
2119
2120 pl_yylval.ival = f;
2121 CLINE;
2122 PL_bufptr = s;
2123 PL_last_lop = PL_oldbufptr;
2124 PL_last_lop_op = (OPCODE)f;
2125 if (PL_nexttoke)
2126 goto lstop;
2127 PL_expect = x;
2128 if (*s == '(')
2129 return REPORT(FUNC);
2130 s = skipspace(s);
2131 if (*s == '(')
2132 return REPORT(FUNC);
2133 else {
2134 lstop:
2135 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2136 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2137 return REPORT(LSTOP);
2138 }
2139}
2140
2141/*
2142 * S_force_next
2143 * When the lexer realizes it knows the next token (for instance,
2144 * it is reordering tokens for the parser) then it can call S_force_next
2145 * to know what token to return the next time the lexer is called. Caller
2146 * will need to set PL_nextval[] and possibly PL_expect to ensure
2147 * the lexer handles the token correctly.
2148 */
2149
2150STATIC void
2151S_force_next(pTHX_ I32 type)
2152{
2153#ifdef DEBUGGING
2154 if (DEBUG_T_TEST) {
2155 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2156 tokereport(type, &NEXTVAL_NEXTTOKE);
2157 }
2158#endif
2159 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2160 PL_nexttype[PL_nexttoke] = type;
2161 PL_nexttoke++;
2162}
2163
2164/*
2165 * S_postderef
2166 *
2167 * This subroutine handles postfix deref syntax after the arrow has already
2168 * been emitted. @* $* etc. are emitted as two separate tokens right here.
2169 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2170 * only the first, leaving yylex to find the next.
2171 */
2172
2173static int
2174S_postderef(pTHX_ int const funny, char const next)
2175{
2176 assert(funny == DOLSHARP
2177 || funny == PERLY_DOLLAR
2178 || funny == PERLY_SNAIL
2179 || funny == PERLY_PERCENT_SIGN
2180 || funny == PERLY_AMPERSAND
2181 || funny == PERLY_STAR
2182 );
2183 if (next == '*') {
2184 PL_expect = XOPERATOR;
2185 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2186 assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny);
2187 PL_lex_state = LEX_INTERPEND;
2188 if (PERLY_SNAIL == funny)
2189 force_next(POSTJOIN);
2190 }
2191 force_next(PERLY_STAR);
2192 PL_bufptr+=2;
2193 }
2194 else {
2195 if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
2196 && !PL_lex_brackets)
2197 PL_lex_dojoin = 2;
2198 PL_expect = XOPERATOR;
2199 PL_bufptr++;
2200 }
2201 return funny;
2202}
2203
2204void
2205Perl_yyunlex(pTHX)
2206{
2207 int yyc = PL_parser->yychar;
2208 if (yyc != YYEMPTY) {
2209 if (yyc) {
2210 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2211 if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) {
2212 PL_lex_allbrackets--;
2213 PL_lex_brackets--;
2214 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2215 } else if (yyc == PERLY_PAREN_OPEN) {
2216 PL_lex_allbrackets--;
2217 yyc |= (2<<24);
2218 }
2219 force_next(yyc);
2220 }
2221 PL_parser->yychar = YYEMPTY;
2222 }
2223}
2224
2225STATIC SV *
2226S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2227{
2228 SV * const sv = newSVpvn_utf8(start, len,
2229 ! IN_BYTES
2230 && UTF
2231 && len != 0
2232 && is_utf8_non_invariant_string((const U8*)start, len));
2233 return sv;
2234}
2235
2236/*
2237 * S_force_word
2238 * When the lexer knows the next thing is a word (for instance, it has
2239 * just seen -> and it knows that the next char is a word char, then
2240 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2241 * lookahead.
2242 *
2243 * Arguments:
2244 * char *start : buffer position (must be within PL_linestr)
2245 * int token : PL_next* will be this type of bare word
2246 * (e.g., METHCALL0,BAREWORD)
2247 * int check_keyword : if true, Perl checks to make sure the word isn't
2248 * a keyword (do this if the word is a label, e.g. goto FOO)
2249 * int allow_pack : if true, : characters will also be allowed (require,
2250 * use, etc. do this)
2251 */
2252
2253STATIC char *
2254S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2255{
2256 char *s;
2257 STRLEN len;
2258
2259 PERL_ARGS_ASSERT_FORCE_WORD;
2260
2261 start = skipspace(start);
2262 s = start;
2263 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2264 || (allow_pack && *s == ':' && s[1] == ':') )
2265 {
2266 s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len, allow_pack);
2267 if (check_keyword) {
2268 char *s2 = PL_tokenbuf;
2269 STRLEN len2 = len;
2270 if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2271 s2 += sizeof("CORE::") - 1;
2272 len2 -= sizeof("CORE::") - 1;
2273 }
2274 if (keyword(s2, len2, 0))
2275 return start;
2276 }
2277 if (token == METHCALL0) {
2278 s = skipspace(s);
2279 if (*s == '(')
2280 PL_expect = XTERM;
2281 else {
2282 PL_expect = XOPERATOR;
2283 }
2284 }
2285 NEXTVAL_NEXTTOKE.opval
2286 = newSVOP(OP_CONST,0,
2287 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2288 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2289 force_next(token);
2290 }
2291 return s;
2292}
2293
2294/*
2295 * S_force_ident
2296 * Called when the lexer wants $foo *foo &foo etc, but the program
2297 * text only contains the "foo" portion. The first argument is a pointer
2298 * to the "foo", and the second argument is the type symbol to prefix.
2299 * Forces the next token to be a "BAREWORD".
2300 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2301 */
2302
2303STATIC void
2304S_force_ident(pTHX_ const char *s, int kind)
2305{
2306 PERL_ARGS_ASSERT_FORCE_IDENT;
2307
2308 if (s[0]) {
2309 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2310 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2311 UTF ? SVf_UTF8 : 0));
2312 NEXTVAL_NEXTTOKE.opval = o;
2313 force_next(BAREWORD);
2314 if (kind) {
2315 o->op_private = OPpCONST_ENTERED;
2316 /* XXX see note in pp_entereval() for why we forgo typo
2317 warnings if the symbol must be introduced in an eval.
2318 GSAR 96-10-12 */
2319 gv_fetchpvn_flags(s, len,
2320 (PL_in_eval ? GV_ADDMULTI
2321 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2322 kind == PERLY_DOLLAR ? SVt_PV :
2323 kind == PERLY_SNAIL ? SVt_PVAV :
2324 kind == PERLY_PERCENT_SIGN ? SVt_PVHV :
2325 SVt_PVGV
2326 );
2327 }
2328 }
2329}
2330
2331static void
2332S_force_ident_maybe_lex(pTHX_ char pit)
2333{
2334 NEXTVAL_NEXTTOKE.ival = pit;
2335 force_next('p');
2336}
2337
2338NV
2339Perl_str_to_version(pTHX_ SV *sv)
2340{
2341 NV retval = 0.0;
2342 NV nshift = 1.0;
2343 STRLEN len;
2344 const char *start = SvPV_const(sv,len);
2345 const char * const end = start + len;
2346 const bool utf = cBOOL(SvUTF8(sv));
2347
2348 PERL_ARGS_ASSERT_STR_TO_VERSION;
2349
2350 while (start < end) {
2351 STRLEN skip;
2352 UV n;
2353 if (utf)
2354 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2355 else {
2356 n = *(U8*)start;
2357 skip = 1;
2358 }
2359 retval += ((NV)n)/nshift;
2360 start += skip;
2361 nshift *= 1000;
2362 }
2363 return retval;
2364}
2365
2366/*
2367 * S_force_version
2368 * Forces the next token to be a version number.
2369 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2370 * and if "guessing" is TRUE, then no new token is created (and the caller
2371 * must use an alternative parsing method).
2372 */
2373
2374STATIC char *
2375S_force_version(pTHX_ char *s, int guessing)
2376{
2377 OP *version = NULL;
2378 char *d;
2379
2380 PERL_ARGS_ASSERT_FORCE_VERSION;
2381
2382 s = skipspace(s);
2383
2384 d = s;
2385 if (*d == 'v')
2386 d++;
2387 if (isDIGIT(*d)) {
2388 while (isDIGIT(*d) || *d == '_' || *d == '.')
2389 d++;
2390 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2391 SV *ver;
2392 s = scan_num(s, &pl_yylval);
2393 version = pl_yylval.opval;
2394 ver = cSVOPx(version)->op_sv;
2395 if (SvPOK(ver) && !SvNIOK(ver)) {
2396 SvUPGRADE(ver, SVt_PVNV);
2397 SvNV_set(ver, str_to_version(ver));
2398 SvNOK_on(ver); /* hint that it is a version */
2399 }
2400 }
2401 else if (guessing) {
2402 return s;
2403 }
2404 }
2405
2406 /* NOTE: The parser sees the package name and the VERSION swapped */
2407 NEXTVAL_NEXTTOKE.opval = version;
2408 force_next(BAREWORD);
2409
2410 return s;
2411}
2412
2413/*
2414 * S_force_strict_version
2415 * Forces the next token to be a version number using strict syntax rules.
2416 */
2417
2418STATIC char *
2419S_force_strict_version(pTHX_ char *s)
2420{
2421 OP *version = NULL;
2422 const char *errstr = NULL;
2423
2424 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2425
2426 while (isSPACE(*s)) /* leading whitespace */
2427 s++;
2428
2429 if (is_STRICT_VERSION(s,&errstr)) {
2430 SV *ver = newSV_type(SVt_NULL);
2431 s = (char *)scan_version(s, ver, 0);
2432 version = newSVOP(OP_CONST, 0, ver);
2433 }
2434 else if ((*s != ';' && *s != ':' && *s != '{' && *s != '}' )
2435 && (s = skipspace(s), (*s != ';' && *s != ':' && *s != '{' && *s != '}' )))
2436 {
2437 PL_bufptr = s;
2438 if (errstr)
2439 yyerror(errstr); /* version required */
2440 return s;
2441 }
2442
2443 /* NOTE: The parser sees the package name and the VERSION swapped */
2444 NEXTVAL_NEXTTOKE.opval = version;
2445 force_next(BAREWORD);
2446
2447 return s;
2448}
2449
2450/*
2451 * S_tokeq
2452 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2453 * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is
2454 * unchanged, and a new SV containing the modified input is returned.
2455 */
2456
2457STATIC SV *
2458S_tokeq(pTHX_ SV *sv)
2459{
2460 char *s;
2461 char *send;
2462 char *d;
2463 SV *pv = sv;
2464
2465 PERL_ARGS_ASSERT_TOKEQ;
2466
2467 assert (SvPOK(sv));
2468 assert (SvLEN(sv));
2469 assert (!SvIsCOW(sv));
2470 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2471 goto finish;
2472 s = SvPVX(sv);
2473 send = SvEND(sv);
2474 /* This is relying on the SV being "well formed" with a trailing '\0' */
2475 while (s < send && !(*s == '\\' && s[1] == '\\'))
2476 s++;
2477 if (s == send)
2478 goto finish;
2479 d = s;
2480 if ( PL_hints & HINT_NEW_STRING ) {
2481 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2482 SVs_TEMP | SvUTF8(sv));
2483 }
2484 while (s < send) {
2485 if (*s == '\\') {
2486 if (s + 1 < send && (s[1] == '\\'))
2487 s++; /* all that, just for this */
2488 }
2489 *d++ = *s++;
2490 }
2491 *d = '\0';
2492 SvCUR_set(sv, d - SvPVX_const(sv));
2493 finish:
2494 if ( PL_hints & HINT_NEW_STRING )
2495 return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2496 return sv;
2497}
2498
2499/*
2500 * Now come three functions related to double-quote context,
2501 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2502 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2503 * interact with PL_lex_state, and create fake ( ... ) argument lists
2504 * to handle functions and concatenation.
2505 * For example,
2506 * "foo\lbar"
2507 * is tokenised as
2508 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2509 */
2510
2511/*
2512 * S_sublex_start
2513 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2514 *
2515 * Pattern matching will set PL_lex_op to the pattern-matching op to
2516 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2517 *
2518 * OP_CONST is easy--just make the new op and return.
2519 *
2520 * Everything else becomes a FUNC.
2521 *
2522 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2523 * had an OP_CONST. This just sets us up for a
2524 * call to S_sublex_push().
2525 */
2526
2527STATIC I32
2528S_sublex_start(pTHX)
2529{
2530 const I32 op_type = pl_yylval.ival;
2531
2532 if (op_type == OP_NULL) {
2533 pl_yylval.opval = PL_lex_op;
2534 PL_lex_op = NULL;
2535 return THING;
2536 }
2537 if (op_type == OP_CONST) {
2538 SV *sv = PL_lex_stuff;
2539 PL_lex_stuff = NULL;
2540 sv = tokeq(sv);
2541
2542 if (SvTYPE(sv) == SVt_PVIV) {
2543 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2544 STRLEN len;
2545 const char * const p = SvPV_const(sv, len);
2546 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2547 SvREFCNT_dec(sv);
2548 sv = nsv;
2549 }
2550 pl_yylval.opval = newSVOP(op_type, 0, sv);
2551 return THING;
2552 }
2553
2554 PL_parser->lex_super_state = PL_lex_state;
2555 PL_parser->lex_sub_inwhat = (U16)op_type;
2556 PL_parser->lex_sub_op = PL_lex_op;
2557 PL_parser->sub_no_recover = FALSE;
2558 PL_parser->sub_error_count = PL_error_count;
2559 PL_lex_state = LEX_INTERPPUSH;
2560
2561 PL_expect = XTERM;
2562 if (PL_lex_op) {
2563 pl_yylval.opval = PL_lex_op;
2564 PL_lex_op = NULL;
2565 return PMFUNC;
2566 }
2567 else
2568 return FUNC;
2569}
2570
2571/*
2572 * S_sublex_push
2573 * Create a new scope to save the lexing state. The scope will be
2574 * ended in S_sublex_done. Returns a '(', starting the function arguments
2575 * to the uc, lc, etc. found before.
2576 * Sets PL_lex_state to LEX_INTERPCONCAT.
2577 */
2578
2579STATIC I32
2580S_sublex_push(pTHX)
2581{
2582 LEXSHARED *shared;
2583 const bool is_heredoc = PL_multi_close == '<';
2584 ENTER;
2585
2586 PL_lex_state = PL_parser->lex_super_state;
2587 SAVEI8(PL_lex_dojoin);
2588 SAVEI32(PL_lex_brackets);
2589 SAVEI32(PL_lex_allbrackets);
2590 SAVEI32(PL_lex_formbrack);
2591 SAVEI8(PL_lex_fakeeof);
2592 SAVEI32(PL_lex_casemods);
2593 SAVEI32(PL_lex_starts);
2594 SAVEI8(PL_lex_state);
2595 SAVESPTR(PL_lex_repl);
2596 SAVEVPTR(PL_lex_inpat);
2597 SAVEI16(PL_lex_inwhat);
2598 if (is_heredoc)
2599 {
2600 SAVECOPLINE(PL_curcop);
2601 SAVEI32(PL_multi_end);
2602 SAVEI32(PL_parser->herelines);
2603 PL_parser->herelines = 0;
2604 }
2605 SAVEIV(PL_multi_close);
2606 SAVEPPTR(PL_bufptr);
2607 SAVEPPTR(PL_bufend);
2608 SAVEPPTR(PL_oldbufptr);
2609 SAVEPPTR(PL_oldoldbufptr);
2610 SAVEPPTR(PL_last_lop);
2611 SAVEPPTR(PL_last_uni);
2612 SAVEPPTR(PL_linestart);
2613 SAVESPTR(PL_linestr);
2614 SAVEGENERICPV(PL_lex_brackstack);
2615 SAVEGENERICPV(PL_lex_casestack);
2616 SAVEGENERICPV(PL_parser->lex_shared);
2617 SAVEBOOL(PL_parser->lex_re_reparsing);
2618 SAVEI32(PL_copline);
2619
2620 /* The here-doc parser needs to be able to peek into outer lexing
2621 scopes to find the body of the here-doc. So we put PL_linestr and
2622 PL_bufptr into lex_shared, to 'share' those values.
2623 */
2624 PL_parser->lex_shared->ls_linestr = PL_linestr;
2625 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2626
2627 PL_linestr = PL_lex_stuff;
2628 PL_lex_repl = PL_parser->lex_sub_repl;
2629 PL_lex_stuff = NULL;
2630 PL_parser->lex_sub_repl = NULL;
2631
2632 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2633 set for an inner quote-like operator and then an error causes scope-
2634 popping. We must not have a PL_lex_stuff value left dangling, as
2635 that breaks assumptions elsewhere. See bug #123617. */
2636 SAVEGENERICSV(PL_lex_stuff);
2637 SAVEGENERICSV(PL_parser->lex_sub_repl);
2638
2639 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2640 = SvPVX(PL_linestr);
2641 PL_bufend += SvCUR(PL_linestr);
2642 PL_last_lop = PL_last_uni = NULL;
2643 SAVEFREESV(PL_linestr);
2644 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2645
2646 PL_lex_dojoin = FALSE;
2647 PL_lex_brackets = PL_lex_formbrack = 0;
2648 PL_lex_allbrackets = 0;
2649 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2650 Newx(PL_lex_brackstack, 120, char);
2651 Newx(PL_lex_casestack, 12, char);
2652 PL_lex_casemods = 0;
2653 *PL_lex_casestack = '\0';
2654 PL_lex_starts = 0;
2655 PL_lex_state = LEX_INTERPCONCAT;
2656 if (is_heredoc)
2657 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2658 PL_copline = NOLINE;
2659
2660 Newxz(shared, 1, LEXSHARED);
2661 shared->ls_prev = PL_parser->lex_shared;
2662 PL_parser->lex_shared = shared;
2663
2664 PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2665 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2666 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2667 PL_lex_inpat = PL_parser->lex_sub_op;
2668 else
2669 PL_lex_inpat = NULL;
2670
2671 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2672 PL_in_eval &= ~EVAL_RE_REPARSING;
2673
2674 return SUBLEXSTART;
2675}
2676
2677/*
2678 * S_sublex_done
2679 * Restores lexer state after a S_sublex_push.
2680 */
2681
2682STATIC I32
2683S_sublex_done(pTHX)
2684{
2685 if (!PL_lex_starts++) {
2686 SV * const sv = newSVpvs("");
2687 if (SvUTF8(PL_linestr))
2688 SvUTF8_on(sv);
2689 PL_expect = XOPERATOR;
2690 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2691 return THING;
2692 }
2693
2694 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2695 PL_lex_state = LEX_INTERPCASEMOD;
2696 return yylex();
2697 }
2698
2699 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2700 assert(PL_lex_inwhat != OP_TRANSR);
2701 if (PL_lex_repl) {
2702 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2703 PL_linestr = PL_lex_repl;
2704 PL_lex_inpat = 0;
2705 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2706 PL_bufend += SvCUR(PL_linestr);
2707 PL_last_lop = PL_last_uni = NULL;
2708 PL_lex_dojoin = FALSE;
2709 PL_lex_brackets = 0;
2710 PL_lex_allbrackets = 0;
2711 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2712 PL_lex_casemods = 0;
2713 *PL_lex_casestack = '\0';
2714 PL_lex_starts = 0;
2715 if (SvEVALED(PL_lex_repl)) {
2716 PL_lex_state = LEX_INTERPNORMAL;
2717 PL_lex_starts++;
2718 /* we don't clear PL_lex_repl here, so that we can check later
2719 whether this is an evalled subst; that means we rely on the
2720 logic to ensure sublex_done() is called again only via the
2721 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2722 }
2723 else {
2724 PL_lex_state = LEX_INTERPCONCAT;
2725 PL_lex_repl = NULL;
2726 }
2727 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2728 CopLINE(PL_curcop) +=
2729 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2730 + PL_parser->herelines;
2731 PL_parser->herelines = 0;
2732 }
2733 return PERLY_SLASH;
2734 }
2735 else {
2736 const line_t l = CopLINE(PL_curcop);
2737 LEAVE;
2738 if (PL_parser->sub_error_count != PL_error_count) {
2739 if (PL_parser->sub_no_recover) {
2740 yyquit();
2741 NOT_REACHED;
2742 }
2743 }
2744 if (PL_multi_close == '<')
2745 PL_parser->herelines += l - PL_multi_end;
2746 PL_bufend = SvPVX(PL_linestr);
2747 PL_bufend += SvCUR(PL_linestr);
2748 PL_expect = XOPERATOR;
2749 return SUBLEXEND;
2750 }
2751}
2752
2753HV *
2754Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2755 const STRLEN context_len, const char ** error_msg)
2756{
2757 /* Load the official _charnames module if not already there. The
2758 * parameters are just to give info for any error messages generated:
2759 * char_name a name to look up which is the reason for loading this
2760 * context 'char_name' in the context in the input in which it appears
2761 * context_len how many bytes 'context' occupies
2762 * error_msg *error_msg will be set to any error
2763 *
2764 * Returns the ^H table if success; otherwise NULL */
2765
2766 unsigned int i;
2767 HV * table;
2768 SV **cvp;
2769 SV * res;
2770
2771 PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2772
2773 /* This loop is executed 1 1/2 times. On the first time through, if it
2774 * isn't already loaded, try loading it, and iterate just once to see if it
2775 * worked. */
2776 for (i = 0; i < 2; i++) {
2777 table = GvHV(PL_hintgv); /* ^H */
2778
2779 if ( table
2780 && (PL_hints & HINT_LOCALIZE_HH)
2781 && (cvp = hv_fetchs(table, "charnames", FALSE))
2782 && SvOK(*cvp))
2783 {
2784 return table; /* Quit if already loaded */
2785 }
2786
2787 if (i == 0) {
2788 Perl_load_module(aTHX_
2789 0,
2790 newSVpvs("_charnames"),
2791
2792 /* version parameter; no need to specify it, as if we get too early
2793 * a version, will fail anyway, not being able to find 'charnames'
2794 * */
2795 NULL,
2796 newSVpvs(":full"),
2797 newSVpvs(":short"),
2798 NULL);
2799 }
2800 }
2801
2802 /* Here, it failed; new_constant will give appropriate error messages */
2803 *error_msg = NULL;
2804 res = new_constant( NULL, 0, "charnames", char_name, NULL,
2805 context, context_len, error_msg);
2806 SvREFCNT_dec(res);
2807
2808 return NULL;
2809}
2810
2811STATIC SV*
2812S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2813{
2814 /* This justs wraps get_and_check_backslash_N_name() to output any error
2815 * message it returns. */
2816
2817 const char * error_msg = NULL;
2818 SV * result;
2819
2820 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2821
2822 /* charnames doesn't work well if there have been errors found */
2823 if (PL_error_count > 0) {
2824 return NULL;
2825 }
2826
2827 result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2828
2829 if (error_msg) {
2830 yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2831 }
2832
2833 return result;
2834}
2835
2836SV*
2837Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2838 const char* e,
2839 const bool is_utf8,
2840 const char ** error_msg)
2841{
2842 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2843 * interior, hence to the "}". Finds what the name resolves to, returning
2844 * an SV* containing it; NULL if no valid one found.
2845 *
2846 * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2847 * doesn't have to be. */
2848
2849 SV* char_name;
2850 SV* res;
2851 HV * table;
2852 SV **cvp;
2853 SV *cv;
2854 SV *rv;
2855 HV *stash;
2856
2857 /* Points to the beginning of the \N{... so that any messages include the
2858 * context of what's failing*/
2859 const char* context = s - 3;
2860 STRLEN context_len = e - context + 1; /* include all of \N{...} */
2861
2862
2863 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2864
2865 assert(e >= s);
2866 assert(s > (char *) 3);
2867
2868 while (s < e && isBLANK(*s)) {
2869 s++;
2870 }
2871
2872 while (s < e && isBLANK(*(e - 1))) {
2873 e--;
2874 }
2875
2876 char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2877
2878 if (!SvCUR(char_name)) {
2879 SvREFCNT_dec_NN(char_name);
2880 /* diag_listed_as: Unknown charname '%s' */
2881 *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2882 return NULL;
2883 }
2884
2885 /* Autoload the charnames module */
2886
2887 table = load_charnames(char_name, context, context_len, error_msg);
2888 if (table == NULL) {
2889 return NULL;
2890 }
2891
2892 *error_msg = NULL;
2893 res = new_constant( NULL, 0, "charnames", char_name, NULL,
2894 context, context_len, error_msg);
2895 if (*error_msg) {
2896 *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2897
2898 SvREFCNT_dec(res);
2899 return NULL;
2900 }
2901
2902 /* See if the charnames handler is the Perl core's, and if so, we can skip
2903 * the validation needed for a user-supplied one, as Perl's does its own
2904 * validation. */
2905 cvp = hv_fetchs(table, "charnames", FALSE);
2906 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2907 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2908 {
2909 const char * const name = HvNAME(stash);
2910 if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2911 return res;
2912 }
2913 }
2914
2915 /* Here, it isn't Perl's charname handler. We can't rely on a
2916 * user-supplied handler to validate the input name. For non-ut8 input,
2917 * look to see that the first character is legal. Then loop through the
2918 * rest checking that each is a continuation */
2919
2920 /* This code makes the reasonable assumption that the only Latin1-range
2921 * characters that begin a character name alias are alphabetic, otherwise
2922 * would have to create a isCHARNAME_BEGIN macro */
2923
2924 if (! is_utf8) {
2925 if (! isALPHAU(*s)) {
2926 goto bad_charname;
2927 }
2928 s++;
2929 while (s < e) {
2930 if (! isCHARNAME_CONT(*s)) {
2931 goto bad_charname;
2932 }
2933 if (*s == ' ' && *(s-1) == ' ') {
2934 goto multi_spaces;
2935 }
2936 s++;
2937 }
2938 }
2939 else {
2940 /* Similarly for utf8. For invariants can check directly; for other
2941 * Latin1, can calculate their code point and check; otherwise use an
2942 * inversion list */
2943 if (UTF8_IS_INVARIANT(*s)) {
2944 if (! isALPHAU(*s)) {
2945 goto bad_charname;
2946 }
2947 s++;
2948 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2949 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2950 goto bad_charname;
2951 }
2952 s += 2;
2953 }
2954 else {
2955 if (! _invlist_contains_cp(PL_utf8_charname_begin,
2956 utf8_to_uvchr_buf((U8 *) s,
2957 (U8 *) e,
2958 NULL)))
2959 {
2960 goto bad_charname;
2961 }
2962 s += UTF8SKIP(s);
2963 }
2964
2965 while (s < e) {
2966 if (UTF8_IS_INVARIANT(*s)) {
2967 if (! isCHARNAME_CONT(*s)) {
2968 goto bad_charname;
2969 }
2970 if (*s == ' ' && *(s-1) == ' ') {
2971 goto multi_spaces;
2972 }
2973 s++;
2974 }
2975 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2976 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2977 {
2978 goto bad_charname;
2979 }
2980 s += 2;
2981 }
2982 else {
2983 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2984 utf8_to_uvchr_buf((U8 *) s,
2985 (U8 *) e,
2986 NULL)))
2987 {
2988 goto bad_charname;
2989 }
2990 s += UTF8SKIP(s);
2991 }
2992 }
2993 }
2994 if (*(s-1) == ' ') {
2995 /* diag_listed_as: charnames alias definitions may not contain
2996 trailing white-space; marked by <-- HERE in %s
2997 */
2998 *error_msg = Perl_form(aTHX_
2999 "charnames alias definitions may not contain trailing "
3000 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
3001 (int)(s - context + 1), context,
3002 (int)(e - s + 1), s + 1);
3003 return NULL;
3004 }
3005
3006 if (SvUTF8(res)) { /* Don't accept malformed charname value */
3007 const U8* first_bad_char_loc;
3008 STRLEN len;
3009 const char* const str = SvPV_const(res, len);
3010 if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
3011 &first_bad_char_loc)))
3012 {
3013 _force_out_malformed_utf8_message(first_bad_char_loc,
3014 (U8 *) PL_parser->bufend,
3015 0,
3016 0 /* 0 means don't die */ );
3017 /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
3018 immediately after '%s' */
3019 *error_msg = Perl_form(aTHX_
3020 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
3021 (int) context_len, context,
3022 (int) ((char *) first_bad_char_loc - str), str);
3023 return NULL;
3024 }
3025 }
3026
3027 return res;
3028
3029 bad_charname: {
3030
3031 /* The final %.*s makes sure that should the trailing NUL be missing
3032 * that this print won't run off the end of the string */
3033 /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
3034 in \N{%s} */
3035 *error_msg = Perl_form(aTHX_
3036 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
3037 (int)(s - context + 1), context,
3038 (int)(e - s + 1), s + 1);
3039 return NULL;
3040 }
3041
3042 multi_spaces:
3043 /* diag_listed_as: charnames alias definitions may not contain a
3044 sequence of multiple spaces; marked by <-- HERE
3045 in %s */
3046 *error_msg = Perl_form(aTHX_
3047 "charnames alias definitions may not contain a sequence of "
3048 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
3049 (int)(s - context + 1), context,
3050 (int)(e - s + 1), s + 1);
3051 return NULL;
3052}
3053
3054/*
3055 scan_const
3056
3057 Extracts the next constant part of a pattern, double-quoted string,
3058 or transliteration. This is terrifying code.
3059
3060 For example, in parsing the double-quoted string "ab\x63$d", it would
3061 stop at the '$' and return an OP_CONST containing 'abc'.
3062
3063 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3064 processing a pattern (PL_lex_inpat is true), a transliteration
3065 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3066
3067 Returns a pointer to the character scanned up to. If this is
3068 advanced from the start pointer supplied (i.e. if anything was
3069 successfully parsed), will leave an OP_CONST for the substring scanned
3070 in pl_yylval. Caller must intuit reason for not parsing further
3071 by looking at the next characters herself.
3072
3073 In patterns:
3074 expand:
3075 \N{FOO} => \N{U+hex_for_character_FOO}
3076 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3077
3078 pass through:
3079 all other \-char, including \N and \N{ apart from \N{ABC}
3080
3081 stops on:
3082 @ and $ where it appears to be a var, but not for $ as tail anchor
3083 \l \L \u \U \Q \E
3084 (?{ or (??{ or (*{
3085
3086 In transliterations:
3087 characters are VERY literal, except for - not at the start or end
3088 of the string, which indicates a range. However some backslash sequences
3089 are recognized: \r, \n, and the like
3090 \007 \o{}, \x{}, \N{}
3091 If all elements in the transliteration are below 256,
3092 scan_const expands the range to the full set of intermediate
3093 characters. If the range is in utf8, the hyphen is replaced with
3094 a certain range mark which will be handled by pmtrans() in op.c.
3095
3096 In double-quoted strings:
3097 backslashes:
3098 all those recognized in transliterations
3099 deprecated backrefs: \1 (in substitution replacements)
3100 case and quoting: \U \Q \E
3101 stops on @ and $
3102
3103 scan_const does *not* construct ops to handle interpolated strings.
3104 It stops processing as soon as it finds an embedded $ or @ variable
3105 and leaves it to the caller to work out what's going on.
3106
3107 embedded arrays (whether in pattern or not) could be:
3108 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3109
3110 $ in double-quoted strings must be the symbol of an embedded scalar.
3111
3112 $ in pattern could be $foo or could be tail anchor. Assumption:
3113 it's a tail anchor if $ is the last thing in the string, or if it's
3114 followed by one of "()| \r\n\t"
3115
3116 \1 (backreferences) are turned into $1 in substitutions
3117
3118 The structure of the code is
3119 while (there's a character to process) {
3120 handle transliteration ranges
3121 skip regexp comments /(?#comment)/ and codes /(?{code})/ ((*{code})/
3122 skip #-initiated comments in //x patterns
3123 check for embedded arrays
3124 check for embedded scalars
3125 if (backslash) {
3126 deprecate \1 in substitution replacements
3127 handle string-changing backslashes \l \U \Q \E, etc.
3128 switch (what was escaped) {
3129 handle \- in a transliteration (becomes a literal -)
3130 if a pattern and not \N{, go treat as regular character
3131 handle \132 (octal characters)
3132 handle \x15 and \x{1234} (hex characters)
3133 handle \N{name} (named characters, also \N{3,5} in a pattern)
3134 handle \cV (control characters)
3135 handle printf-style backslashes (\f, \r, \n, etc)
3136 } (end switch)
3137 continue
3138 } (end if backslash)
3139 handle regular character
3140 } (end while character to read)
3141
3142*/
3143
3144STATIC char *
3145S_scan_const(pTHX_ char *start)
3146{
3147 const char * const send = PL_bufend;/* end of the constant */
3148 SV *sv = newSV(send - start); /* sv for the constant. See note below
3149 on sizing. */
3150 char *s = start; /* start of the constant */
3151 char *d = SvPVX(sv); /* destination for copies */
3152 bool dorange = FALSE; /* are we in a translit range? */
3153 bool didrange = FALSE; /* did we just finish a range? */
3154 bool in_charclass = FALSE; /* within /[...]/ */
3155 const bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
3156 UTF8? But, this can show as true
3157 when the source isn't utf8, as for
3158 example when it is entirely composed
3159 of hex constants */
3160 bool d_is_utf8 = FALSE; /* Output constant is UTF8 */
3161 STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the
3162 number of characters found so far
3163 that will expand (into 2 bytes)
3164 should we have to convert to
3165 UTF-8) */
3166 SV *res; /* result from charnames */
3167 STRLEN offset_to_max = 0; /* The offset in the output to where the range
3168 high-end character is temporarily placed */
3169
3170 /* Does something require special handling in tr/// ? This avoids extra
3171 * work in a less likely case. As such, khw didn't feel it was worth
3172 * adding any branches to the more mainline code to handle this, which
3173 * means that this doesn't get set in some circumstances when things like
3174 * \x{100} get expanded out. As a result there needs to be extra testing
3175 * done in the tr code */
3176 bool has_above_latin1 = FALSE;
3177
3178 /* Note on sizing: The scanned constant is placed into sv, which is
3179 * initialized by newSV() assuming one byte of output for every byte of
3180 * input. This routine expects newSV() to allocate an extra byte for a
3181 * trailing NUL, which this routine will append if it gets to the end of
3182 * the input. There may be more bytes of input than output (eg., \N{LATIN
3183 * CAPITAL LETTER A}), or more output than input if the constant ends up
3184 * recoded to utf8, but each time a construct is found that might increase
3185 * the needed size, SvGROW() is called. Its size parameter each time is
3186 * based on the best guess estimate at the time, namely the length used so
3187 * far, plus the length the current construct will occupy, plus room for
3188 * the trailing NUL, plus one byte for every input byte still unscanned */
3189
3190 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3191 before set */
3192#ifdef EBCDIC
3193 int backslash_N = 0; /* ? was the character from \N{} */
3194 int non_portable_endpoint = 0; /* ? In a range is an endpoint
3195 platform-specific like \x65 */
3196#endif
3197
3198 PERL_ARGS_ASSERT_SCAN_CONST;
3199
3200 assert(PL_lex_inwhat != OP_TRANSR);
3201
3202 /* Protect sv from errors and fatal warnings. */
3203 ENTER_with_name("scan_const");
3204 SAVEFREESV(sv);
3205
3206 /* A bunch of code in the loop below assumes that if s[n] exists and is not
3207 * NUL, then s[n+1] exists. This assertion makes sure that assumption is
3208 * valid */
3209 assert(*send == '\0');
3210
3211 while (s < send
3212 || dorange /* Handle tr/// range at right edge of input */
3213 ) {
3214
3215 /* get transliterations out of the way (they're most literal) */
3216 if (PL_lex_inwhat == OP_TRANS) {
3217
3218 /* But there isn't any special handling necessary unless there is a
3219 * range, so for most cases we just drop down and handle the value
3220 * as any other. There are two exceptions.
3221 *
3222 * 1. A hyphen indicates that we are actually going to have a
3223 * range. In this case, skip the '-', set a flag, then drop
3224 * down to handle what should be the end range value.
3225 * 2. After we've handled that value, the next time through, that
3226 * flag is set and we fix up the range.
3227 *
3228 * Ranges entirely within Latin1 are expanded out entirely, in
3229 * order to make the transliteration a simple table look-up.
3230 * Ranges that extend above Latin1 have to be done differently, so
3231 * there is no advantage to expanding them here, so they are
3232 * stored here as Min, RANGE_INDICATOR, Max. 'RANGE_INDICATOR' is
3233 * a byte that can't occur in legal UTF-8, and hence can signify a
3234 * hyphen without any possible ambiguity. On EBCDIC machines, if
3235 * the range is expressed as Unicode, the Latin1 portion is
3236 * expanded out even if the range extends above Latin1. This is
3237 * because each code point in it has to be processed here
3238 * individually to get its native translation */
3239
3240 if (! dorange) {
3241
3242 /* Here, we don't think we're in a range. If the new character
3243 * is not a hyphen; or if it is a hyphen, but it's too close to
3244 * either edge to indicate a range, or if we haven't output any
3245 * characters yet then it's a regular character. */
3246 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3247 {
3248
3249 /* A regular character. Process like any other, but first
3250 * clear any flags */
3251 didrange = FALSE;
3252 dorange = FALSE;
3253#ifdef EBCDIC
3254 non_portable_endpoint = 0;
3255 backslash_N = 0;
3256#endif
3257 /* The tests here for being above Latin1 and similar ones
3258 * in the following 'else' suffice to find all such
3259 * occurences in the constant, except those added by a
3260 * backslash escape sequence, like \x{100}. Mostly, those
3261 * set 'has_above_latin1' as appropriate */
3262 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3263 has_above_latin1 = TRUE;
3264 }
3265
3266 /* Drops down to generic code to process current byte */
3267 }
3268 else { /* Is a '-' in the context where it means a range */
3269 if (didrange) { /* Something like y/A-C-Z// */
3270 Perl_croak(aTHX_ "Ambiguous range in transliteration"
3271 " operator");
3272 }
3273
3274 dorange = TRUE;
3275
3276 s++; /* Skip past the hyphen */
3277
3278 /* d now points to where the end-range character will be
3279 * placed. Drop down to get that character. We'll finish
3280 * processing the range the next time through the loop */
3281
3282 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3283 has_above_latin1 = TRUE;
3284 }
3285
3286 /* Drops down to generic code to process current byte */
3287 }
3288 } /* End of not a range */
3289 else {
3290 /* Here we have parsed a range. Now must handle it. At this
3291 * point:
3292 * 'sv' is a SV* that contains the output string we are
3293 * constructing. The final two characters in that string
3294 * are the range start and range end, in order.
3295 * 'd' points to just beyond the range end in the 'sv' string,
3296 * where we would next place something
3297 */
3298 char * max_ptr;
3299 char * min_ptr;
3300 IV range_min;
3301 IV range_max; /* last character in range */
3302 STRLEN grow;
3303 Size_t offset_to_min = 0;
3304 Size_t extras = 0;
3305#ifdef EBCDIC
3306 bool convert_unicode;
3307 IV real_range_max = 0;
3308#endif
3309 /* Get the code point values of the range ends. */
3310 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3311 offset_to_max = max_ptr - SvPVX_const(sv);
3312 if (d_is_utf8) {
3313 /* We know the utf8 is valid, because we just constructed
3314 * it ourselves in previous loop iterations */
3315 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3316 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3317 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3318
3319 /* This compensates for not all code setting
3320 * 'has_above_latin1', so that we don't skip stuff that
3321 * should be executed */
3322 if (range_max > 255) {
3323 has_above_latin1 = TRUE;
3324 }
3325 }
3326 else {
3327 min_ptr = max_ptr - 1;
3328 range_min = * (U8*) min_ptr;
3329 range_max = * (U8*) max_ptr;
3330 }
3331
3332 /* If the range is just a single code point, like tr/a-a/.../,
3333 * that code point is already in the output, twice. We can
3334 * just back up over the second instance and avoid all the rest
3335 * of the work. But if it is a variant character, it's been
3336 * counted twice, so decrement. (This unlikely scenario is
3337 * special cased, like the one for a range of 2 code points
3338 * below, only because the main-line code below needs a range
3339 * of 3 or more to work without special casing. Might as well
3340 * get it out of the way now.) */
3341 if (UNLIKELY(range_max == range_min)) {
3342 d = max_ptr;
3343 if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3344 utf8_variant_count--;
3345 }
3346 goto range_done;
3347 }
3348
3349#ifdef EBCDIC
3350 /* On EBCDIC platforms, we may have to deal with portable
3351 * ranges. These happen if at least one range endpoint is a
3352 * Unicode value (\N{...}), or if the range is a subset of
3353 * [A-Z] or [a-z], and both ends are literal characters,
3354 * like 'A', and not like \x{C1} */
3355 convert_unicode =
3356 cBOOL(backslash_N) /* \N{} forces Unicode,
3357 hence portable range */
3358 || ( ! non_portable_endpoint
3359 && (( isLOWER_A(range_min) && isLOWER_A(range_max))
3360 || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3361 if (convert_unicode) {
3362
3363 /* Special handling is needed for these portable ranges.
3364 * They are defined to be in Unicode terms, which includes
3365 * all the Unicode code points between the end points.
3366 * Convert to Unicode to get the Unicode range. Later we
3367 * will convert each code point in the range back to
3368 * native. */
3369 range_min = NATIVE_TO_UNI(range_min);
3370 range_max = NATIVE_TO_UNI(range_max);
3371 }
3372#endif
3373
3374 if (range_min > range_max) {
3375#ifdef EBCDIC
3376 if (convert_unicode) {
3377 /* Need to convert back to native for meaningful
3378 * messages for this platform */
3379 range_min = UNI_TO_NATIVE(range_min);
3380 range_max = UNI_TO_NATIVE(range_max);
3381 }
3382#endif
3383 /* Use the characters themselves for the error message if
3384 * ASCII printables; otherwise some visible representation
3385 * of them */
3386 if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3387 Perl_croak(aTHX_
3388 "Invalid range \"%c-%c\" in transliteration operator",
3389 (char)range_min, (char)range_max);
3390 }
3391#ifdef EBCDIC
3392 else if (convert_unicode) {
3393 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3394 Perl_croak(aTHX_
3395 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3396 UVXf "}\" in transliteration operator",
3397 range_min, range_max);
3398 }
3399#endif
3400 else {
3401 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3402 Perl_croak(aTHX_
3403 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3404 " in transliteration operator",
3405 range_min, range_max);
3406 }
3407 }
3408
3409 /* If the range is exactly two code points long, they are
3410 * already both in the output */
3411 if (UNLIKELY(range_min + 1 == range_max)) {
3412 goto range_done;
3413 }
3414
3415 /* Here the range contains at least 3 code points */
3416
3417 if (d_is_utf8) {
3418
3419 /* If everything in the transliteration is below 256, we
3420 * can avoid special handling later. A translation table
3421 * for each of those bytes is created by op.c. So we
3422 * expand out all ranges to their constituent code points.
3423 * But if we've encountered something above 255, the
3424 * expanding won't help, so skip doing that. But if it's
3425 * EBCDIC, we may have to look at each character below 256
3426 * if we have to convert to/from Unicode values */
3427 if ( has_above_latin1
3428#ifdef EBCDIC
3429 && (range_min > 255 || ! convert_unicode)
3430#endif
3431 ) {
3432 const STRLEN off = d - SvPVX(sv);
3433 const STRLEN extra = 1 + (send - s) + 1;
3434 char *e;
3435
3436 /* Move the high character one byte to the right; then
3437 * insert between it and the range begin, an illegal
3438 * byte which serves to indicate this is a range (using
3439 * a '-' would be ambiguous). */
3440
3441 if (off + extra > SvLEN(sv)) {
3442 d = off + SvGROW(sv, off + extra);
3443 max_ptr = d - off + offset_to_max;
3444 }
3445
3446 e = d++;
3447 while (e-- > max_ptr) {
3448 *(e + 1) = *e;
3449 }
3450 *(e + 1) = (char) RANGE_INDICATOR;
3451 goto range_done;
3452 }
3453
3454 /* Here, we're going to expand out the range. For EBCDIC
3455 * the range can extend above 255 (not so in ASCII), so
3456 * for EBCDIC, split it into the parts above and below
3457 * 255/256 */
3458#ifdef EBCDIC
3459 if (range_max > 255) {
3460 real_range_max = range_max;
3461 range_max = 255;
3462 }
3463#endif
3464 }
3465
3466 /* Here we need to expand out the string to contain each
3467 * character in the range. Grow the output to handle this.
3468 * For non-UTF8, we need a byte for each code point in the
3469 * range, minus the three that we've already allocated for: the
3470 * hyphen, the min, and the max. For UTF-8, we need this
3471 * plus an extra byte for each code point that occupies two
3472 * bytes (is variant) when in UTF-8 (except we've already
3473 * allocated for the end points, including if they are
3474 * variants). For ASCII platforms and Unicode ranges on EBCDIC
3475 * platforms, it's easy to calculate a precise number. To
3476 * start, we count the variants in the range, which we need
3477 * elsewhere in this function anyway. (For the case where it
3478 * isn't easy to calculate, 'extras' has been initialized to 0,
3479 * and the calculation is done in a loop further down.) */
3480#ifdef EBCDIC
3481 if (convert_unicode)
3482#endif
3483 {
3484 /* This is executed unconditionally on ASCII, and for
3485 * Unicode ranges on EBCDIC. Under these conditions, all
3486 * code points above a certain value are variant; and none
3487 * under that value are. We just need to find out how much
3488 * of the range is above that value. We don't count the
3489 * end points here, as they will already have been counted
3490 * as they were parsed. */
3491 if (range_min >= UTF_CONTINUATION_MARK) {
3492
3493 /* The whole range is made up of variants */
3494 extras = (range_max - 1) - (range_min + 1) + 1;
3495 }
3496 else if (range_max >= UTF_CONTINUATION_MARK) {
3497
3498 /* Only the higher portion of the range is variants */
3499 extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3500 }
3501
3502 utf8_variant_count += extras;
3503 }
3504
3505 /* The base growth is the number of code points in the range,
3506 * not including the endpoints, which have already been sized
3507 * for (and output). We don't subtract for the hyphen, as it
3508 * has been parsed but not output, and the SvGROW below is
3509 * based only on what's been output plus what's left to parse.
3510 * */
3511 grow = (range_max - 1) - (range_min + 1) + 1;
3512
3513 if (d_is_utf8) {
3514#ifdef EBCDIC
3515 /* In some cases in EBCDIC, we haven't yet calculated a
3516 * precise amount needed for the UTF-8 variants. Just
3517 * assume the worst case, that everything will expand by a
3518 * byte */
3519 if (! convert_unicode) {
3520 grow *= 2;
3521 }
3522 else
3523#endif
3524 {
3525 /* Otherwise we know exactly how many variants there
3526 * are in the range. */
3527 grow += extras;
3528 }
3529 }
3530
3531 /* Grow, but position the output to overwrite the range min end
3532 * point, because in some cases we overwrite that */
3533 SvCUR_set(sv, d - SvPVX_const(sv));
3534 offset_to_min = min_ptr - SvPVX_const(sv);
3535
3536 /* See Note on sizing above. */
3537 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3538 + (send - s)
3539 + grow
3540 + 1 /* Trailing NUL */ );
3541
3542 /* Now, we can expand out the range. */
3543#ifdef EBCDIC
3544 if (convert_unicode) {
3545 SSize_t i;
3546
3547 /* Recall that the min and max are now in Unicode terms, so
3548 * we have to convert each character to its native
3549 * equivalent */
3550 if (d_is_utf8) {
3551 for (i = range_min; i <= range_max; i++) {
3552 append_utf8_from_native_byte(
3553 LATIN1_TO_NATIVE((U8) i),
3554 (U8 **) &d);
3555 }
3556 }
3557 else {
3558 for (i = range_min; i <= range_max; i++) {
3559 *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3560 }
3561 }
3562 }
3563 else
3564#endif
3565 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3566 {
3567 /* Here, no conversions are necessary, which means that the
3568 * first character in the range is already in 'd' and
3569 * valid, so we can skip overwriting it */
3570 if (d_is_utf8) {
3571 SSize_t i;
3572 d += UTF8SKIP(d);
3573 for (i = range_min + 1; i <= range_max; i++) {
3574 append_utf8_from_native_byte((U8) i, (U8 **) &d);
3575 }
3576 }
3577 else {
3578 SSize_t i;
3579 d++;
3580 assert(range_min + 1 <= range_max);
3581 for (i = range_min + 1; i < range_max; i++) {
3582#ifdef EBCDIC
3583 /* In this case on EBCDIC, we haven't calculated
3584 * the variants. Do it here, as we go along */
3585 if (! UVCHR_IS_INVARIANT(i)) {
3586 utf8_variant_count++;
3587 }
3588#endif
3589 *d++ = (char)i;
3590 }
3591
3592 /* The range_max is done outside the loop so as to
3593 * avoid having to special case not incrementing
3594 * 'utf8_variant_count' on EBCDIC (it's already been
3595 * counted when originally parsed) */
3596 *d++ = (char) range_max;
3597 }
3598 }
3599
3600#ifdef EBCDIC
3601 /* If the original range extended above 255, add in that
3602 * portion. */
3603 if (real_range_max) {
3604 *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3605 *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3606 if (real_range_max > 0x100) {
3607 if (real_range_max > 0x101) {
3608 *d++ = (char) RANGE_INDICATOR;
3609 }
3610 d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3611 }
3612 }
3613#endif
3614
3615 range_done:
3616 /* mark the range as done, and continue */
3617 didrange = TRUE;
3618 dorange = FALSE;
3619#ifdef EBCDIC
3620 non_portable_endpoint = 0;
3621 backslash_N = 0;
3622#endif
3623 continue;
3624 } /* End of is a range */
3625 } /* End of transliteration. Joins main code after these else's */
3626 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3627 char *s1 = s-1;
3628 int esc = 0;
3629 while (s1 >= start && *s1-- == '\\')
3630 esc = !esc;
3631 if (!esc)
3632 in_charclass = TRUE;
3633 }
3634 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3635 char *s1 = s-1;
3636 int esc = 0;
3637 while (s1 >= start && *s1-- == '\\')
3638 esc = !esc;
3639 if (!esc)
3640 in_charclass = FALSE;
3641 }
3642 /* skip for regexp comments /(?#comment)/, except for the last
3643 * char, which will be done separately. Stop on (?{..}) and
3644 * friends (??{ ... }) or (*{ ... }) */
3645 else if (*s == '(' && PL_lex_inpat && (s[1] == '?' || s[1] == '*') && !in_charclass) {
3646 if (s[1] == '?' && s[2] == '#') {
3647 if (s_is_utf8) {
3648 PERL_UINT_FAST8_T len = UTF8SKIP(s);
3649
3650 while (s + len < send && *s != ')') {
3651 Copy(s, d, len, U8);
3652 d += len;
3653 s += len;
3654 len = UTF8_SAFE_SKIP(s, send);
3655 }
3656 }
3657 else while (s+1 < send && *s != ')') {
3658 *d++ = *s++;
3659 }
3660 }
3661 else
3662 if (!PL_lex_casemods &&
3663 /* The following should match regcomp.c */
3664 ((s[1] == '?' && (s[2] == '{' /* (?{ ... }) */
3665 || (s[2] == '?' && s[3] == '{'))) || /* (??{ ... }) */
3666 (s[1] == '*' && (s[2] == '{' ))) /* (*{ ... }) */
3667 ){
3668 break;
3669 }
3670 }
3671 /* likewise skip #-initiated comments in //x patterns */
3672 else if (*s == '#'
3673 && PL_lex_inpat
3674 && !in_charclass
3675 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3676 {
3677 while (s < send && *s != '\n')
3678 *d++ = *s++;
3679 }
3680 /* no further processing of single-quoted regex */
3681 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3682 goto default_action;
3683
3684 /* check for embedded arrays
3685 * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3686 */
3687 else if (*s == '@' && s[1]) {
3688 if (UTF
3689 ? isIDFIRST_utf8_safe(s+1, send)
3690 : isWORDCHAR_A(s[1]))
3691 {
3692 break;
3693 }
3694 if (memCHRs(":'{$", s[1]))
3695 break;
3696 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3697 break; /* in regexp, neither @+ nor @- are interpolated */
3698 }
3699 /* check for embedded scalars. only stop if we're sure it's a
3700 * variable. */
3701 else if (*s == '$') {
3702 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3703 break;
3704 if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3705 if (s[1] == '\\') {
3706 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3707 "Possible unintended interpolation of $\\ in regex");
3708 }
3709 break; /* in regexp, $ might be tail anchor */
3710 }
3711 }
3712
3713 /* End of else if chain - OP_TRANS rejoin rest */
3714
3715 if (UNLIKELY(s >= send)) {
3716 assert(s == send);
3717 break;
3718 }
3719
3720 /* backslashes */
3721 if (*s == '\\' && s+1 < send) {
3722 char* bslash = s; /* point to beginning \ */
3723 char* rbrace; /* point to ending '}' */
3724 char* e; /* 1 past the meat (non-blanks) before the
3725 brace */
3726 s++;
3727
3728 /* warn on \1 - \9 in substitution replacements, but note that \11
3729 * is an octal; and \19 is \1 followed by '9' */
3730 if (PL_lex_inwhat == OP_SUBST
3731 && !PL_lex_inpat
3732 && isDIGIT(*s)
3733 && *s != '0'
3734 && !isDIGIT(s[1]))
3735 {
3736 /* diag_listed_as: \%d better written as $%d */
3737 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3738 s = bslash;
3739 *s = '$';
3740 break;
3741 }
3742
3743 /* string-change backslash escapes */
3744 if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3745 s = bslash;
3746 break;
3747 }
3748 /* In a pattern, process \N, but skip any other backslash escapes.
3749 * This is because we don't want to translate an escape sequence
3750 * into a meta symbol and have the regex compiler use the meta
3751 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3752 * in spite of this, we do have to process \N here while the proper
3753 * charnames handler is in scope. See bugs #56444 and #62056.
3754 *
3755 * There is a complication because \N in a pattern may also stand
3756 * for 'match a non-nl', and not mean a charname, in which case its
3757 * processing should be deferred to the regex compiler. To be a
3758 * charname it must be followed immediately by a '{', and not look
3759 * like \N followed by a curly quantifier, i.e., not something like
3760 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3761 * quantifier */
3762 else if (PL_lex_inpat
3763 && (*s != 'N'
3764 || s[1] != '{'
3765 || regcurly(s + 1, send, NULL)))
3766 {
3767 *d++ = '\\';
3768 goto default_action;
3769 }
3770
3771 switch (*s) {
3772 default:
3773 {
3774 if ((isALPHANUMERIC(*s)))
3775 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3776 "Unrecognized escape \\%c passed through",
3777 *s);
3778 /* default action is to copy the quoted character */
3779 goto default_action;
3780 }
3781
3782 /* eg. \132 indicates the octal constant 0132 */
3783 case '0': case '1': case '2': case '3':
3784 case '4': case '5': case '6': case '7':
3785 {
3786 I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3787 | PERL_SCAN_NOTIFY_ILLDIGIT;
3788 STRLEN len = 3;
3789 uv = grok_oct(s, &len, &flags, NULL);
3790 s += len;
3791 if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3792 && s < send
3793 && isDIGIT(*s) /* like \08, \178 */
3794 && ckWARN(WARN_MISC))
3795 {
3796 Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3797 form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3798 }
3799 }
3800 goto NUM_ESCAPE_INSERT;
3801
3802 /* eg. \o{24} indicates the octal constant \024 */
3803 case 'o':
3804 {
3805 const char* error;
3806
3807 if (! grok_bslash_o(&s, send,
3808 &uv, &error,
3809 NULL,
3810 FALSE, /* Not strict */
3811 FALSE, /* No illegal cp's */
3812 UTF))
3813 {
3814 yyerror(error);
3815 uv = 0; /* drop through to ensure range ends are set */
3816 }
3817 goto NUM_ESCAPE_INSERT;
3818 }
3819
3820 /* eg. \x24 indicates the hex constant 0x24 */
3821 case 'x':
3822 {
3823 const char* error;
3824
3825 if (! grok_bslash_x(&s, send,
3826 &uv, &error,
3827 NULL,
3828 FALSE, /* Not strict */
3829 FALSE, /* No illegal cp's */
3830 UTF))
3831 {
3832 yyerror(error);
3833 uv = 0; /* drop through to ensure range ends are set */
3834 }
3835 }
3836
3837 NUM_ESCAPE_INSERT:
3838 /* Insert oct or hex escaped character. */
3839
3840 /* Here uv is the ordinal of the next character being added */
3841 if (UVCHR_IS_INVARIANT(uv)) {
3842 *d++ = (char) uv;
3843 }
3844 else {
3845 if (!d_is_utf8 && uv > 255) {
3846
3847 /* Here, 'uv' won't fit unless we convert to UTF-8.
3848 * If we've only seen invariants so far, all we have to
3849 * do is turn on the flag */
3850 if (utf8_variant_count == 0) {
3851 SvUTF8_on(sv);
3852 }
3853 else {
3854 SvCUR_set(sv, d - SvPVX_const(sv));
3855 SvPOK_on(sv);
3856 *d = '\0';
3857
3858 sv_utf8_upgrade_flags_grow(
3859 sv,
3860 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3861
3862 /* Since we're having to grow here,
3863 * make sure we have enough room for
3864 * this escape and a NUL, so the
3865 * code immediately below won't have
3866 * to actually grow again */
3867 UVCHR_SKIP(uv)
3868 + (STRLEN)(send - s) + 1);
3869 d = SvPVX(sv) + SvCUR(sv);
3870 }
3871
3872 has_above_latin1 = TRUE;
3873 d_is_utf8 = TRUE;
3874 }
3875
3876 if (! d_is_utf8) {
3877 *d++ = (char)uv;
3878 utf8_variant_count++;
3879 }
3880 else {
3881 /* Usually, there will already be enough room in 'sv'
3882 * since such escapes are likely longer than any UTF-8
3883 * sequence they can end up as. This isn't the case on
3884 * EBCDIC where \x{40000000} contains 12 bytes, and the
3885 * UTF-8 for it contains 14. And, we have to allow for
3886 * a trailing NUL. It probably can't happen on ASCII
3887 * platforms, but be safe. See Note on sizing above. */
3888 const STRLEN needed = d - SvPVX(sv)
3889 + UVCHR_SKIP(uv)
3890 + (send - s)
3891 + 1;
3892 if (UNLIKELY(needed > SvLEN(sv))) {
3893 SvCUR_set(sv, d - SvPVX_const(sv));
3894 d = SvCUR(sv) + SvGROW(sv, needed);
3895 }
3896
3897 d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3898 (ckWARN(WARN_PORTABLE))
3899 ? UNICODE_WARN_PERL_EXTENDED
3900 : 0);
3901 }
3902 }
3903#ifdef EBCDIC
3904 non_portable_endpoint++;
3905#endif
3906 continue;
3907
3908 case 'N':
3909 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3910 * named character, like \N{LATIN SMALL LETTER A}, or a named
3911 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3912 * GRAVE} (except y/// can't handle the latter, croaking). For
3913 * convenience all three forms are referred to as "named
3914 * characters" below.
3915 *
3916 * For patterns, \N also can mean to match a non-newline. Code
3917 * before this 'switch' statement should already have handled
3918 * this situation, and hence this code only has to deal with
3919 * the named character cases.
3920 *
3921 * For non-patterns, the named characters are converted to
3922 * their string equivalents. In patterns, named characters are
3923 * not converted to their ultimate forms for the same reasons
3924 * that other escapes aren't (mainly that the ultimate
3925 * character could be considered a meta-symbol by the regex
3926 * compiler). Instead, they are converted to the \N{U+...}
3927 * form to get the value from the charnames that is in effect
3928 * right now, while preserving the fact that it was a named
3929 * character, so that the regex compiler knows this.
3930 *
3931 * The structure of this section of code (besides checking for
3932 * errors and upgrading to utf8) is:
3933 * If the named character is of the form \N{U+...}, pass it
3934 * through if a pattern; otherwise convert the code point
3935 * to utf8
3936 * Otherwise must be some \N{NAME}: convert to
3937 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3938 *
3939 * Transliteration is an exception. The conversion to utf8 is
3940 * only done if the code point requires it to be representable.
3941 *
3942 * Here, 's' points to the 'N'; the test below is guaranteed to
3943 * succeed if we are being called on a pattern, as we already
3944 * know from a test above that the next character is a '{'. A
3945 * non-pattern \N must mean 'named character', which requires
3946 * braces */
3947 s++;
3948 if (*s != '{') {
3949 yyerror("Missing braces on \\N{}");
3950 *d++ = '\0';
3951 continue;
3952 }
3953 s++;
3954
3955 /* If there is no matching '}', it is an error. */
3956 if (! (rbrace = (char *) memchr(s, '}', send - s))) {
3957 if (! PL_lex_inpat) {
3958 yyerror("Missing right brace on \\N{}");
3959 } else {
3960 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3961 }
3962 yyquit(); /* Have exhausted the input. */
3963 }
3964
3965 /* Here it looks like a named character */
3966 while (s < rbrace && isBLANK(*s)) {
3967 s++;
3968 }
3969
3970 e = rbrace;
3971 while (s < e && isBLANK(*(e - 1))) {
3972 e--;
3973 }
3974
3975 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3976 s += 2; /* Skip to next char after the 'U+' */
3977 if (PL_lex_inpat) {
3978
3979 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3980 /* Check the syntax. */
3981 if (!isXDIGIT(*s)) {
3982 bad_NU:
3983 yyerror(
3984 "Invalid hexadecimal number in \\N{U+...}"
3985 );
3986 s = rbrace + 1;
3987 *d++ = '\0';
3988 continue;
3989 }
3990 while (++s < e) {
3991 if (isXDIGIT(*s))
3992 continue;
3993 else if ((*s == '.' || *s == '_')
3994 && isXDIGIT(s[1]))
3995 continue;
3996 goto bad_NU;
3997 }
3998
3999 /* Pass everything through unchanged.
4000 * +1 is to include the '}' */
4001 Copy(bslash, d, rbrace - bslash + 1, char);
4002 d += rbrace - bslash + 1;
4003 }
4004 else { /* Not a pattern: convert the hex to string */
4005 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
4006 | PERL_SCAN_SILENT_ILLDIGIT
4007 | PERL_SCAN_SILENT_OVERFLOW
4008 | PERL_SCAN_DISALLOW_PREFIX;
4009 STRLEN len = e - s;
4010
4011 uv = grok_hex(s, &len, &flags, NULL);
4012 if (len == 0 || (len != (STRLEN)(e - s)))
4013 goto bad_NU;
4014
4015 if ( uv > MAX_LEGAL_CP
4016 || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
4017 {
4018 yyerror(form_cp_too_large_msg(16, s, len, 0));
4019 uv = 0; /* drop through to ensure range ends are
4020 set */
4021 }
4022
4023 /* For non-tr///, if the destination is not in utf8,
4024 * unconditionally recode it to be so. This is
4025 * because \N{} implies Unicode semantics, and scalars
4026 * have to be in utf8 to guarantee those semantics.
4027 * tr/// doesn't care about Unicode rules, so no need
4028 * there to upgrade to UTF-8 for small enough code
4029 * points */
4030 if (! d_is_utf8 && ( uv > 0xFF
4031 || PL_lex_inwhat != OP_TRANS))
4032 {
4033 /* See Note on sizing above. */
4034 const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1;
4035
4036 SvCUR_set(sv, d - SvPVX_const(sv));
4037 SvPOK_on(sv);
4038 *d = '\0';
4039
4040 if (utf8_variant_count == 0) {
4041 SvUTF8_on(sv);
4042 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4043 }
4044 else {
4045 sv_utf8_upgrade_flags_grow(
4046 sv,
4047 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4048 extra);
4049 d = SvPVX(sv) + SvCUR(sv);
4050 }
4051
4052 d_is_utf8 = TRUE;
4053 has_above_latin1 = TRUE;
4054 }
4055
4056 /* Add the (Unicode) code point to the output. */
4057 if (OFFUNI_IS_INVARIANT(uv)) {
4058 *d++ = (char) LATIN1_TO_NATIVE(uv);
4059 }
4060 else if (! d_is_utf8) {
4061 *d++ = (char) LATIN1_TO_NATIVE(uv);
4062 utf8_variant_count++;
4063 }
4064 else {
4065 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
4066 (ckWARN(WARN_PORTABLE))
4067 ? UNICODE_WARN_PERL_EXTENDED
4068 : 0);
4069 }
4070 }
4071 }
4072 else /* Here is \N{NAME} but not \N{U+...}. */
4073 if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
4074 { /* Failed. We should die eventually, but for now use a NUL
4075 to keep parsing */
4076 *d++ = '\0';
4077 }
4078 else { /* Successfully evaluated the name */
4079 STRLEN len;
4080 const char *str = SvPV_const(res, len);
4081 if (PL_lex_inpat) {
4082
4083 if (! len) { /* The name resolved to an empty string */
4084 const char empty_N[] = "\\N{_}";
4085 Copy(empty_N, d, sizeof(empty_N) - 1, char);
4086 d += sizeof(empty_N) - 1;
4087 }
4088 else {
4089 /* In order to not lose information for the regex
4090 * compiler, pass the result in the specially made
4091 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
4092 * the code points in hex of each character
4093 * returned by charnames */
4094
4095 const char *str_end = str + len;
4096 const STRLEN off = d - SvPVX_const(sv);
4097
4098 if (! SvUTF8(res)) {
4099 /* For the non-UTF-8 case, we can determine the
4100 * exact length needed without having to parse
4101 * through the string. Each character takes up
4102 * 2 hex digits plus either a trailing dot or
4103 * the "}" */
4104 const char initial_text[] = "\\N{U+";
4105 const STRLEN initial_len = sizeof(initial_text)
4106 - 1;
4107 d = off + SvGROW(sv, off
4108 + 3 * len
4109
4110 /* +1 for trailing NUL */
4111 + initial_len + 1
4112
4113 + (STRLEN)(send - rbrace));
4114 Copy(initial_text, d, initial_len, char);
4115 d += initial_len;
4116 while (str < str_end) {
4117 char hex_string[4];
4118 int len =
4119 my_snprintf(hex_string,
4120 sizeof(hex_string),
4121 "%02X.",
4122
4123 /* The regex compiler is
4124 * expecting Unicode, not
4125 * native */
4126 NATIVE_TO_LATIN1(*str));
4127 PERL_MY_SNPRINTF_POST_GUARD(len,
4128 sizeof(hex_string));
4129 Copy(hex_string, d, 3, char);
4130 d += 3;
4131 str++;
4132 }
4133 d--; /* Below, we will overwrite the final
4134 dot with a right brace */
4135 }
4136 else {
4137 STRLEN char_length; /* cur char's byte length */
4138
4139 /* and the number of bytes after this is
4140 * translated into hex digits */
4141 STRLEN output_length;
4142
4143 /* 2 hex per byte; 2 chars for '\N'; 2 chars
4144 * for max('U+', '.'); and 1 for NUL */
4145 char hex_string[2 * UTF8_MAXBYTES + 5];
4146
4147 /* Get the first character of the result. */
4148 U32 uv = utf8n_to_uvchr((U8 *) str,
4149 len,
4150 &char_length,
4151 UTF8_ALLOW_ANYUV);
4152 /* Convert first code point to Unicode hex,
4153 * including the boiler plate before it. */
4154 output_length =
4155 my_snprintf(hex_string, sizeof(hex_string),
4156 "\\N{U+%X",
4157 (unsigned int) NATIVE_TO_UNI(uv));
4158
4159 /* Make sure there is enough space to hold it */
4160 d = off + SvGROW(sv, off
4161 + output_length
4162 + (STRLEN)(send - rbrace)
4163 + 2); /* '}' + NUL */
4164 /* And output it */
4165 Copy(hex_string, d, output_length, char);
4166 d += output_length;
4167
4168 /* For each subsequent character, append dot and
4169 * its Unicode code point in hex */
4170 while ((str += char_length) < str_end) {
4171 const STRLEN off = d - SvPVX_const(sv);
4172 U32 uv = utf8n_to_uvchr((U8 *) str,
4173 str_end - str,
4174 &char_length,
4175 UTF8_ALLOW_ANYUV);
4176 output_length =
4177 my_snprintf(hex_string,
4178 sizeof(hex_string),
4179 ".%X",
4180 (unsigned int) NATIVE_TO_UNI(uv));
4181
4182 d = off + SvGROW(sv, off
4183 + output_length
4184 + (STRLEN)(send - rbrace)
4185 + 2); /* '}' + NUL */
4186 Copy(hex_string, d, output_length, char);
4187 d += output_length;
4188 }
4189 }
4190
4191 *d++ = '}'; /* Done. Add the trailing brace */
4192 }
4193 }
4194 else { /* Here, not in a pattern. Convert the name to a
4195 * string. */
4196
4197 if (PL_lex_inwhat == OP_TRANS) {
4198 str = SvPV_const(res, len);
4199 if (len > ((SvUTF8(res))
4200 ? UTF8SKIP(str)
4201 : 1U))
4202 {
4203 yyerror(Perl_form(aTHX_
4204 "%.*s must not be a named sequence"
4205 " in transliteration operator",
4206 /* +1 to include the "}" */
4207 (int) (rbrace + 1 - start), start));
4208 *d++ = '\0';
4209 goto end_backslash_N;
4210 }
4211
4212 if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4213 has_above_latin1 = TRUE;
4214 }
4215
4216 }
4217 else if (! SvUTF8(res)) {
4218 /* Make sure \N{} return is UTF-8. This is because
4219 * \N{} implies Unicode semantics, and scalars have
4220 * to be in utf8 to guarantee those semantics; but
4221 * not needed in tr/// */
4222 sv_utf8_upgrade_flags(res, 0);
4223 str = SvPV_const(res, len);
4224 }
4225
4226 /* Upgrade destination to be utf8 if this new
4227 * component is */
4228 if (! d_is_utf8 && SvUTF8(res)) {
4229 /* See Note on sizing above. */
4230 const STRLEN extra = len + (send - s) + 1;
4231
4232 SvCUR_set(sv, d - SvPVX_const(sv));
4233 SvPOK_on(sv);
4234 *d = '\0';
4235
4236 if (utf8_variant_count == 0) {
4237 SvUTF8_on(sv);
4238 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4239 }
4240 else {
4241 sv_utf8_upgrade_flags_grow(sv,
4242 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4243 extra);
4244 d = SvPVX(sv) + SvCUR(sv);
4245 }
4246 d_is_utf8 = TRUE;
4247 } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */
4248
4249 /* See Note on sizing above. (NOTE: SvCUR() is not
4250 * set correctly here). */
4251 const STRLEN extra = len + (send - rbrace) + 1;
4252 const STRLEN off = d - SvPVX_const(sv);
4253 d = off + SvGROW(sv, off + extra);
4254 }
4255 Copy(str, d, len, char);
4256 d += len;
4257 }
4258
4259 SvREFCNT_dec(res);
4260
4261 } /* End \N{NAME} */
4262
4263 end_backslash_N:
4264#ifdef EBCDIC
4265 backslash_N++; /* \N{} is defined to be Unicode */
4266#endif
4267 s = rbrace + 1; /* Point to just after the '}' */
4268 continue;
4269
4270 /* \c is a control character */
4271 case 'c':
4272 s++;
4273 if (s < send) {
4274 const char * message;
4275
4276 if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4277 yyerror(message);
4278 yyquit(); /* Have always immediately croaked on
4279 errors in this */
4280 }
4281 d++;
4282 }
4283 else {
4284 yyerror("Missing control char name in \\c");
4285 yyquit(); /* Are at end of input, no sense continuing */
4286 }
4287#ifdef EBCDIC
4288 non_portable_endpoint++;
4289#endif
4290 break;
4291
4292 /* printf-style backslashes, formfeeds, newlines, etc */
4293 case 'b':
4294 *d++ = '\b';
4295 break;
4296 case 'n':
4297 *d++ = '\n';
4298 break;
4299 case 'r':
4300 *d++ = '\r';
4301 break;
4302 case 'f':
4303 *d++ = '\f';
4304 break;
4305 case 't':
4306 *d++ = '\t';
4307 break;
4308 case 'e':
4309 *d++ = ESC_NATIVE;
4310 break;
4311 case 'a':
4312 *d++ = '\a';
4313 break;
4314 } /* end switch */
4315
4316 s++;
4317 continue;
4318 } /* end if (backslash) */
4319
4320 default_action:
4321 /* Just copy the input to the output, though we may have to convert
4322 * to/from UTF-8.
4323 *
4324 * If the input has the same representation in UTF-8 as not, it will be
4325 * a single byte, and we don't care about UTF8ness; just copy the byte */
4326 if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4327 *d++ = *s++;
4328 }
4329 else if (! s_is_utf8 && ! d_is_utf8) {
4330 /* If neither source nor output is UTF-8, is also a single byte,
4331 * just copy it; but this byte counts should we later have to
4332 * convert to UTF-8 */
4333 *d++ = *s++;
4334 utf8_variant_count++;
4335 }
4336 else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */
4337 const STRLEN len = UTF8SKIP(s);
4338
4339 /* We expect the source to have already been checked for
4340 * malformedness */
4341 assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4342
4343 Copy(s, d, len, U8);
4344 d += len;
4345 s += len;
4346 }
4347 else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4348 STRLEN need = send - s + 1; /* See Note on sizing above. */
4349
4350 SvCUR_set(sv, d - SvPVX_const(sv));
4351 SvPOK_on(sv);
4352 *d = '\0';
4353
4354 if (utf8_variant_count == 0) {
4355 SvUTF8_on(sv);
4356 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4357 }
4358 else {
4359 sv_utf8_upgrade_flags_grow(sv,
4360 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4361 need);
4362 d = SvPVX(sv) + SvCUR(sv);
4363 }
4364 d_is_utf8 = TRUE;
4365 goto default_action; /* Redo, having upgraded so both are UTF-8 */
4366 }
4367 else { /* UTF8ness matters: convert this non-UTF8 source char to
4368 UTF-8 for output. It will occupy 2 bytes, but don't include
4369 the input byte since we haven't incremented 's' yet. See
4370 Note on sizing above. */
4371 const STRLEN off = d - SvPVX(sv);
4372 const STRLEN extra = 2 + (send - s - 1) + 1;
4373 if (off + extra > SvLEN(sv)) {
4374 d = off + SvGROW(sv, off + extra);
4375 }
4376 *d++ = UTF8_EIGHT_BIT_HI(*s);
4377 *d++ = UTF8_EIGHT_BIT_LO(*s);
4378 s++;
4379 }
4380 } /* while loop to process each character */
4381
4382 {
4383 const STRLEN off = d - SvPVX(sv);
4384
4385 /* See if room for the terminating NUL */
4386 if (UNLIKELY(off >= SvLEN(sv))) {
4387
4388#ifndef DEBUGGING
4389
4390 if (off > SvLEN(sv))
4391#endif
4392 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4393 " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4394
4395 /* Whew! Here we don't have room for the terminating NUL, but
4396 * everything else so far has fit. It's not too late to grow
4397 * to fit the NUL and continue on. But it is a bug, as the code
4398 * above was supposed to have made room for this, so under
4399 * DEBUGGING builds, we panic anyway. */
4400 d = off + SvGROW(sv, off + 1);
4401 }
4402 }
4403
4404 /* terminate the string and set up the sv */
4405 *d = '\0';
4406 SvCUR_set(sv, d - SvPVX_const(sv));
4407
4408 SvPOK_on(sv);
4409 if (d_is_utf8) {
4410 SvUTF8_on(sv);
4411 }
4412
4413 /* shrink the sv if we allocated more than we used */
4414 if (SvCUR(sv) + 5 < SvLEN(sv)) {
4415 SvPV_shrink_to_cur(sv);
4416 }
4417
4418 /* return the substring (via pl_yylval) only if we parsed anything */
4419 if (s > start) {
4420 char *s2 = start;
4421 for (; s2 < s; s2++) {
4422 if (*s2 == '\n')
4423 COPLINE_INC_WITH_HERELINES;
4424 }
4425 SvREFCNT_inc_simple_void_NN(sv);
4426 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4427 && ! PL_parser->lex_re_reparsing)
4428 {
4429 const char *const key = PL_lex_inpat ? "qr" : "q";
4430 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4431 const char *type;
4432 STRLEN typelen;
4433
4434 if (PL_lex_inwhat == OP_TRANS) {
4435 type = "tr";
4436 typelen = 2;
4437 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4438 type = "s";
4439 typelen = 1;
4440 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4441 type = "q";
4442 typelen = 1;
4443 } else {
4444 type = "qq";
4445 typelen = 2;
4446 }
4447
4448 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4449 type, typelen, NULL);
4450 }
4451 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4452 }
4453 LEAVE_with_name("scan_const");
4454 return s;
4455}
4456
4457/* S_intuit_more
4458 * Returns TRUE if there's more to the expression (e.g., a subscript),
4459 * FALSE otherwise.
4460 *
4461 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4462 *
4463 * ->[ and ->{ return TRUE
4464 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4465 * { and [ outside a pattern are always subscripts, so return TRUE
4466 * if we're outside a pattern and it's not { or [, then return FALSE
4467 * if we're in a pattern and the first char is a {
4468 * {4,5} (any digits around the comma) returns FALSE
4469 * if we're in a pattern and the first char is a [
4470 * [] returns FALSE
4471 * [SOMETHING] has a funky heuristic to decide whether it's a
4472 * character class or not. It has to deal with things like
4473 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4474 * anything else returns TRUE
4475 */
4476
4477/* This is the one truly awful dwimmer necessary to conflate C and sed. */
4478
4479STATIC int
4480S_intuit_more(pTHX_ char *s, char *e)
4481{
4482 PERL_ARGS_ASSERT_INTUIT_MORE;
4483
4484 /* This function has been mostly untouched for a long time, due to its,
4485 * 'scariness', and lack of comments. khw has gone through and done some
4486 * cleanup, while finding various instances of problematic behavior.
4487 * Rather than change this base-level function immediately, khw has added
4488 * commentary to those areas. */
4489
4490 /* If recursed within brackets, there is more to the expression */
4491 if (PL_lex_brackets)
4492 return TRUE;
4493
4494 /* If begins with '->' ... */
4495 if (s[0] == '-' && s[1] == '>') {
4496
4497 /* '->[' and '->{' imply more to the expression */
4498 if (s[2] == '[' || s[2] == '{') {
4499 return TRUE;
4500 }
4501
4502 /* Any post deref construct implies more to the expression */
4503 if ( FEATURE_POSTDEREF_QQ_IS_ENABLED
4504 && ( (s[2] == '$' && ( s[3] == '*'
4505 || (s[3] == '#' && s[4] == '*')))
4506 || (s[2] == '@' && memCHRs("*[{", s[3])) ))
4507 {
4508 return TRUE;
4509 }
4510 }
4511
4512 if (s[0] != '{' && s[0] != '[')
4513 return FALSE;
4514
4515 /* quit immediately from any errors from now on */
4516 PL_parser->sub_no_recover = TRUE;
4517
4518 /* Here is '{' or '['. Outside patterns, they're always subscripts */
4519 if (!PL_lex_inpat)
4520 return TRUE;
4521
4522 /* In a pattern, so maybe we have {n,m}, in which case, there isn't more to
4523 * the expression.
4524 *
4525 * khw: This assumes that anything matching regcurly is a character class.
4526 * The syntax of regcurly has been loosened since this function was
4527 * written, and regcurly never required a comma, as in {0}. Probably it is
4528 * ok as-is */
4529 if (s[0] == '{') {
4530 if (regcurly(s, e, NULL)) {
4531 return FALSE;
4532 }
4533 return TRUE;
4534 }
4535
4536 /* Here is '[': maybe we have a character class. Examine the guts */
4537 s++;
4538
4539 /* '^' implies a character class; An empty '[]' isn't legal, but it does
4540 * mean there isn't more to come */
4541 if (s[0] == ']' || s[0] == '^')
4542 return FALSE;
4543
4544 /* Find matching ']'. khw: This means any s[1] below is guaranteed to
4545 * exist */
4546 const char * const send = (char *) memchr(s, ']', e - s);
4547 if (! send) /* has to be an expression */
4548 return TRUE;
4549
4550 /* If the construct consists entirely of one or two digits, call it a
4551 * subscript. */
4552 if (isDIGIT(s[0]) && send - s <= 2 && (send - s == 1 || (isDIGIT(s[1])))) {
4553 return TRUE;
4554 }
4555
4556 /* this is terrifying, and it mostly works. See GH #16478.
4557 *
4558 * khw: That ticket shows that the heuristics here get things wrong. That
4559 * most of the weights are divisible by 5 indicates that not a lot of
4560 * tuning was done, and that the values are fairly arbitrary. Especially
4561 * problematic are when all characters in the construct are numeric. We
4562 * have [89] always resolving to a subscript, though that could well be a
4563 * character class that is related to finding non-octals. And [100] is a
4564 * character class when it could well be a subscript. */
4565
4566 int weight;
4567
4568 if (s[0] == '$') { /* First char is dollar; lean very slightly to it
4569 being a subscript */
4570 weight = -1;
4571 }
4572 else { /* Otherwise, lean a little more towards it being a
4573 character class. */
4574 weight = 2;
4575 }
4576
4577 /* Unsigned version of current character */
4578 unsigned char un_char = 0;
4579
4580 /* Keep track of how many multiple occurrences of the same character there
4581 * are */
4582 char seen[256];
4583 Zero(seen, 256, char);
4584
4585 /* Examine each character in the construct */
4586 bool first_time = true;
4587 for (; s < send; s++, first_time = false) {
4588 unsigned char prev_un_char = un_char;
4589 un_char = (unsigned char) s[0];
4590 switch (s[0]) {
4591 case '@':
4592 case '&':
4593 case '$':
4594
4595 /* Each additional occurrence of one of these three strongly
4596 * indicates it is a subscript */
4597 weight -= seen[un_char] * 10;
4598
4599 /* Following one of these characters, we look to see if there is an
4600 * identifier already found in the program by that name. If so,
4601 * strongly suspect this isn't a character class */
4602 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4603 int len;
4604 char tmpbuf[sizeof PL_tokenbuf * 4];
4605 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4606 len = (int)strlen(tmpbuf);
4607 if ( len > 1
4608 && gv_fetchpvn_flags(tmpbuf,
4609 len,
4610 UTF ? SVf_UTF8 : 0,
4611 SVt_PV))
4612 weight -= 100;
4613 else /* Not a multi-char identifier already known in the
4614 program; is somewhat likely to be a subscript */
4615 weight -= 10;
4616 }
4617 else if ( s[0] == '$'
4618 && s[1]
4619 && memCHRs("[#!%*<>()-=", s[1]))
4620 {
4621 /* Here we have what could be a punctuation variable. If the
4622 * next character after it is a closing bracket, it makes it
4623 * quite likely to be that, and hence a subscript. If it is
4624 * something else, more mildly a subscript */
4625 if (/*{*/ memCHRs("])} =", s[2]))
4626 weight -= 10;
4627 else
4628 weight -= 1;
4629 }
4630 break;
4631
4632 case '\\':
4633 if (s[1]) {
4634 if (memCHRs("wds]", s[1]))
4635 weight += 100; /* \w \d \s => strongly charclass */
4636 /* khw: Why not \W \D \S \h \v, etc as well? */
4637 else if (seen[(U8)'\''] || seen[(U8)'"'])
4638 weight += 1; /* \' => mildly charclass */
4639 else if (memCHRs("abcfnrtvx", s[1]))
4640 weight += 40; /* \n, etc => charclass */
4641 /* khw: Why not \e etc as well? */
4642 else if (isDIGIT(s[1])) {
4643 weight += 40; /* \123 => charclass */
4644 while (s[1] && isDIGIT(s[1]))
4645 s++;
4646 }
4647 }
4648 else /* \ followed by NUL strongly indicates character class */
4649 weight += 100;
4650 break;
4651
4652 case '-':
4653 /* If it is something like '-\', it is more likely to be a
4654 * character class.
4655 *
4656 * khw: The rest of the conditionals in this 'case' really should
4657 * be subject to an 'else' of this condition */
4658 if (s[1] == '\\')
4659 weight += 50;
4660
4661 /* If it is something like 'a-' or '0-', it is more likely to
4662 * be a character class. '!' is the first ASCII graphic, so '!-'
4663 * would be the start of a range of graphics. */
4664 if (! first_time && memCHRs("aA01! ", prev_un_char))
4665 weight += 30;
4666
4667 /* If it is something like '-Z' or '-7' (for octal) or '-9' it
4668 * is more likely to be a character class. '~' is the final ASCII
4669 * graphic, so '-~' would be the end of a range of graphics.
4670 *
4671 * khw: Having [-z] really doesn't imply what the comments above
4672 * indicate, so this should only be tested when '! first_time' */
4673 if (memCHRs("zZ79~", s[1]))
4674 weight += 30;
4675
4676 /* If it is something like -1 or -$foo, it is more likely to be a
4677 * subscript. */
4678 if (first_time && (isDIGIT(s[1]) || s[1] == '$')) {
4679 weight -= 5; /* cope with negative subscript */
4680 }
4681 break;
4682
4683 default:
4684 if ( (first_time || ( ! isWORDCHAR(prev_un_char)
4685 && prev_un_char != '$'
4686 && prev_un_char != '@'
4687 && prev_un_char != '&'))
4688 && isALPHA(s[0])
4689 && isALPHA(s[1]))
4690 {
4691 /* Here it's \W (that isn't [$@&] ) followed immediately by two
4692 * alphas in a row. Accumulate all the consecutive alphas */
4693 char *d = s;
4694 while (isALPHA(s[0]))
4695 s++;
4696
4697 /* If those alphas spell a keyword, it's almost certainly not a
4698 * character class */
4699 if (keyword(d, s - d, 0))
4700 weight -= 150;
4701
4702 /* khw: Should those alphas be marked as seen? */
4703 }
4704
4705 /* Consecutive chars like [...12...] and [...ab...] are presumed
4706 * more likely to be character classes */
4707 if ( ! first_time
4708 && ( NATIVE_TO_LATIN1(un_char)
4709 == NATIVE_TO_LATIN1(prev_un_char) + 1))
4710 {
4711 weight += 5;
4712 }
4713
4714 /* But repeating a character inside a character class does nothing,
4715 * like [aba], so less likely that someone makes such a class, more
4716 * likely that it is a subscript; the more repeats, the less
4717 * likely. */
4718 weight -= seen[un_char];
4719 break;
4720 } /* End of switch */
4721
4722 /* khw: 'seen' is declared as a char. This ++ can cause it to wrap.
4723 * This gives different results with compilers for which a plain 'char'
4724 * is actually unsigned, versus those where it is signed. I believe it
4725 * is undefined behavior to wrap a 'signed'. I think it should be
4726 * instead declared an unsigned int to make the chances of wrapping
4727 * essentially zero.
4728 *
4729 * And I believe that extra backslashes are different from other
4730 * repeated characters. */
4731 seen[un_char]++;
4732 } /* End of loop through each character of the construct */
4733
4734 if (weight >= 0) /* probably a character class */
4735 return FALSE;
4736
4737 return TRUE;
4738}
4739
4740/*
4741 * S_intuit_method
4742 *
4743 * Does all the checking to disambiguate
4744 * foo bar
4745 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4746 * METHCALL (bar->foo(args)) or METHCALL0 (bar->foo args).
4747 *
4748 * First argument is the stuff after the first token, e.g. "bar".
4749 *
4750 * Not a method if foo is a filehandle.
4751 * Not a method if foo is a subroutine prototyped to take a filehandle.
4752 * Not a method if it's really "Foo $bar"
4753 * Method if it's "foo $bar"
4754 * Not a method if it's really "print foo $bar"
4755 * Method if it's really "foo package::" (interpreted as package->foo)
4756 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4757 * Not a method if bar is a filehandle or package, but is quoted with
4758 * =>
4759 */
4760
4761STATIC int
4762S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4763{
4764 char *s = start + (*start == '$');
4765 char tmpbuf[sizeof PL_tokenbuf];
4766 STRLEN len;
4767 GV* indirgv;
4768 /* Mustn't actually add anything to a symbol table.
4769 But also don't want to "initialise" any placeholder
4770 constants that might already be there into full
4771 blown PVGVs with attached PVCV. */
4772 GV * const gv =
4773 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4774
4775 PERL_ARGS_ASSERT_INTUIT_METHOD;
4776
4777 if (!FEATURE_INDIRECT_IS_ENABLED)
4778 return 0;
4779
4780 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4781 return 0;
4782 if (cv && SvPOK(cv)) {
4783 const char *proto = CvPROTO(cv);
4784 if (proto) {
4785 while (*proto && (isSPACE(*proto) || *proto == ';'))
4786 proto++;
4787 if (*proto == '*')
4788 return 0;
4789 }
4790 }
4791
4792 if (*start == '$') {
4793 SSize_t start_off = start - SvPVX(PL_linestr);
4794 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4795 || isUPPER(*PL_tokenbuf))
4796 return 0;
4797 /* this could be $# */
4798 if (isSPACE(*s))
4799 s = skipspace(s);
4800 PL_bufptr = SvPVX(PL_linestr) + start_off;
4801 PL_expect = XREF;
4802 return *s == '(' ? METHCALL : METHCALL0;
4803 }
4804
4805 s = scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE);
4806 /* start is the beginning of the possible filehandle/object,
4807 * and s is the end of it
4808 * tmpbuf is a copy of it (but with single quotes as double colons)
4809 */
4810
4811 if (!keyword(tmpbuf, len, 0)) {
4812 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4813 len -= 2;
4814 tmpbuf[len] = '\0';
4815 goto bare_package;
4816 }
4817 indirgv = gv_fetchpvn_flags(tmpbuf, len,
4818 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4819 SVt_PVCV);
4820 if (indirgv && SvTYPE(indirgv) != SVt_NULL
4821 && (!isGV(indirgv) || GvCVu(indirgv)))
4822 return 0;
4823 /* filehandle or package name makes it a method */
4824 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4825 s = skipspace(s);
4826 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4827 return 0; /* no assumptions -- "=>" quotes bareword */
4828 bare_package:
4829 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4830 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4831 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4832 PL_expect = XTERM;
4833 force_next(BAREWORD);
4834 PL_bufptr = s;
4835 return *s == '(' ? METHCALL : METHCALL0;
4836 }
4837 }
4838 return 0;
4839}
4840
4841/* Encoded script support. filter_add() effectively inserts a
4842 * 'pre-processing' function into the current source input stream.
4843 * Note that the filter function only applies to the current source file
4844 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4845 *
4846 * The datasv parameter (which may be NULL) can be used to pass
4847 * private data to this instance of the filter. The filter function
4848 * can recover the SV using the FILTER_DATA macro and use it to
4849 * store private buffers and state information.
4850 *
4851 * The supplied datasv parameter is upgraded to a PVIO type
4852 * and the IoDIRP/IoANY field is used to store the function pointer,
4853 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4854 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4855 * private use must be set using malloc'd pointers.
4856 */
4857
4858SV *
4859Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4860{
4861 if (!funcp)
4862 return NULL;
4863
4864 if (!PL_parser)
4865 return NULL;
4866
4867 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4868 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4869
4870 if (!PL_rsfp_filters)
4871 PL_rsfp_filters = newAV();
4872 if (!datasv)
4873 datasv = newSV(0);
4874 SvUPGRADE(datasv, SVt_PVIO);
4875 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4876 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4877 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4878 FPTR2DPTR(void *, IoANY(datasv)),
4879 SvPV_nolen(datasv)));
4880 av_unshift(PL_rsfp_filters, 1);
4881 av_store(PL_rsfp_filters, 0, datasv) ;
4882 if (
4883 !PL_parser->filtered
4884 && PL_parser->lex_flags & LEX_EVALBYTES
4885 && PL_bufptr < PL_bufend
4886 ) {
4887 const char *s = PL_bufptr;
4888 while (s < PL_bufend) {
4889 if (*s == '\n') {
4890 SV *linestr = PL_parser->linestr;
4891 char *buf = SvPVX(linestr);
4892 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4893 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4894 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4895 STRLEN const linestart_pos = PL_parser->linestart - buf;
4896 STRLEN const last_uni_pos =
4897 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4898 STRLEN const last_lop_pos =
4899 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4900 av_push(PL_rsfp_filters, linestr);
4901 PL_parser->linestr =
4902 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4903 buf = SvPVX(PL_parser->linestr);
4904 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4905 PL_parser->bufptr = buf + bufptr_pos;
4906 PL_parser->oldbufptr = buf + oldbufptr_pos;
4907 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4908 PL_parser->linestart = buf + linestart_pos;
4909 if (PL_parser->last_uni)
4910 PL_parser->last_uni = buf + last_uni_pos;
4911 if (PL_parser->last_lop)
4912 PL_parser->last_lop = buf + last_lop_pos;
4913 SvLEN_set(linestr, SvCUR(linestr));
4914 SvCUR_set(linestr, s - SvPVX(linestr));
4915 PL_parser->filtered = 1;
4916 break;
4917 }
4918 s++;
4919 }
4920 }
4921 return(datasv);
4922}
4923
4924/*
4925=for apidoc_section $filters
4926=for apidoc filter_del
4927
4928Delete most recently added instance of the filter function argument
4929
4930=cut
4931*/
4932
4933void
4934Perl_filter_del(pTHX_ filter_t funcp)
4935{
4936 SV *datasv;
4937
4938 PERL_ARGS_ASSERT_FILTER_DEL;
4939
4940#ifdef DEBUGGING
4941 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4942 FPTR2DPTR(void*, funcp)));
4943#endif
4944 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4945 return;
4946 /* if filter is on top of stack (usual case) just pop it off */
4947 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4948 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4949 SvREFCNT_dec(av_pop(PL_rsfp_filters));
4950
4951 return;
4952 }
4953 /* we need to search for the correct entry and clear it */
4954 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4955}
4956
4957
4958/* Invoke the idxth filter function for the current rsfp. */
4959/* maxlen 0 = read one text line */
4960I32
4961Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4962{
4963 filter_t funcp;
4964 I32 ret;
4965 SV *datasv = NULL;
4966 /* This API is bad. It should have been using unsigned int for maxlen.
4967 Not sure if we want to change the API, but if not we should sanity
4968 check the value here. */
4969 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4970
4971 PERL_ARGS_ASSERT_FILTER_READ;
4972
4973 if (!PL_parser || !PL_rsfp_filters)
4974 return -1;
4975 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4976 /* Provide a default input filter to make life easy. */
4977 /* Note that we append to the line. This is handy. */
4978 DEBUG_P(PerlIO_printf(Perl_debug_log,
4979 "filter_read %d: from rsfp\n", idx));
4980 if (correct_length) {
4981 /* Want a block */
4982 int len ;
4983 const int old_len = SvCUR(buf_sv);
4984
4985 /* ensure buf_sv is large enough */
4986 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4987 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4988 correct_length)) <= 0) {
4989 if (PerlIO_error(PL_rsfp))
4990 return -1; /* error */
4991 else
4992 return 0 ; /* end of file */
4993 }
4994 SvCUR_set(buf_sv, old_len + len) ;
4995 SvPVX(buf_sv)[old_len + len] = '\0';
4996 } else {
4997 /* Want a line */
4998 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4999 if (PerlIO_error(PL_rsfp))
5000 return -1; /* error */
5001 else
5002 return 0 ; /* end of file */
5003 }
5004 }
5005 return SvCUR(buf_sv);
5006 }
5007 /* Skip this filter slot if filter has been deleted */
5008 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
5009 DEBUG_P(PerlIO_printf(Perl_debug_log,
5010 "filter_read %d: skipped (filter deleted)\n",
5011 idx));
5012 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
5013 }
5014 if (SvTYPE(datasv) != SVt_PVIO) {
5015 if (correct_length) {
5016 /* Want a block */
5017 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
5018 if (!remainder) return 0; /* eof */
5019 if (correct_length > remainder) correct_length = remainder;
5020 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
5021 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
5022 } else {
5023 /* Want a line */
5024 const char *s = SvEND(datasv);
5025 const char *send = SvPVX(datasv) + SvLEN(datasv);
5026 while (s < send) {
5027 if (*s == '\n') {
5028 s++;
5029 break;
5030 }
5031 s++;
5032 }
5033 if (s == send) return 0; /* eof */
5034 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
5035 SvCUR_set(datasv, s-SvPVX(datasv));
5036 }
5037 return SvCUR(buf_sv);
5038 }
5039 /* Get function pointer hidden within datasv */
5040 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
5041 DEBUG_P(PerlIO_printf(Perl_debug_log,
5042 "filter_read %d: via function %p (%s)\n",
5043 idx, (void*)datasv, SvPV_nolen_const(datasv)));
5044 /* Call function. The function is expected to */
5045 /* call "FILTER_READ(idx+1, buf_sv)" first. */
5046 /* Return: <0:error, =0:eof, >0:not eof */
5047 ENTER;
5048 save_scalar(PL_errgv);
5049
5050 /* although this calls out to a random C function, there's a good
5051 * chance that that function will call back into perl (e.g. using
5052 * Filter::Util::Call). So downgrade the stack to
5053 * non-reference-counted for backwards compatibility - i.e. do the
5054 * equivalent of xs_wrap(), but this time we know there are no
5055 * args to be passed or returned on the stack, simplifying it.
5056 */
5057#ifdef PERL_RC_STACK
5058 assert(AvREAL(PL_curstack));
5059 I32 oldbase = PL_curstackinfo->si_stack_nonrc_base;
5060 I32 oldsp = PL_stack_sp - PL_stack_base;
5061 if (!oldbase)
5062 PL_curstackinfo->si_stack_nonrc_base = oldsp + 1;
5063#endif
5064
5065 ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
5066
5067#ifdef PERL_RC_STACK
5068 assert(oldsp == PL_stack_sp - PL_stack_base);
5069 assert(AvREAL(PL_curstack));
5070 assert(PL_curstackinfo->si_stack_nonrc_base ==
5071 oldbase ? oldbase : oldsp + 1);
5072 PL_curstackinfo->si_stack_nonrc_base = oldbase;
5073#endif
5074
5075 LEAVE;
5076 return ret;
5077}
5078
5079STATIC char *
5080S_filter_gets(pTHX_ SV *sv, STRLEN append)
5081{
5082 PERL_ARGS_ASSERT_FILTER_GETS;
5083
5084#ifdef PERL_CR_FILTER
5085 if (!PL_rsfp_filters) {
5086 filter_add(S_cr_textfilter,NULL);
5087 }
5088#endif
5089 if (PL_rsfp_filters) {
5090 if (!append)
5091 SvCUR_set(sv, 0); /* start with empty line */
5092 if (FILTER_READ(0, sv, 0) > 0)
5093 return ( SvPVX(sv) ) ;
5094 else
5095 return NULL ;
5096 }
5097 else
5098 return (sv_gets(sv, PL_rsfp, append));
5099}
5100
5101STATIC HV *
5102S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
5103{
5104 GV *gv;
5105
5106 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
5107
5108 if (memEQs(pkgname, len, "__PACKAGE__"))
5109 return PL_curstash;
5110
5111 if (len > 2
5112 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
5113 && (gv = gv_fetchpvn_flags(pkgname,
5114 len,
5115 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
5116 {
5117 return GvHV(gv); /* Foo:: */
5118 }
5119
5120 /* use constant CLASS => 'MyClass' */
5121 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
5122 if (gv && GvCV(gv)) {
5123 SV * const sv = cv_const_sv(GvCV(gv));
5124 if (sv)
5125 return gv_stashsv(sv, 0);
5126 }
5127
5128 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
5129}
5130
5131
5132STATIC char *
5133S_tokenize_use(pTHX_ int is_use, char *s) {
5134 PERL_ARGS_ASSERT_TOKENIZE_USE;
5135
5136 if (PL_expect != XSTATE)
5137 /* diag_listed_as: "use" not allowed in expression */
5138 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
5139 is_use ? "use" : "no"));
5140 PL_expect = XTERM;
5141 s = skipspace(s);
5142 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
5143 s = force_version(s, TRUE);
5144 if (*s == ';' || *s == '}'
5145 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
5146 NEXTVAL_NEXTTOKE.opval = NULL;
5147 force_next(BAREWORD);
5148 }
5149 else if (*s == 'v') {
5150 s = force_word(s,BAREWORD,FALSE,TRUE);
5151 s = force_version(s, FALSE);
5152 }
5153 }
5154 else {
5155 s = force_word(s,BAREWORD,FALSE,TRUE);
5156 s = force_version(s, FALSE);
5157 }
5158 pl_yylval.ival = is_use;
5159 return s;
5160}
5161#ifdef DEBUGGING
5162 static const char* const exp_name[] =
5163 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
5164 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
5165 "SIGVAR", "TERMORDORDOR"
5166 };
5167#endif
5168
5169#define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
5170STATIC bool
5171S_word_takes_any_delimiter(char *p, STRLEN len)
5172{
5173 return (len == 1 && memCHRs("msyq", p[0]))
5174 || (len == 2
5175 && ((p[0] == 't' && p[1] == 'r')
5176 || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
5177}
5178
5179static void
5180S_check_scalar_slice(pTHX_ char *s)
5181{
5182 s++;
5183 while (SPACE_OR_TAB(*s)) s++;
5184 if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
5185 PL_bufend,
5186 UTF))
5187 {
5188 return;
5189 }
5190 while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
5191 || (*s && memCHRs(" \t$#+-'\"", *s)))
5192 {
5193 s += UTF ? UTF8SKIP(s) : 1;
5194 }
5195 if (*s == '}' || *s == ']')
5196 pl_yylval.ival = OPpSLICEWARNING;
5197}
5198
5199#define lex_token_boundary() S_lex_token_boundary(aTHX)
5200static void
5201S_lex_token_boundary(pTHX)
5202{
5203 PL_oldoldbufptr = PL_oldbufptr;
5204 PL_oldbufptr = PL_bufptr;
5205}
5206
5207#define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
5208static char *
5209S_vcs_conflict_marker(pTHX_ char *s)
5210{
5211 lex_token_boundary();
5212 PL_bufptr = s;
5213 yyerror("Version control conflict marker");
5214 while (s < PL_bufend && *s != '\n')
5215 s++;
5216 return s;
5217}
5218
5219static int
5220yyl_sigvar(pTHX_ char *s)
5221{
5222 /* we expect the sigil and optional var name part of a
5223 * signature element here. Since a '$' is not necessarily
5224 * followed by a var name, handle it specially here; the general
5225 * yylex code would otherwise try to interpret whatever follows
5226 * as a var; e.g. ($, ...) would be seen as the var '$,'
5227 */
5228
5229 U8 sigil;
5230
5231 s = skipspace(s);
5232 sigil = *s++;
5233 PL_bufptr = s; /* for error reporting */
5234 switch (sigil) {
5235 case '$':
5236 case '@':
5237 case '%':
5238 /* spot stuff that looks like an prototype */
5239 if (memCHRs("$:@%&*;\\[]", *s)) {
5240 yyerror("Illegal character following sigil in a subroutine signature");
5241 break;
5242 }
5243 /* '$#' is banned, while '$ # comment' isn't */
5244 if (*s == '#') {
5245 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5246 break;
5247 }
5248 s = skipspace(s);
5249 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5250 char *dest = PL_tokenbuf + 1;
5251 /* read var name, including sigil, into PL_tokenbuf */
5252 PL_tokenbuf[0] = sigil;
5253 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5254 0, cBOOL(UTF), FALSE, FALSE);
5255 *dest = '\0';
5256 assert(PL_tokenbuf[1]); /* we have a variable name */
5257 }
5258 else {
5259 *PL_tokenbuf = 0;
5260 PL_in_my = 0;
5261 }
5262
5263 s = skipspace(s);
5264 /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5265 * as the ASSIGNOP, and exclude other tokens that start with =
5266 */
5267 if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
5268 /* save now to report with the same context as we did when
5269 * all ASSIGNOPS were accepted */
5270 PL_oldbufptr = s;
5271
5272 ++s;
5273 NEXTVAL_NEXTTOKE.ival = OP_SASSIGN;
5274 force_next(ASSIGNOP);
5275 PL_expect = XTERM;
5276 }
5277 else if(*s == '/' && s[1] == '/' && s[2] == '=') {
5278 PL_oldbufptr = s;
5279
5280 s += 3;
5281 NEXTVAL_NEXTTOKE.ival = OP_DORASSIGN;
5282 force_next(ASSIGNOP);
5283 PL_expect = XTERM;
5284 }
5285 else if(*s == '|' && s[1] == '|' && s[2] == '=') {
5286 PL_oldbufptr = s;
5287
5288 s += 3;
5289 NEXTVAL_NEXTTOKE.ival = OP_ORASSIGN;
5290 force_next(ASSIGNOP);
5291 PL_expect = XTERM;
5292 }
5293 else if (*s == ',' || *s == ')') {
5294 PL_expect = XOPERATOR;
5295 }
5296 else {
5297 /* make sure the context shows the unexpected character and
5298 * hopefully a bit more */
5299 if (*s) ++s;
5300 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5301 s++;
5302 PL_bufptr = s; /* for error reporting */
5303 yyerror("Illegal operator following parameter in a subroutine signature");
5304 PL_in_my = 0;
5305 }
5306 if (*PL_tokenbuf) {
5307 NEXTVAL_NEXTTOKE.ival = sigil;
5308 force_next('p'); /* force a signature pending identifier */
5309 }
5310 break;
5311
5312 case ')':
5313 PL_expect = XBLOCK;
5314 break;
5315 case ',': /* handle ($a,,$b) */
5316 break;
5317
5318 default:
5319 PL_in_my = 0;
5320 yyerror("A signature parameter must start with '$', '@' or '%'");
5321 /* very crude error recovery: skip to likely next signature
5322 * element */
5323 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5324 s++;
5325 break;
5326 }
5327
5328 switch (sigil) {
5329 case ',': TOKEN (PERLY_COMMA);
5330 case '$': TOKEN (PERLY_DOLLAR);
5331 case '@': TOKEN (PERLY_SNAIL);
5332 case '%': TOKEN (PERLY_PERCENT_SIGN);
5333 case ')': TOKEN (PERLY_PAREN_CLOSE);
5334 default: TOKEN (sigil);
5335 }
5336}
5337
5338static int
5339yyl_dollar(pTHX_ char *s)
5340{
5341 CLINE;
5342
5343 if (PL_expect == XPOSTDEREF) {
5344 if (s[1] == '#') {
5345 s++;
5346 POSTDEREF(DOLSHARP);
5347 }
5348 POSTDEREF(PERLY_DOLLAR);
5349 }
5350
5351 if ( s[1] == '#'
5352 && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5353 || memCHRs("{$:+-@", s[2])))
5354 {
5355 PL_tokenbuf[0] = '@';
5356 s = scan_ident(s + 1, PL_tokenbuf + 1,
5357 sizeof PL_tokenbuf - 1, FALSE);
5358 if (PL_expect == XOPERATOR) {
5359 char *d = s;
5360 if (PL_bufptr > s) {
5361 d = PL_bufptr-1;
5362 PL_bufptr = PL_oldbufptr;
5363 }
5364 no_op("Array length", d);
5365 }
5366 if (!PL_tokenbuf[1])
5367 PREREF(DOLSHARP);
5368 PL_expect = XOPERATOR;
5369 force_ident_maybe_lex('#');
5370 TOKEN(DOLSHARP);
5371 }
5372
5373 PL_tokenbuf[0] = '$';
5374 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5375 if (PL_expect == XOPERATOR) {
5376 char *d = s;
5377 if (PL_bufptr > s) {
5378 d = PL_bufptr-1;
5379 PL_bufptr = PL_oldbufptr;
5380 }
5381 no_op("Scalar", d);
5382 }
5383 if (!PL_tokenbuf[1]) {
5384 if (s == PL_bufend)
5385 yyerror("Final $ should be \\$ or $name");
5386 PREREF(PERLY_DOLLAR);
5387 }
5388
5389 {
5390 const char tmp = *s;
5391 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5392 s = skipspace(s);
5393
5394 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5395 && intuit_more(s, PL_bufend)) {
5396 if (*s == '[') {
5397 PL_tokenbuf[0] = '@';
5398 if (ckWARN(WARN_SYNTAX)) {
5399 char *t = s+1;
5400
5401 while ( t < PL_bufend ) {
5402 if (isSPACE(*t)) {
5403 do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5404 /* consumed one or more space chars */
5405 } else if (*t == '$' || *t == '@') {
5406 /* could be more than one '$' like $$ref or @$ref */
5407 do { t++; } while (t < PL_bufend && *t == '$');
5408
5409 /* could be an abigail style identifier like $ foo */
5410 while (t < PL_bufend && *t == ' ') t++;
5411
5412 /* strip off the name of the var */
5413 while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5414 t += UTF ? UTF8SKIP(t) : 1;
5415 /* consumed a varname */
5416 } else if (isDIGIT(*t)) {
5417 /* deal with hex constants like 0x11 */
5418 if (t[0] == '0' && t[1] == 'x') {
5419 t += 2;
5420 while (t < PL_bufend && isXDIGIT(*t)) t++;
5421 } else {
5422 /* deal with decimal/octal constants like 1 and 0123 */
5423 do { t++; } while (isDIGIT(*t));
5424 if (t<PL_bufend && *t == '.') {
5425 do { t++; } while (isDIGIT(*t));
5426 }
5427 }
5428 /* consumed a number */
5429 } else {
5430 /* not a var nor a space nor a number */
5431 break;
5432 }
5433 }
5434 if (t < PL_bufend && *t++ == ',') {
5435 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5436 while (t < PL_bufend && *t != ']')
5437 t++;
5438 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5439 "Multidimensional syntax %" UTF8f " not supported",
5440 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5441 }
5442 }
5443 }
5444 else if (*s == '{') {
5445 char *t;
5446 PL_tokenbuf[0] = '%';
5447 if ( strEQ(PL_tokenbuf+1, "SIG")
5448 && ckWARN(WARN_SYNTAX)
5449 && (t = (char *) memchr(s, '}', PL_bufend - s))
5450 && (t = (char *) memchr(t, '=', PL_bufend - t)))
5451 {
5452 char tmpbuf[sizeof PL_tokenbuf];
5453 do {
5454 t++;
5455 } while (isSPACE(*t));
5456 if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5457 STRLEN len;
5458 t = scan_word6(t, tmpbuf, sizeof tmpbuf, TRUE,
5459 &len, TRUE);
5460 while (isSPACE(*t))
5461 t++;
5462 if ( *t == ';'
5463 && get_cvn_flags(tmpbuf, len, UTF
5464 ? SVf_UTF8
5465 : 0))
5466 {
5467 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5468 "You need to quote \"%" UTF8f "\"",
5469 UTF8fARG(UTF, len, tmpbuf));
5470 }
5471 }
5472 }
5473 }
5474 }
5475
5476 PL_expect = XOPERATOR;
5477 if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5478 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5479 if (!islop || PL_last_lop_op == OP_GREPSTART)
5480 PL_expect = XOPERATOR;
5481 else if (memCHRs("$@\"'`q", *s))
5482 PL_expect = XTERM; /* e.g. print $fh "foo" */
5483 else if ( memCHRs("&*<%", *s)
5484 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5485 {
5486 PL_expect = XTERM; /* e.g. print $fh &sub */
5487 }
5488 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5489 char tmpbuf[sizeof PL_tokenbuf];
5490 int t2;
5491 STRLEN len;
5492 scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE);
5493 if ((t2 = keyword(tmpbuf, len, 0))) {
5494 /* binary operators exclude handle interpretations */
5495 switch (t2) {
5496 case -KEY_x:
5497 case -KEY_eq:
5498 case -KEY_ne:
5499 case -KEY_gt:
5500 case -KEY_lt:
5501 case -KEY_ge:
5502 case -KEY_le:
5503 case -KEY_cmp:
5504 break;
5505 default:
5506 PL_expect = XTERM; /* e.g. print $fh length() */
5507 break;
5508 }
5509 }
5510 else {
5511 PL_expect = XTERM; /* e.g. print $fh subr() */
5512 }
5513 }
5514 else if (isDIGIT(*s))
5515 PL_expect = XTERM; /* e.g. print $fh 3 */
5516 else if (*s == '.' && isDIGIT(s[1]))
5517 PL_expect = XTERM; /* e.g. print $fh .3 */
5518 else if ((*s == '?' || *s == '-' || *s == '+')
5519 && !isSPACE(s[1]) && s[1] != '=')
5520 PL_expect = XTERM; /* e.g. print $fh -1 */
5521 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5522 && s[1] != '/')
5523 PL_expect = XTERM; /* e.g. print $fh /.../
5524 XXX except DORDOR operator
5525 */
5526 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5527 && s[2] != '=')
5528 PL_expect = XTERM; /* print $fh <<"EOF" */
5529 }
5530 }
5531 force_ident_maybe_lex('$');
5532 TOKEN(PERLY_DOLLAR);
5533}
5534
5535static int
5536yyl_sub(pTHX_ char *s, const int key)
5537{
5538 char * const tmpbuf = PL_tokenbuf + 1;
5539 bool have_name, have_proto;
5540 STRLEN len;
5541 SV *format_name = NULL;
5542 bool is_method = (key == KEY_method);
5543
5544 /* method always implies signatures */
5545 bool is_sigsub = is_method || FEATURE_SIGNATURES_IS_ENABLED;
5546
5547 SSize_t off = s-SvPVX(PL_linestr);
5548 char *d;
5549
5550 s = skipspace(s); /* can move PL_linestr */
5551
5552 d = SvPVX(PL_linestr)+off;
5553
5554 SAVEBOOL(PL_parser->sig_seen);
5555 PL_parser->sig_seen = FALSE;
5556
5557 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5558 || *s == '\''
5559 || (*s == ':' && s[1] == ':'))
5560 {
5561
5562 PL_expect = XATTRBLOCK;
5563 d = scan_word6(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5564 &len, TRUE);
5565 if (key == KEY_format)
5566 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5567 *PL_tokenbuf = '&';
5568 if (memchr(tmpbuf, ':', len) || key != KEY_sub
5569 || pad_findmy_pvn(
5570 PL_tokenbuf, len + 1, 0
5571 ) != NOT_IN_PAD)
5572 sv_setpvn(PL_subname, tmpbuf, len);
5573 else {
5574 sv_setsv(PL_subname,PL_curstname);
5575 sv_catpvs(PL_subname,"::");
5576 sv_catpvn(PL_subname,tmpbuf,len);
5577 }
5578 if (SvUTF8(PL_linestr))
5579 SvUTF8_on(PL_subname);
5580 have_name = TRUE;
5581
5582 s = skipspace(d);
5583 }
5584 else {
5585 if (key == KEY_my || key == KEY_our || key==KEY_state) {
5586 *d = '\0';
5587 /* diag_listed_as: Missing name in "%s sub" */
5588 Perl_croak(aTHX_
5589 "Missing name in \"%s\"", PL_bufptr);
5590 }
5591 PL_expect = XATTRTERM;
5592 sv_setpvs(PL_subname,"?");
5593 have_name = FALSE;
5594 }
5595
5596 if (key == KEY_format) {
5597 if (format_name) {
5598 NEXTVAL_NEXTTOKE.opval
5599 = newSVOP(OP_CONST,0, format_name);
5600 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5601 force_next(BAREWORD);
5602 }
5603 PREBLOCK(KW_FORMAT);
5604 }
5605
5606 /* Look for a prototype */
5607 if (*s == '(' && !is_sigsub) {
5608 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5609 if (!s)
5610 Perl_croak(aTHX_ "Prototype not terminated");
5611 COPLINE_SET_FROM_MULTI_END;
5612 (void)validate_proto(PL_subname, PL_lex_stuff,
5613 ckWARN(WARN_ILLEGALPROTO), 0);
5614 have_proto = TRUE;
5615
5616 s = skipspace(s);
5617 }
5618 else
5619 have_proto = FALSE;
5620
5621 if ( !(*s == ':' && s[1] != ':')
5622 && (*s != '{' && *s != '(') && key != KEY_format)
5623 {
5624 assert(key == KEY_sub || key == KEY_method ||
5625 key == KEY_AUTOLOAD || key == KEY_DESTROY ||
5626 key == KEY_BEGIN || key == KEY_UNITCHECK || key == KEY_CHECK ||
5627 key == KEY_INIT || key == KEY_END ||
5628 key == KEY_my || key == KEY_state ||
5629 key == KEY_our);
5630 if (!have_name)
5631 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5632 else if (*s != ';' && *s != '}')
5633 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5634 }
5635
5636 if (have_proto) {
5637 NEXTVAL_NEXTTOKE.opval =
5638 newSVOP(OP_CONST, 0, PL_lex_stuff);
5639 PL_lex_stuff = NULL;
5640 force_next(THING);
5641 }
5642
5643 if (!have_name) {
5644 if (PL_curstash)
5645 sv_setpvs(PL_subname, "__ANON__");
5646 else
5647 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5648 if (is_method)
5649 TOKEN(KW_METHOD_anon);
5650 else if (is_sigsub)
5651 TOKEN(KW_SUB_anon_sig);
5652 else
5653 TOKEN(KW_SUB_anon);
5654 }
5655 force_ident_maybe_lex('&');
5656 if (is_method)
5657 TOKEN(KW_METHOD_named);
5658 else if (is_sigsub)
5659 TOKEN(KW_SUB_named_sig);
5660 else
5661 TOKEN(KW_SUB_named);
5662}
5663
5664static int
5665yyl_interpcasemod(pTHX_ char *s)
5666{
5667#ifdef DEBUGGING
5668 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5669 Perl_croak(aTHX_
5670 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5671 PL_bufptr, PL_bufend, *PL_bufptr);
5672#endif
5673
5674 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5675 /* if at a \E */
5676 if (PL_lex_casemods) {
5677 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5678 PL_lex_casestack[PL_lex_casemods] = '\0';
5679
5680 if (PL_bufptr != PL_bufend
5681 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5682 || oldmod == 'F')) {
5683 PL_bufptr += 2;
5684 PL_lex_state = LEX_INTERPCONCAT;
5685 }
5686 PL_lex_allbrackets--;
5687 return REPORT(PERLY_PAREN_CLOSE);
5688 }
5689 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5690 /* Got an unpaired \E */
5691 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5692 "Useless use of \\E");
5693 }
5694 if (PL_bufptr != PL_bufend)
5695 PL_bufptr += 2;
5696 PL_lex_state = LEX_INTERPCONCAT;
5697 return yylex();
5698 }
5699 else {
5700 DEBUG_T({
5701 PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5702 });
5703 s = PL_bufptr + 1;
5704 if (s[1] == '\\' && s[2] == 'E') {
5705 PL_bufptr = s + 3;
5706 PL_lex_state = LEX_INTERPCONCAT;
5707 return yylex();
5708 }
5709 else {
5710 I32 tmp;
5711 if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5712 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5713 {
5714 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
5715 }
5716 if ((*s == 'L' || *s == 'U' || *s == 'F')
5717 && (strpbrk(PL_lex_casestack, "LUF")))
5718 {
5719 PL_lex_casestack[--PL_lex_casemods] = '\0';
5720 PL_lex_allbrackets--;
5721 return REPORT(PERLY_PAREN_CLOSE);
5722 }
5723 if (PL_lex_casemods > 10)
5724 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5725 PL_lex_casestack[PL_lex_casemods++] = *s;
5726 PL_lex_casestack[PL_lex_casemods] = '\0';
5727 PL_lex_state = LEX_INTERPCONCAT;
5728 NEXTVAL_NEXTTOKE.ival = 0;
5729 force_next((2<<24)|PERLY_PAREN_OPEN);
5730 if (*s == 'l')
5731 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5732 else if (*s == 'u')
5733 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5734 else if (*s == 'L')
5735 NEXTVAL_NEXTTOKE.ival = OP_LC;
5736 else if (*s == 'U')
5737 NEXTVAL_NEXTTOKE.ival = OP_UC;
5738 else if (*s == 'Q')
5739 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5740 else if (*s == 'F')
5741 NEXTVAL_NEXTTOKE.ival = OP_FC;
5742 else
5743 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5744 PL_bufptr = s + 1;
5745 }
5746 force_next(FUNC);
5747 if (PL_lex_starts) {
5748 s = PL_bufptr;
5749 PL_lex_starts = 0;
5750 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5751 if (PL_lex_casemods == 1 && PL_lex_inpat)
5752 TOKEN(PERLY_COMMA);
5753 else
5754 AopNOASSIGN(OP_CONCAT);
5755 }
5756 else
5757 return yylex();
5758 }
5759}
5760
5761static int
5762yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5763 GV **pgv, GV ***pgvp)
5764{
5765 GV *ogv = NULL; /* override (winner) */
5766 GV *hgv = NULL; /* hidden (loser) */
5767 GV *gv = *pgv;
5768
5769 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5770 CV *cv;
5771 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5772 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5773 SVt_PVCV))
5774 && (cv = GvCVu(gv)))
5775 {
5776 if (GvIMPORTED_CV(gv))
5777 ogv = gv;
5778 else if (! CvNOWARN_AMBIGUOUS(cv))
5779 hgv = gv;
5780 }
5781 if (!ogv
5782 && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5783 && (gv = **pgvp)
5784 && (isGV_with_GP(gv)
5785 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5786 : SvPCS_IMPORTED(gv)
5787 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5788 len, 0), 1)))
5789 {
5790 ogv = gv;
5791 }
5792 }
5793
5794 *pgv = gv;
5795
5796 if (ogv) {
5797 *orig_keyword = key;
5798 return 0; /* overridden by import or by GLOBAL */
5799 }
5800 else if (gv && !*pgvp
5801 && -key==KEY_lock /* XXX generalizable kludge */
5802 && GvCVu(gv))
5803 {
5804 return 0; /* any sub overrides "weak" keyword */
5805 }
5806 else { /* no override */
5807 key = -key;
5808 if (key == KEY_dump) {
5809 Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5810 }
5811 *pgv = NULL;
5812 *pgvp = 0;
5813 if (hgv && key != KEY_x) /* never ambiguous */
5814 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5815 "Ambiguous call resolved as CORE::%s(), "
5816 "qualify as such or use &",
5817 GvENAME(hgv));
5818 return key;
5819 }
5820}
5821
5822static int
5823yyl_qw(pTHX_ char *s, STRLEN len)
5824{
5825 OP *words = NULL;
5826
5827 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5828 if (!s)
5829 missingterm(NULL, 0);
5830
5831 COPLINE_SET_FROM_MULTI_END;
5832 PL_expect = XOPERATOR;
5833 if (SvCUR(PL_lex_stuff)) {
5834 int warned_comma = !ckWARN(WARN_QW);
5835 int warned_comment = warned_comma;
5836 char *d = SvPV_force(PL_lex_stuff, len);
5837 while (len) {
5838 for (; isSPACE(*d) && len; --len, ++d)
5839 /**/;
5840 if (len) {
5841 SV *sv;
5842 const char *b = d;
5843 if (!warned_comma || !warned_comment) {
5844 for (; !isSPACE(*d) && len; --len, ++d) {
5845 if (!warned_comma && *d == ',') {
5846 Perl_warner(aTHX_ packWARN(WARN_QW),
5847 "Possible attempt to separate words with commas");
5848 ++warned_comma;
5849 }
5850 else if (!warned_comment && *d == '#') {
5851 Perl_warner(aTHX_ packWARN(WARN_QW),
5852 "Possible attempt to put comments in qw() list");
5853 ++warned_comment;
5854 }
5855 }
5856 }
5857 else {
5858 for (; !isSPACE(*d) && len; --len, ++d)
5859 /**/;
5860 }
5861 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5862 words = op_append_elem(OP_LIST, words,
5863 newSVOP(OP_CONST, 0, tokeq(sv)));
5864 }
5865 }
5866 }
5867 if (!words)
5868 words = newNULLLIST();
5869 SvREFCNT_dec_NN(PL_lex_stuff);
5870 PL_lex_stuff = NULL;
5871 PL_expect = XOPERATOR;
5872 pl_yylval.opval = sawparens(words);
5873 TOKEN(QWLIST);
5874}
5875
5876static int
5877yyl_hyphen(pTHX_ char *s)
5878{
5879 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5880 I32 ftst = 0;
5881 char tmp;
5882
5883 s++;
5884 PL_bufptr = s;
5885 tmp = *s++;
5886
5887 while (s < PL_bufend && SPACE_OR_TAB(*s))
5888 s++;
5889
5890 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5891 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5892 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5893 OPERATOR(PERLY_MINUS); /* unary minus */
5894 }
5895 switch (tmp) {
5896 case 'r': ftst = OP_FTEREAD; break;
5897 case 'w': ftst = OP_FTEWRITE; break;
5898 case 'x': ftst = OP_FTEEXEC; break;
5899 case 'o': ftst = OP_FTEOWNED; break;
5900 case 'R': ftst = OP_FTRREAD; break;
5901 case 'W': ftst = OP_FTRWRITE; break;
5902 case 'X': ftst = OP_FTREXEC; break;
5903 case 'O': ftst = OP_FTROWNED; break;
5904 case 'e': ftst = OP_FTIS; break;
5905 case 'z': ftst = OP_FTZERO; break;
5906 case 's': ftst = OP_FTSIZE; break;
5907 case 'f': ftst = OP_FTFILE; break;
5908 case 'd': ftst = OP_FTDIR; break;
5909 case 'l': ftst = OP_FTLINK; break;
5910 case 'p': ftst = OP_FTPIPE; break;
5911 case 'S': ftst = OP_FTSOCK; break;
5912 case 'u': ftst = OP_FTSUID; break;
5913 case 'g': ftst = OP_FTSGID; break;
5914 case 'k': ftst = OP_FTSVTX; break;
5915 case 'b': ftst = OP_FTBLK; break;
5916 case 'c': ftst = OP_FTCHR; break;
5917 case 't': ftst = OP_FTTTY; break;
5918 case 'T': ftst = OP_FTTEXT; break;
5919 case 'B': ftst = OP_FTBINARY; break;
5920 case 'M': case 'A': case 'C':
5921 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5922 switch (tmp) {
5923 case 'M': ftst = OP_FTMTIME; break;
5924 case 'A': ftst = OP_FTATIME; break;
5925 case 'C': ftst = OP_FTCTIME; break;
5926 default: break;
5927 }
5928 break;
5929 default:
5930 break;
5931 }
5932 if (ftst) {
5933 PL_last_uni = PL_oldbufptr;
5934 PL_last_lop_op = (OPCODE)ftst;
5935 DEBUG_T( {
5936 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5937 } );
5938 FTST(ftst);
5939 }
5940 else {
5941 /* Assume it was a minus followed by a one-letter named
5942 * subroutine call (or a -bareword), then. */
5943 DEBUG_T( {
5944 PerlIO_printf(Perl_debug_log,
5945 "### '-%c' looked like a file test but was not\n",
5946 (int) tmp);
5947 } );
5948 s = --PL_bufptr;
5949 }
5950 }
5951 {
5952 const char tmp = *s++;
5953 if (*s == tmp) {
5954 s++;
5955 if (PL_expect == XOPERATOR)
5956 TERM(POSTDEC);
5957 else
5958 OPERATOR(PREDEC);
5959 }
5960 else if (*s == '>') {
5961 s++;
5962 s = skipspace(s);
5963 if (((*s == '$' || *s == '&') && s[1] == '*')
5964 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5965 ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5966 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5967 )
5968 {
5969 PL_expect = XPOSTDEREF;
5970 TOKEN(ARROW);
5971 }
5972 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5973 s = force_word(s,METHCALL0,FALSE,TRUE);
5974 TOKEN(ARROW);
5975 }
5976 else if (*s == '$')
5977 OPERATOR(ARROW);
5978 else
5979 TERM(ARROW);
5980 }
5981 if (PL_expect == XOPERATOR) {
5982 if (*s == '='
5983 && !PL_lex_allbrackets
5984 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5985 {
5986 s--;
5987 TOKEN(0);
5988 }
5989 Aop(OP_SUBTRACT);
5990 }
5991 else {
5992 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5993 check_uni();
5994 OPERATOR(PERLY_MINUS); /* unary minus */
5995 }
5996 }
5997}
5998
5999static int
6000yyl_plus(pTHX_ char *s)
6001{
6002 const char tmp = *s++;
6003 if (*s == tmp) {
6004 s++;
6005 if (PL_expect == XOPERATOR)
6006 TERM(POSTINC);
6007 else
6008 OPERATOR(PREINC);
6009 }
6010 if (PL_expect == XOPERATOR) {
6011 if (*s == '='
6012 && !PL_lex_allbrackets
6013 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6014 {
6015 s--;
6016 TOKEN(0);
6017 }
6018 Aop(OP_ADD);
6019 }
6020 else {
6021 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
6022 check_uni();
6023 OPERATOR(PERLY_PLUS);
6024 }
6025}
6026
6027static int
6028yyl_star(pTHX_ char *s)
6029{
6030 if (PL_expect == XPOSTDEREF)
6031 POSTDEREF(PERLY_STAR);
6032
6033 if (PL_expect != XOPERATOR) {
6034 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6035 PL_expect = XOPERATOR;
6036 force_ident(PL_tokenbuf, PERLY_STAR);
6037 if (!*PL_tokenbuf)
6038 PREREF(PERLY_STAR);
6039 TERM(PERLY_STAR);
6040 }
6041
6042 s++;
6043 if (*s == '*') {
6044 s++;
6045 if (*s == '=' && !PL_lex_allbrackets
6046 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6047 {
6048 s -= 2;
6049 TOKEN(0);
6050 }
6051 PWop(OP_POW);
6052 }
6053
6054 if (*s == '='
6055 && !PL_lex_allbrackets
6056 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6057 {
6058 s--;
6059 TOKEN(0);
6060 }
6061
6062 Mop(OP_MULTIPLY);
6063}
6064
6065static int
6066yyl_percent(pTHX_ char *s)
6067{
6068 if (PL_expect == XOPERATOR) {
6069 if (s[1] == '='
6070 && !PL_lex_allbrackets
6071 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6072 {
6073 TOKEN(0);
6074 }
6075 ++s;
6076 Mop(OP_MODULO);
6077 }
6078 else if (PL_expect == XPOSTDEREF)
6079 POSTDEREF(PERLY_PERCENT_SIGN);
6080
6081 PL_tokenbuf[0] = '%';
6082 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6083 pl_yylval.ival = 0;
6084 if (!PL_tokenbuf[1]) {
6085 PREREF(PERLY_PERCENT_SIGN);
6086 }
6087 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6088 && intuit_more(s, PL_bufend)) {
6089 if (*s == '[')
6090 PL_tokenbuf[0] = '@';
6091 }
6092 PL_expect = XOPERATOR;
6093 force_ident_maybe_lex('%');
6094 TERM(PERLY_PERCENT_SIGN);
6095}
6096
6097static int
6098yyl_caret(pTHX_ char *s)
6099{
6100 char *d = s;
6101 const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
6102 if (bof && s[1] == '.')
6103 s++;
6104 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6105 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
6106 {
6107 s = d;
6108 TOKEN(0);
6109 }
6110 s++;
6111 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
6112}
6113
6114static int
6115yyl_colon(pTHX_ char *s)
6116{
6117 OP *attrs;
6118
6119 switch (PL_expect) {
6120 case XOPERATOR:
6121 if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
6122 break;
6123 PL_bufptr = s; /* update in case we back off */
6124 if (*s == '=') {
6125 Perl_croak(aTHX_
6126 "Use of := for an empty attribute list is not allowed");
6127 }
6128 goto grabattrs;
6129 case XATTRBLOCK:
6130 PL_expect = XBLOCK;
6131 goto grabattrs;
6132 case XATTRTERM:
6133 PL_expect = XTERMBLOCK;
6134 grabattrs:
6135 /* NB: as well as parsing normal attributes, we also end up
6136 * here if there is something looking like attributes
6137 * following a signature (which is illegal, but used to be
6138 * legal in 5.20..5.26). If the latter, we still parse the
6139 * attributes so that error messages(s) are less confusing,
6140 * but ignore them (parser->sig_seen).
6141 */
6142 s = skipspace(s);
6143 attrs = NULL;
6144 while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6145 I32 tmp;
6146 SV *sv;
6147 STRLEN len;
6148 char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
6149 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
6150 if (tmp < 0) tmp = -tmp;
6151 switch (tmp) {
6152 case KEY_or:
6153 case KEY_and:
6154 case KEY_for:
6155 case KEY_foreach:
6156 case KEY_unless:
6157 case KEY_if:
6158 case KEY_while:
6159 case KEY_until:
6160 goto got_attrs;
6161 default:
6162 break;
6163 }
6164 }
6165 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
6166 if (*d == '(') {
6167 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
6168 if (!d) {
6169 op_free(attrs);
6170 ASSUME(sv && SvREFCNT(sv) == 1);
6171 SvREFCNT_dec(sv);
6172 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
6173 }
6174 COPLINE_SET_FROM_MULTI_END;
6175 }
6176 if (PL_lex_stuff) {
6177 sv_catsv(sv, PL_lex_stuff);
6178 attrs = op_append_elem(OP_LIST, attrs,
6179 newSVOP(OP_CONST, 0, sv));
6180 SvREFCNT_dec_NN(PL_lex_stuff);
6181 PL_lex_stuff = NULL;
6182 }
6183 else {
6184 attrs = op_append_elem(OP_LIST, attrs,
6185 newSVOP(OP_CONST, 0, sv));
6186 }
6187 s = skipspace(d);
6188 if (*s == ':' && s[1] != ':')
6189 s = skipspace(s+1);
6190 else if (s == d)
6191 break; /* require real whitespace or :'s */
6192 /* XXX losing whitespace on sequential attributes here */
6193 }
6194
6195 if (*s != ';'
6196 && *s != '}'
6197 && !(PL_expect == XOPERATOR
6198 /* if an operator is expected, permit =, //= and ||= or ) to end */
6199 ? (*s == '=' || *s == ')' || *s == '/' || *s == '|')
6200 : (*s == '{' || *s == '(')))
6201 {
6202 const char q = ((*s == '\'') ? '"' : '\'');
6203 /* If here for an expression, and parsed no attrs, back off. */
6204 if (PL_expect == XOPERATOR && !attrs) {
6205 s = PL_bufptr;
6206 break;
6207 }
6208 /* MUST advance bufptr here to avoid bogus "at end of line"
6209 context messages from yyerror().
6210 */
6211 PL_bufptr = s;
6212 yyerror( (const char *)
6213 (*s
6214 ? Perl_form(aTHX_ "Invalid separator character "
6215 "%c%c%c in attribute list", q, *s, q)
6216 : "Unterminated attribute list" ) );
6217 op_free(attrs);
6218 OPERATOR(PERLY_COLON);
6219 }
6220
6221 got_attrs:
6222 if (PL_parser->sig_seen) {
6223 /* see comment about about sig_seen and parser error
6224 * handling */
6225 op_free(attrs);
6226 Perl_croak(aTHX_ "Subroutine attributes must come "
6227 "before the signature");
6228 }
6229 if (attrs) {
6230 NEXTVAL_NEXTTOKE.opval = attrs;
6231 force_next(THING);
6232 }
6233 TOKEN(COLONATTR);
6234 }
6235
6236 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6237 s--;
6238 TOKEN(0);
6239 }
6240
6241 PL_lex_allbrackets--;
6242 OPERATOR(PERLY_COLON);
6243}
6244
6245static int
6246yyl_subproto(pTHX_ char *s, CV *cv)
6247{
6248 STRLEN protolen = CvPROTOLEN(cv);
6249 const char *proto = CvPROTO(cv);
6250 bool optional;
6251
6252 proto = S_strip_spaces(aTHX_ proto, &protolen);
6253 if (!protolen)
6254 TERM(FUNC0SUB);
6255 if ((optional = *proto == ';')) {
6256 do {
6257 proto++;
6258 } while (*proto == ';');
6259 }
6260
6261 if (
6262 (
6263 (
6264 *proto == '$' || *proto == '_'
6265 || *proto == '*' || *proto == '+'
6266 )
6267 && proto[1] == '\0'
6268 )
6269 || (
6270 *proto == '\\' && proto[1] && proto[2] == '\0'
6271 )
6272 ) {
6273 UNIPROTO(UNIOPSUB,optional);
6274 }
6275
6276 if (*proto == '\\' && proto[1] == '[') {
6277 const char *p = proto + 2;
6278 while(*p && *p != ']')
6279 ++p;
6280 if(*p == ']' && !p[1])
6281 UNIPROTO(UNIOPSUB,optional);
6282 }
6283
6284 if (*proto == '&' && *s == '{') {
6285 if (PL_curstash)
6286 sv_setpvs(PL_subname, "__ANON__");
6287 else
6288 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6289 if (!PL_lex_allbrackets
6290 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6291 {
6292 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6293 }
6294 PREBLOCK(LSTOPSUB);
6295 }
6296
6297 return KEY_NULL;
6298}
6299
6300static int
6301yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
6302{
6303 char *d;
6304 if (PL_lex_brackets > 100) {
6305 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6306 }
6307
6308 switch (PL_expect) {
6309 case XTERM:
6310 case XTERMORDORDOR:
6311 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6312 PL_lex_allbrackets++;
6313 OPERATOR(HASHBRACK);
6314 case XOPERATOR:
6315 while (s < PL_bufend && SPACE_OR_TAB(*s))
6316 s++;
6317 d = s;
6318 PL_tokenbuf[0] = '\0';
6319 if (d < PL_bufend && *d == '-') {
6320 PL_tokenbuf[0] = '-';
6321 d++;
6322 while (d < PL_bufend && SPACE_OR_TAB(*d))
6323 d++;
6324 }
6325 if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6326 STRLEN len;
6327 d = scan_word6(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6328 FALSE, &len, FALSE);
6329 while (d < PL_bufend && SPACE_OR_TAB(*d))
6330 d++;
6331 if (*d == '}') {
6332 const char minus = (PL_tokenbuf[0] == '-');
6333 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6334 if (minus)
6335 force_next(PERLY_MINUS);
6336 }
6337 }
6338 /* FALLTHROUGH */
6339 case XATTRTERM:
6340 case XTERMBLOCK:
6341 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6342 PL_lex_allbrackets++;
6343 PL_expect = XSTATE;
6344 break;
6345 case XATTRBLOCK:
6346 case XBLOCK:
6347 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6348 PL_lex_allbrackets++;
6349 PL_expect = XSTATE;
6350 break;
6351 case XBLOCKTERM:
6352 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6353 PL_lex_allbrackets++;
6354 PL_expect = XSTATE;
6355 break;
6356 default: {
6357 const char *t;
6358 if (PL_oldoldbufptr == PL_last_lop)
6359 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6360 else
6361 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6362 PL_lex_allbrackets++;
6363 s = skipspace(s);
6364 if (*s == '}') {
6365 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6366 PL_expect = XTERM;
6367 /* This hack is to get the ${} in the message. */
6368 PL_bufptr = s+1;
6369 yyerror("syntax error");
6370 yyquit();
6371 break;
6372 }
6373 OPERATOR(HASHBRACK);
6374 }
6375 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6376 /* ${...} or @{...} etc., but not print {...}
6377 * Skip the disambiguation and treat this as a block.
6378 */
6379 goto block_expectation;
6380 }
6381 /* This hack serves to disambiguate a pair of curlies
6382 * as being a block or an anon hash. Normally, expectation
6383 * determines that, but in cases where we're not in a
6384 * position to expect anything in particular (like inside
6385 * eval"") we have to resolve the ambiguity. This code
6386 * covers the case where the first term in the curlies is a
6387 * quoted string. Most other cases need to be explicitly
6388 * disambiguated by prepending a "+" before the opening
6389 * curly in order to force resolution as an anon hash.
6390 *
6391 * XXX should probably propagate the outer expectation
6392 * into eval"" to rely less on this hack, but that could
6393 * potentially break current behavior of eval"".
6394 * GSAR 97-07-21
6395 */
6396 t = s;
6397 if (*s == '\'' || *s == '"' || *s == '`') {
6398 /* common case: get past first string, handling escapes */
6399 for (t++; t < PL_bufend && *t != *s;)
6400 if (*t++ == '\\')
6401 t++;
6402 t++;
6403 }
6404 else if (*s == 'q') {
6405 if (++t < PL_bufend
6406 && (!isWORDCHAR(*t)
6407 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6408 && !isWORDCHAR(*t))))
6409 {
6410 /* skip q//-like construct */
6411 const char *tmps;
6412 char open, close, term;
6413 I32 brackets = 1;
6414
6415 while (t < PL_bufend && isSPACE(*t))
6416 t++;
6417 /* check for q => */
6418 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6419 OPERATOR(HASHBRACK);
6420 }
6421 term = *t;
6422 open = term;
6423 if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6424 term = tmps[5];
6425 close = term;
6426 if (open == close)
6427 for (t++; t < PL_bufend; t++) {
6428 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6429 t++;
6430 else if (*t == open)
6431 break;
6432 }
6433 else {
6434 for (t++; t < PL_bufend; t++) {
6435 if (*t == '\\' && t+1 < PL_bufend)
6436 t++;
6437 else if (*t == close && --brackets <= 0)
6438 break;
6439 else if (*t == open)
6440 brackets++;
6441 }
6442 }
6443 t++;
6444 }
6445 else
6446 /* skip plain q word */
6447 while ( t < PL_bufend
6448 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6449 {
6450 t += UTF ? UTF8SKIP(t) : 1;
6451 }
6452 }
6453 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6454 t += UTF ? UTF8SKIP(t) : 1;
6455 while ( t < PL_bufend
6456 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6457 {
6458 t += UTF ? UTF8SKIP(t) : 1;
6459 }
6460 }
6461 while (t < PL_bufend && isSPACE(*t))
6462 t++;
6463 /* if comma follows first term, call it an anon hash */
6464 /* XXX it could be a comma expression with loop modifiers */
6465 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6466 || (*t == '=' && t[1] == '>')))
6467 OPERATOR(HASHBRACK);
6468 if (PL_expect == XREF) {
6469 block_expectation:
6470 /* If there is an opening brace or 'sub:', treat it
6471 as a term to make ${{...}}{k} and &{sub:attr...}
6472 dwim. Otherwise, treat it as a statement, so
6473 map {no strict; ...} works.
6474 */
6475 s = skipspace(s);
6476 if (*s == '{') {
6477 PL_expect = XTERM;
6478 break;
6479 }
6480 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6481 PL_bufptr = s;
6482 d = s + 3;
6483 d = skipspace(d);
6484 s = PL_bufptr;
6485 if (*d == ':') {
6486 PL_expect = XTERM;
6487 break;
6488 }
6489 }
6490 PL_expect = XSTATE;
6491 }
6492 else {
6493 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6494 PL_expect = XSTATE;
6495 }
6496 }
6497 break;
6498 }
6499
6500 pl_yylval.ival = CopLINE(PL_curcop);
6501 PL_copline = NOLINE; /* invalidate current command line number */
6502 TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN);
6503}
6504
6505static int
6506yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6507{
6508 assert(s != PL_bufend);
6509 s++;
6510
6511 if (PL_lex_brackets <= 0)
6512 /* diag_listed_as: Unmatched right %s bracket */
6513 yyerror("Unmatched right curly bracket");
6514 else
6515 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6516
6517 PL_lex_allbrackets--;
6518
6519 if (PL_lex_state == LEX_INTERPNORMAL) {
6520 if (PL_lex_brackets == 0) {
6521 if (PL_expect & XFAKEBRACK) {
6522 PL_expect &= XENUMMASK;
6523 PL_lex_state = LEX_INTERPEND;
6524 PL_bufptr = s;
6525 return yylex(); /* ignore fake brackets */
6526 }
6527 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6528 && SvEVALED(PL_lex_repl))
6529 PL_lex_state = LEX_INTERPEND;
6530 else if (*s == '-' && s[1] == '>')
6531 PL_lex_state = LEX_INTERPENDMAYBE;
6532 else if (*s != '[' && *s != '{')
6533 PL_lex_state = LEX_INTERPEND;
6534 }
6535 }
6536
6537 if (PL_expect & XFAKEBRACK) {
6538 PL_expect &= XENUMMASK;
6539 PL_bufptr = s;
6540 return yylex(); /* ignore fake brackets */
6541 }
6542
6543 force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE);
6544 if (formbrack) LEAVE_with_name("lex_format");
6545 if (formbrack == 2) { /* means . where arguments were expected */
6546 force_next(PERLY_SEMICOLON);
6547 TOKEN(FORMRBRACK);
6548 }
6549
6550 TOKEN(PERLY_SEMICOLON);
6551}
6552
6553static int
6554yyl_ampersand(pTHX_ char *s)
6555{
6556 if (PL_expect == XPOSTDEREF)
6557 POSTDEREF(PERLY_AMPERSAND);
6558
6559 s++;
6560 if (*s++ == '&') {
6561 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6562 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6563 s -= 2;
6564 TOKEN(0);
6565 }
6566 AOPERATOR(ANDAND);
6567 }
6568 s--;
6569
6570 if (PL_expect == XOPERATOR) {
6571 char *d;
6572 bool bof;
6573 if ( PL_bufptr == PL_linestart
6574 && ckWARN(WARN_SEMICOLON)
6575 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6576 {
6577 CopLINE_dec(PL_curcop);
6578 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6579 CopLINE_inc(PL_curcop);
6580 }
6581 d = s;
6582 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6583 s++;
6584 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6585 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6586 s = d;
6587 s--;
6588 TOKEN(0);
6589 }
6590 if (d == s)
6591 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6592 else
6593 BAop(OP_SBIT_AND);
6594 }
6595
6596 PL_tokenbuf[0] = '&';
6597 s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6598 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6599
6600 if (PL_tokenbuf[1])
6601 force_ident_maybe_lex('&');
6602 else
6603 PREREF(PERLY_AMPERSAND);
6604
6605 TERM(PERLY_AMPERSAND);
6606}
6607
6608static int
6609yyl_verticalbar(pTHX_ char *s)
6610{
6611 char *d;
6612 bool bof;
6613
6614 s++;
6615 if (*s++ == '|') {
6616 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6617 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6618 s -= 2;
6619 TOKEN(0);
6620 }
6621 AOPERATOR(OROR);
6622 }
6623
6624 s--;
6625 d = s;
6626 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6627 s++;
6628
6629 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6630 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6631 s = d - 1;
6632 TOKEN(0);
6633 }
6634
6635 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6636}
6637
6638static int
6639yyl_bang(pTHX_ char *s)
6640{
6641 const char tmp = *s++;
6642 if (tmp == '=') {
6643 /* was this !=~ where !~ was meant?
6644 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6645
6646 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6647 const char *t = s+1;
6648
6649 while (t < PL_bufend && isSPACE(*t))
6650 ++t;
6651
6652 if (*t == '/' || *t == '?'
6653 || ((*t == 'm' || *t == 's' || *t == 'y')
6654 && !isWORDCHAR(t[1]))
6655 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6656 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6657 "!=~ should be !~");
6658 }
6659
6660 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6661 s -= 2;
6662 TOKEN(0);
6663 }
6664
6665 ChEop(OP_NE);
6666 }
6667
6668 if (tmp == '~')
6669 PMop(OP_NOT);
6670
6671 s--;
6672 OPERATOR(PERLY_EXCLAMATION_MARK);
6673}
6674
6675static int
6676yyl_snail(pTHX_ char *s)
6677{
6678 if (PL_expect == XPOSTDEREF)
6679 POSTDEREF(PERLY_SNAIL);
6680 PL_tokenbuf[0] = '@';
6681 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6682 if (PL_expect == XOPERATOR) {
6683 char *d = s;
6684 if (PL_bufptr > s) {
6685 d = PL_bufptr-1;
6686 PL_bufptr = PL_oldbufptr;
6687 }
6688 no_op("Array", d);
6689 }
6690 pl_yylval.ival = 0;
6691 if (!PL_tokenbuf[1]) {
6692 PREREF(PERLY_SNAIL);
6693 }
6694 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6695 s = skipspace(s);
6696 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6697 && intuit_more(s, PL_bufend))
6698 {
6699 if (*s == '{')
6700 PL_tokenbuf[0] = '%';
6701
6702 /* Warn about @ where they meant $. */
6703 if (*s == '[' || *s == '{') {
6704 if (ckWARN(WARN_SYNTAX)) {
6705 S_check_scalar_slice(aTHX_ s);
6706 }
6707 }
6708 }
6709 PL_expect = XOPERATOR;
6710 force_ident_maybe_lex('@');
6711 TERM(PERLY_SNAIL);
6712}
6713
6714static int
6715yyl_slash(pTHX_ char *s)
6716{
6717 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6718 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6719 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6720 TOKEN(0);
6721 s += 2;
6722 AOPERATOR(DORDOR);
6723 }
6724 else if (PL_expect == XOPERATOR) {
6725 s++;
6726 if (*s == '=' && !PL_lex_allbrackets
6727 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6728 {
6729 s--;
6730 TOKEN(0);
6731 }
6732 Mop(OP_DIVIDE);
6733 }
6734 else {
6735 /* Disable warning on "study /blah/" */
6736 if ( PL_oldoldbufptr == PL_last_uni
6737 && ( *PL_last_uni != 's' || s - PL_last_uni < 5
6738 || memNE(PL_last_uni, "study", 5)
6739 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6740 ))
6741 check_uni();
6742 s = scan_pat(s,OP_MATCH);
6743 TERM(sublex_start());
6744 }
6745}
6746
6747static int
6748yyl_leftsquare(pTHX_ char *s)
6749{
6750 if (PL_lex_brackets > 100)
6751 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6752 PL_lex_brackstack[PL_lex_brackets++] = 0;
6753 PL_lex_allbrackets++;
6754 s++;
6755 OPERATOR(PERLY_BRACKET_OPEN);
6756}
6757
6758static int
6759yyl_rightsquare(pTHX_ char *s)
6760{
6761 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6762 TOKEN(0);
6763 s++;
6764 if (PL_lex_brackets <= 0)
6765 /* diag_listed_as: Unmatched right %s bracket */
6766 yyerror("Unmatched right square bracket");
6767 else
6768 --PL_lex_brackets;
6769 PL_lex_allbrackets--;
6770 if (PL_lex_state == LEX_INTERPNORMAL) {
6771 if (PL_lex_brackets == 0) {
6772 if (*s == '-' && s[1] == '>')
6773 PL_lex_state = LEX_INTERPENDMAYBE;
6774 else if (*s != '[' && *s != '{')
6775 PL_lex_state = LEX_INTERPEND;
6776 }
6777 }
6778 TERM(PERLY_BRACKET_CLOSE);
6779}
6780
6781static int
6782yyl_tilde(pTHX_ char *s)
6783{
6784 bool bof;
6785 if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6786 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6787 TOKEN(0);
6788 s += 2;
6789 Perl_ck_warner_d(aTHX_
6790 packWARN(WARN_DEPRECATED__SMARTMATCH),
6791 "Smartmatch is deprecated");
6792 NCEop(OP_SMARTMATCH);
6793 }
6794 s++;
6795 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6796 s++;
6797 BCop(OP_SCOMPLEMENT);
6798 }
6799 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6800}
6801
6802static int
6803yyl_leftparen(pTHX_ char *s)
6804{
6805 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6806 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6807 else
6808 PL_expect = XTERM;
6809 s = skipspace(s);
6810 PL_lex_allbrackets++;
6811 TOKEN(PERLY_PAREN_OPEN);
6812}
6813
6814static int
6815yyl_rightparen(pTHX_ char *s)
6816{
6817 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6818 TOKEN(0);
6819 s++;
6820 PL_lex_allbrackets--;
6821 s = skipspace(s);
6822 if (*s == '{')
6823 PREBLOCK(PERLY_PAREN_CLOSE);
6824 TERM(PERLY_PAREN_CLOSE);
6825}
6826
6827static int
6828yyl_leftpointy(pTHX_ char *s)
6829{
6830 char tmp;
6831
6832 if (PL_expect != XOPERATOR) {
6833 if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6834 check_uni();
6835 if (s[1] == '<' && s[2] != '>')
6836 s = scan_heredoc(s);
6837 else
6838 s = scan_inputsymbol(s);
6839 PL_expect = XOPERATOR;
6840 TOKEN(sublex_start());
6841 }
6842
6843 s++;
6844
6845 tmp = *s++;
6846 if (tmp == '<') {
6847 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6848 s -= 2;
6849 TOKEN(0);
6850 }
6851 SHop(OP_LEFT_SHIFT);
6852 }
6853 if (tmp == '=') {
6854 tmp = *s++;
6855 if (tmp == '>') {
6856 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6857 s -= 3;
6858 TOKEN(0);
6859 }
6860 NCEop(OP_NCMP);
6861 }
6862 s--;
6863 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6864 s -= 2;
6865 TOKEN(0);
6866 }
6867 ChRop(OP_LE);
6868 }
6869
6870 s--;
6871 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6872 s--;
6873 TOKEN(0);
6874 }
6875
6876 ChRop(OP_LT);
6877}
6878
6879static int
6880yyl_rightpointy(pTHX_ char *s)
6881{
6882 const char tmp = *s++;
6883
6884 if (tmp == '>') {
6885 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6886 s -= 2;
6887 TOKEN(0);
6888 }
6889 SHop(OP_RIGHT_SHIFT);
6890 }
6891 else if (tmp == '=') {
6892 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6893 s -= 2;
6894 TOKEN(0);
6895 }
6896 ChRop(OP_GE);
6897 }
6898
6899 s--;
6900 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6901 s--;
6902 TOKEN(0);
6903 }
6904
6905 ChRop(OP_GT);
6906}
6907
6908static int
6909yyl_sglquote(pTHX_ char *s)
6910{
6911 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6912 if (!s)
6913 missingterm(NULL, 0);
6914 COPLINE_SET_FROM_MULTI_END;
6915 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6916 if (PL_expect == XOPERATOR) {
6917 no_op("String",s);
6918 }
6919 pl_yylval.ival = OP_CONST;
6920 TERM(sublex_start());
6921}
6922
6923static int
6924yyl_dblquote(pTHX_ char *s)
6925{
6926 char *d;
6927 STRLEN len;
6928 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6929 DEBUG_T( {
6930 if (s)
6931 printbuf("### Saw string before %s\n", s);
6932 else
6933 PerlIO_printf(Perl_debug_log,
6934 "### Saw unterminated string\n");
6935 } );
6936 if (PL_expect == XOPERATOR) {
6937 no_op("String",s);
6938 }
6939 if (!s)
6940 missingterm(NULL, 0);
6941 pl_yylval.ival = OP_CONST;
6942 /* FIXME. I think that this can be const if char *d is replaced by
6943 more localised variables. */
6944 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6945 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6946 pl_yylval.ival = OP_STRINGIFY;
6947 break;
6948 }
6949 }
6950 if (pl_yylval.ival == OP_CONST)
6951 COPLINE_SET_FROM_MULTI_END;
6952 TERM(sublex_start());
6953}
6954
6955static int
6956yyl_backtick(pTHX_ char *s)
6957{
6958 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6959 DEBUG_T( {
6960 if (s)
6961 printbuf("### Saw backtick string before %s\n", s);
6962 else
6963 PerlIO_printf(Perl_debug_log,
6964 "### Saw unterminated backtick string\n");
6965 } );
6966 if (PL_expect == XOPERATOR)
6967 no_op("Backticks",s);
6968 if (!s)
6969 missingterm(NULL, 0);
6970 pl_yylval.ival = OP_BACKTICK;
6971 TERM(sublex_start());
6972}
6973
6974static int
6975yyl_backslash(pTHX_ char *s)
6976{
6977 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6978 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6979 *s, *s);
6980 if (PL_expect == XOPERATOR)
6981 no_op("Backslash",s);
6982 OPERATOR(REFGEN);
6983}
6984
6985static void
6986yyl_data_handle(pTHX)
6987{
6988 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6989 ? PL_curstash
6990 : PL_defstash;
6991 GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6992
6993 if (!isGV(gv))
6994 gv_init(gv,stash,"DATA",4,0);
6995
6996 GvMULTI_on(gv);
6997 if (!GvIO(gv))
6998 GvIOp(gv) = newIO();
6999 IoIFP(GvIOp(gv)) = PL_rsfp;
7000
7001 /* Mark this internal pseudo-handle as clean */
7002 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7003 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7004 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7005 else
7006 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7007
7008#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7009 /* if the script was opened in binmode, we need to revert
7010 * it to text mode for compatibility; but only iff it has CRs
7011 * XXX this is a questionable hack at best. */
7012 if (PL_bufend-PL_bufptr > 2
7013 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7014 {
7015 Off_t loc = 0;
7016 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7017 loc = PerlIO_tell(PL_rsfp);
7018 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7019 }
7020 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7021 if (loc > 0)
7022 PerlIO_seek(PL_rsfp, loc, 0);
7023 }
7024 }
7025#endif
7026
7027#ifdef PERLIO_LAYERS
7028 if (!IN_BYTES) {
7029 if (UTF)
7030 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7031 }
7032#endif
7033
7034 PL_rsfp = NULL;
7035}
7036
7037PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
7038 __attribute__noreturn__;
7039
7040PERL_STATIC_NO_RET void
7041yyl_croak_unrecognised(pTHX_ char *s)
7042{
7043 SV *dsv = newSVpvs_flags("", SVs_TEMP);
7044 const char *c;
7045 char *d;
7046 STRLEN len;
7047
7048 if (UTF) {
7049 STRLEN skiplen = UTF8SKIP(s);
7050 STRLEN stravail = PL_bufend - s;
7051 c = sv_uni_display(dsv, newSVpvn_flags(s,
7052 skiplen > stravail ? stravail : skiplen,
7053 SVs_TEMP | SVf_UTF8),
7054 10, UNI_DISPLAY_ISPRINT);
7055 }
7056 else {
7057 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
7058 }
7059
7060 if (s >= PL_linestart) {
7061 d = PL_linestart;
7062 }
7063 else {
7064 /* somehow (probably due to a parse failure), PL_linestart has advanced
7065 * pass PL_bufptr, get a reasonable beginning of line
7066 */
7067 d = s;
7068 while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
7069 --d;
7070 }
7071 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
7072 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
7073 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
7074 }
7075
7076 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
7077 UTF8fARG(UTF, (s - d), d),
7078 (int) len + 1);
7079}
7080
7081static int
7082yyl_require(pTHX_ char *s, I32 orig_keyword)
7083{
7084 s = skipspace(s);
7085 if (isDIGIT(*s)) {
7086 s = force_version(s, FALSE);
7087 }
7088 else if (*s != 'v' || !isDIGIT(s[1])
7089 || (s = force_version(s, TRUE), *s == 'v'))
7090 {
7091 *PL_tokenbuf = '\0';
7092 s = force_word(s,BAREWORD,TRUE,TRUE);
7093 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
7094 PL_tokenbuf + sizeof(PL_tokenbuf),
7095 UTF))
7096 {
7097 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7098 GV_ADD | (UTF ? SVf_UTF8 : 0));
7099 }
7100 else if (*s == '<')
7101 yyerror("<> at require-statement should be quotes");
7102 }
7103
7104 if (orig_keyword == KEY_require)
7105 pl_yylval.ival = 1;
7106 else
7107 pl_yylval.ival = 0;
7108
7109 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
7110 PL_bufptr = s;
7111 PL_last_uni = PL_oldbufptr;
7112 PL_last_lop_op = OP_REQUIRE;
7113 s = skipspace(s);
7114 return REPORT( (int)KW_REQUIRE );
7115}
7116
7117static int
7118yyl_foreach(pTHX_ char *s)
7119{
7120 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7121 return REPORT(0);
7122 pl_yylval.ival = CopLINE(PL_curcop);
7123 s = skipspace(s);
7124 if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7125 char *p = s;
7126 SSize_t s_off = s - SvPVX(PL_linestr);
7127 bool paren_is_valid = FALSE;
7128 bool maybe_package = FALSE;
7129 bool saw_core = FALSE;
7130 bool core_valid = FALSE;
7131
7132 if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
7133 saw_core = TRUE;
7134 p += 6;
7135 }
7136 if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
7137 core_valid = TRUE;
7138 paren_is_valid = TRUE;
7139 if (isSPACE(p[2])) {
7140 p = skipspace(p + 3);
7141 maybe_package = TRUE;
7142 }
7143 else {
7144 p += 2;
7145 }
7146 }
7147 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
7148 core_valid = TRUE;
7149 if (isSPACE(p[3])) {
7150 p = skipspace(p + 4);
7151 maybe_package = TRUE;
7152 }
7153 else {
7154 p += 3;
7155 }
7156 }
7157 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
7158 core_valid = TRUE;
7159 if (isSPACE(p[5])) {
7160 p = skipspace(p + 6);
7161 }
7162 else {
7163 p += 5;
7164 }
7165 }
7166 if (saw_core && !core_valid) {
7167 Perl_croak(aTHX_ "Missing $ on loop variable");
7168 }
7169
7170 if (maybe_package && !saw_core) {
7171 /* skip optional package name, as in "for my abc $x (..)" */
7172 if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
7173 STRLEN len;
7174 p = scan_word6(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE);
7175 p = skipspace(p);
7176 paren_is_valid = FALSE;
7177 }
7178 }
7179
7180 if (UNLIKELY(paren_is_valid && *p == '(')) {
7181 Perl_ck_warner_d(aTHX_
7182 packWARN(WARN_EXPERIMENTAL__FOR_LIST),
7183 "for my (...) is experimental");
7184 }
7185 else if (UNLIKELY(*p != '$' && *p != '\\')) {
7186 /* "for myfoo (" will end up here, but with p pointing at the 'f' */
7187 Perl_croak(aTHX_ "Missing $ on loop variable");
7188 }
7189 /* The buffer may have been reallocated, update s */
7190 s = SvPVX(PL_linestr) + s_off;
7191 }
7192 OPERATOR(KW_FOR);
7193}
7194
7195static int
7196yyl_do(pTHX_ char *s, I32 orig_keyword)
7197{
7198 s = skipspace(s);
7199 if (*s == '{')
7200 PRETERMBLOCK(KW_DO);
7201 if (*s != '\'') {
7202 char *d;
7203 STRLEN len;
7204 *PL_tokenbuf = '&';
7205 d = scan_word6(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7206 1, &len, TRUE);
7207 if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7208 && !keyword(PL_tokenbuf + 1, len, 0)) {
7209 SSize_t off = s-SvPVX(PL_linestr);
7210 d = skipspace(d);
7211 s = SvPVX(PL_linestr)+off;
7212 if (*d == '(') {
7213 force_ident_maybe_lex('&');
7214 s = d;
7215 }
7216 }
7217 }
7218 if (orig_keyword == KEY_do)
7219 pl_yylval.ival = 1;
7220 else
7221 pl_yylval.ival = 0;
7222 OPERATOR(KW_DO);
7223}
7224
7225static int
7226yyl_my(pTHX_ char *s, I32 my)
7227{
7228 if (PL_in_my) {
7229 PL_bufptr = s;
7230 yyerror(Perl_form(aTHX_
7231 "Can't redeclare \"%s\" in \"%s\"",
7232 my == KEY_my ? "my" :
7233 my == KEY_state ? "state" : "our",
7234 PL_in_my == KEY_my ? "my" :
7235 PL_in_my == KEY_state ? "state" : "our"));
7236 }
7237 PL_in_my = (U16)my;
7238 s = skipspace(s);
7239 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7240 STRLEN len;
7241 s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE);
7242 if (memEQs(PL_tokenbuf, len, "sub"))
7243 return yyl_sub(aTHX_ s, my);
7244 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7245 if (!PL_in_my_stash) {
7246 char tmpbuf[1024];
7247 int i;
7248 PL_bufptr = s;
7249 i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7250 PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
7251 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7252 }
7253 }
7254 else if (*s == '\\') {
7255 if (!FEATURE_MYREF_IS_ENABLED)
7256 Perl_croak(aTHX_ "The experimental declared_refs "
7257 "feature is not enabled");
7258 Perl_ck_warner_d(aTHX_
7259 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
7260 "Declaring references is experimental");
7261 }
7262 OPERATOR(KW_MY);
7263}
7264
7265static int yyl_try(pTHX_ char*);
7266
7267static bool
7268yyl_eol_needs_semicolon(pTHX_ char **ps)
7269{
7270 char *s = *ps;
7271 if (PL_lex_state != LEX_NORMAL
7272 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
7273 {
7274 const bool in_comment = *s == '#';
7275 char *d;
7276 if (*s == '#' && s == PL_linestart && PL_in_eval
7277 && !PL_rsfp && !PL_parser->filtered) {
7278 /* handle eval qq[#line 1 "foo"\n ...] */
7279 CopLINE_dec(PL_curcop);
7280 incline(s, PL_bufend);
7281 }
7282 d = s;
7283 while (d < PL_bufend && *d != '\n')
7284 d++;
7285 if (d < PL_bufend)
7286 d++;
7287 s = d;
7288 if (in_comment && d == PL_bufend
7289 && PL_lex_state == LEX_INTERPNORMAL
7290 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7291 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
7292 else
7293 incline(s, PL_bufend);
7294 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7295 PL_lex_state = LEX_FORMLINE;
7296 force_next(FORMRBRACK);
7297 *ps = s;
7298 return TRUE;
7299 }
7300 }
7301 else {
7302 while (s < PL_bufend && *s != '\n')
7303 s++;
7304 if (s < PL_bufend) {
7305 s++;
7306 if (s < PL_bufend)
7307 incline(s, PL_bufend);
7308 }
7309 }
7310 *ps = s;
7311 return FALSE;
7312}
7313
7314static int
7315yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
7316{
7317 char *d;
7318
7319 goto start;
7320
7321 do {
7322 fake_eof = 0;
7323 bof = cBOOL(PL_rsfp);
7324 start:
7325
7326 PL_bufptr = PL_bufend;
7327 COPLINE_INC_WITH_HERELINES;
7328 if (!lex_next_chunk(fake_eof)) {
7329 CopLINE_dec(PL_curcop);
7330 s = PL_bufptr;
7331 TOKEN(PERLY_SEMICOLON); /* not infinite loop because rsfp is NULL now */
7332 }
7333 CopLINE_dec(PL_curcop);
7334 s = PL_bufptr;
7335 /* If it looks like the start of a BOM or raw UTF-16,
7336 * check if it in fact is. */
7337 if (bof && PL_rsfp
7338 && ( *s == 0
7339 || *(U8*)s == BOM_UTF8_FIRST_BYTE
7340 || *(U8*)s >= 0xFE
7341 || s[1] == 0))
7342 {
7343 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
7344 bof = (offset == (Off_t)SvCUR(PL_linestr));
7345#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
7346 /* offset may include swallowed CR */
7347 if (!bof)
7348 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
7349#endif
7350 if (bof) {
7351 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7352 s = swallow_bom((U8*)s);
7353 }
7354 }
7355 if (PL_parser->in_pod) {
7356 /* Incest with pod. */
7357 if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
7358 && !isALPHA(s[4]))
7359 {
7360 SvPVCLEAR(PL_linestr);
7361 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7362 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7363 PL_last_lop = PL_last_uni = NULL;
7364 PL_parser->in_pod = 0;
7365 }
7366 }
7367 if (PL_rsfp || PL_parser->filtered)
7368 incline(s, PL_bufend);
7369 } while (PL_parser->in_pod);
7370
7371 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7372 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7373 PL_last_lop = PL_last_uni = NULL;
7374 if (CopLINE(PL_curcop) == 1) {
7375 while (s < PL_bufend && isSPACE(*s))
7376 s++;
7377 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7378 s++;
7379 d = NULL;
7380 if (!PL_in_eval) {
7381 if (*s == '#' && *(s+1) == '!')
7382 d = s + 2;
7383#ifdef ALTERNATE_SHEBANG
7384 else {
7385 static char const as[] = ALTERNATE_SHEBANG;
7386 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7387 d = s + (sizeof(as) - 1);
7388 }
7389#endif /* ALTERNATE_SHEBANG */
7390 }
7391 if (d) {
7392 char *ipath;
7393 char *ipathend;
7394
7395 while (isSPACE(*d))
7396 d++;
7397 ipath = d;
7398 while (*d && !isSPACE(*d))
7399 d++;
7400 ipathend = d;
7401
7402#ifdef ARG_ZERO_IS_SCRIPT
7403 if (ipathend > ipath) {
7404 /*
7405 * HP-UX (at least) sets argv[0] to the script name,
7406 * which makes $^X incorrect. And Digital UNIX and Linux,
7407 * at least, set argv[0] to the basename of the Perl
7408 * interpreter. So, having found "#!", we'll set it right.
7409 */
7410 SV* copfilesv = CopFILESV(PL_curcop);
7411 if (copfilesv) {
7412 SV * const x =
7413 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7414 SVt_PV)); /* $^X */
7415 assert(SvPOK(x) || SvGMAGICAL(x));
7416 if (sv_eq(x, copfilesv)) {
7417 sv_setpvn(x, ipath, ipathend - ipath);
7418 SvSETMAGIC(x);
7419 }
7420 else {
7421 STRLEN blen;
7422 STRLEN llen;
7423 const char *bstart = SvPV_const(copfilesv, blen);
7424 const char * const lstart = SvPV_const(x, llen);
7425 if (llen < blen) {
7426 bstart += blen - llen;
7427 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
7428 sv_setpvn(x, ipath, ipathend - ipath);
7429 SvSETMAGIC(x);
7430 }
7431 }
7432 }
7433 }
7434 else {
7435 /* Anything to do if no copfilesv? */
7436 }
7437 TAINT_NOT; /* $^X is always tainted, but that's OK */
7438 }
7439#endif /* ARG_ZERO_IS_SCRIPT */
7440
7441 /*
7442 * Look for options.
7443 */
7444 d = instr(s,"perl -");
7445 if (!d) {
7446 d = instr(s,"perl");
7447#if defined(DOSISH)
7448 /* avoid getting into infinite loops when shebang
7449 * line contains "Perl" rather than "perl" */
7450 if (!d) {
7451 for (d = ipathend-4; d >= ipath; --d) {
7452 if (isALPHA_FOLD_EQ(*d, 'p')
7453 && !ibcmp(d, "perl", 4))
7454 {
7455 break;
7456 }
7457 }
7458 if (d < ipath)
7459 d = NULL;
7460 }
7461#endif
7462 }
7463#ifdef ALTERNATE_SHEBANG
7464 /*
7465 * If the ALTERNATE_SHEBANG on this system starts with a
7466 * character that can be part of a Perl expression, then if
7467 * we see it but not "perl", we're probably looking at the
7468 * start of Perl code, not a request to hand off to some
7469 * other interpreter. Similarly, if "perl" is there, but
7470 * not in the first 'word' of the line, we assume the line
7471 * contains the start of the Perl program.
7472 */
7473 if (d && *s != '#') {
7474 const char *c = ipath;
7475 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7476 c++;
7477 if (c < d)
7478 d = NULL; /* "perl" not in first word; ignore */
7479 else
7480 *s = '#'; /* Don't try to parse shebang line */
7481 }
7482#endif /* ALTERNATE_SHEBANG */
7483 if (!d
7484 && *s == '#'
7485 && ipathend > ipath
7486 && !PL_minus_c
7487 && !instr(s,"indir")
7488 && instr(PL_origargv[0],"perl"))
7489 {
7490 char **newargv;
7491
7492 *ipathend = '\0';
7493 s = ipathend + 1;
7494 while (s < PL_bufend && isSPACE(*s))
7495 s++;
7496 if (s < PL_bufend) {
7497 Newx(newargv,PL_origargc+3,char*);
7498 newargv[1] = s;
7499 while (s < PL_bufend && !isSPACE(*s))
7500 s++;
7501 *s = '\0';
7502 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7503 }
7504 else
7505 newargv = PL_origargv;
7506 newargv[0] = ipath;
7507 PERL_FPU_PRE_EXEC
7508 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7509 PERL_FPU_POST_EXEC
7510 Perl_croak(aTHX_ "Can't exec %s", ipath);
7511 }
7512 if (d) {
7513 while (*d && !isSPACE(*d))
7514 d++;
7515 while (SPACE_OR_TAB(*d))
7516 d++;
7517
7518 if (*d++ == '-') {
7519 const bool switches_done = PL_doswitches;
7520 const U32 oldpdb = PL_perldb;
7521 const bool oldn = PL_minus_n;
7522 const bool oldp = PL_minus_p;
7523 const char *d1 = d;
7524
7525 do {
7526 bool baduni = FALSE;
7527 if (*d1 == 'C') {
7528 const char *d2 = d1 + 1;
7529 if (parse_unicode_opts((const char **)&d2)
7530 != PL_unicode)
7531 baduni = TRUE;
7532 }
7533 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7534 const char * const m = d1;
7535 while (*d1 && !isSPACE(*d1))
7536 d1++;
7537 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7538 (int)(d1 - m), m);
7539 }
7540 d1 = moreswitches(d1);
7541 } while (d1);
7542 if (PL_doswitches && !switches_done) {
7543 int argc = PL_origargc;
7544 char **argv = PL_origargv;
7545 do {
7546 argc--,argv++;
7547 } while (argc && argv[0][0] == '-' && argv[0][1]);
7548 init_argv_symbols(argc,argv);
7549 }
7550 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7551 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7552 /* if we have already added "LINE: while (<>) {",
7553 we must not do it again */
7554 {
7555 SvPVCLEAR(PL_linestr);
7556 PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7557 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7558 PL_last_lop = PL_last_uni = NULL;
7559 PL_preambled = FALSE;
7560 if (PERLDB_LINE_OR_SAVESRC)
7561 (void)gv_fetchfile(PL_origfilename);
7562 return YYL_RETRY;
7563 }
7564 }
7565 }
7566 }
7567 }
7568
7569 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7570 PL_lex_state = LEX_FORMLINE;
7571 force_next(FORMRBRACK);
7572 TOKEN(PERLY_SEMICOLON);
7573 }
7574
7575 PL_bufptr = s;
7576 return YYL_RETRY;
7577}
7578
7579static int
7580yyl_fatcomma(pTHX_ char *s, STRLEN len)
7581{
7582 CLINE;
7583 pl_yylval.opval
7584 = newSVOP(OP_CONST, 0,
7585 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7586 pl_yylval.opval->op_private = OPpCONST_BARE;
7587 TERM(BAREWORD);
7588}
7589
7590static int
7591yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7592{
7593 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7594 && PL_parser->saw_infix_sigil)
7595 {
7596 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7597 "Operator or semicolon missing before %c%" UTF8f,
7598 lastchar,
7599 UTF8fARG(UTF, strlen(PL_tokenbuf),
7600 PL_tokenbuf));
7601 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7602 "Ambiguous use of %c resolved as operator %c",
7603 lastchar, lastchar);
7604 }
7605 TOKEN(BAREWORD);
7606}
7607
7608static int
7609yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7610{
7611 if (sv) {
7612 op_free(rv2cv_op);
7613 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7614 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7615 if (SvTYPE(sv) == SVt_PVAV)
7616 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7617 pl_yylval.opval);
7618 else {
7619 pl_yylval.opval->op_private = 0;
7620 pl_yylval.opval->op_folded = 1;
7621 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7622 }
7623 TOKEN(BAREWORD);
7624 }
7625
7626 op_free(pl_yylval.opval);
7627 pl_yylval.opval =
7628 off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7629 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7630 PL_last_lop = PL_oldbufptr;
7631 PL_last_lop_op = OP_ENTERSUB;
7632
7633 /* Is there a prototype? */
7634 if (SvPOK(cv)) {
7635 int k = yyl_subproto(aTHX_ s, cv);
7636 if (k != KEY_NULL)
7637 return k;
7638 }
7639
7640 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7641 PL_expect = XTERM;
7642 force_next(off ? PRIVATEREF : BAREWORD);
7643 if (!PL_lex_allbrackets
7644 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7645 {
7646 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7647 }
7648
7649 TOKEN(NOAMP);
7650}
7651
7652/* Honour "reserved word" warnings, and enforce strict subs */
7653static void
7654yyl_strictwarn_bareword(pTHX_ const char lastchar)
7655{
7656 /* after "print" and similar functions (corresponding to
7657 * "F? L" in opcode.pl), whatever wasn't already parsed as
7658 * a filehandle should be subject to "strict subs".
7659 * Likewise for the optional indirect-object argument to system
7660 * or exec, which can't be a bareword */
7661 if ((PL_last_lop_op == OP_PRINT
7662 || PL_last_lop_op == OP_PRTF
7663 || PL_last_lop_op == OP_SAY
7664 || PL_last_lop_op == OP_SYSTEM
7665 || PL_last_lop_op == OP_EXEC)
7666 && (PL_hints & HINT_STRICT_SUBS))
7667 {
7668 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7669 }
7670
7671 if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7672 char *d = PL_tokenbuf;
7673 while (isLOWER(*d))
7674 d++;
7675 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7676 /* PL_warn_reserved is constant */
7677 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7678 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7679 PL_tokenbuf);
7680 GCC_DIAG_RESTORE_STMT;
7681 }
7682 }
7683}
7684
7685static int
7686yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7687{
7688 int pkgname = 0;
7689 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7690 bool safebw;
7691 bool no_op_error = FALSE;
7692 /* Use this var to track whether intuit_method has been
7693 called. intuit_method returns 0 or > 255. */
7694 int key = 1;
7695
7696 if (PL_expect == XOPERATOR) {
7697 if (PL_bufptr == PL_linestart) {
7698 CopLINE_dec(PL_curcop);
7699 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7700 CopLINE_inc(PL_curcop);
7701 }
7702 else
7703 /* We want to call no_op with s pointing after the
7704 bareword, so defer it. But we want it to come
7705 before the Bad name croak. */
7706 no_op_error = TRUE;
7707 }
7708
7709 /* Get the rest if it looks like a package qualifier */
7710
7711 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7712 STRLEN morelen;
7713 s = scan_word6(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7714 TRUE, &morelen, TRUE);
7715 if (no_op_error) {
7716 no_op("Bareword",s);
7717 no_op_error = FALSE;
7718 }
7719 if (!morelen)
7720 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7721 UTF8fARG(UTF, len, PL_tokenbuf),
7722 *s == '\'' ? "'" : "::");
7723 len += morelen;
7724 pkgname = 1;
7725 }
7726
7727 if (no_op_error)
7728 no_op("Bareword",s);
7729
7730 /* See if the name is "Foo::",
7731 in which case Foo is a bareword
7732 (and a package name). */
7733
7734 if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7735 if (ckWARN(WARN_BAREWORD)
7736 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7737 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7738 "Bareword \"%" UTF8f
7739 "\" refers to nonexistent package",
7740 UTF8fARG(UTF, len, PL_tokenbuf));
7741 len -= 2;
7742 PL_tokenbuf[len] = '\0';
7743 c.gv = NULL;
7744 c.gvp = 0;
7745 safebw = TRUE;
7746 }
7747 else {
7748 safebw = FALSE;
7749 }
7750
7751 /* if we saw a global override before, get the right name */
7752
7753 if (!c.sv)
7754 c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7755 if (c.gvp) {
7756 SV *sv = newSVpvs("CORE::GLOBAL::");
7757 sv_catsv(sv, c.sv);
7758 SvREFCNT_dec(c.sv);
7759 c.sv = sv;
7760 }
7761
7762 /* Presume this is going to be a bareword of some sort. */
7763 CLINE;
7764 pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7765 pl_yylval.opval->op_private = OPpCONST_BARE;
7766
7767 /* And if "Foo::", then that's what it certainly is. */
7768 if (safebw)
7769 return yyl_safe_bareword(aTHX_ s, lastchar);
7770
7771 if (!c.off) {
7772 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7773 const_op->op_private = OPpCONST_BARE;
7774 c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7775 c.cv = c.lex
7776 ? isGV(c.gv)
7777 ? GvCV(c.gv)
7778 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7779 ? (CV *)SvRV(c.gv)
7780 : ((CV *)c.gv)
7781 : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7782 }
7783
7784 /* See if it's the indirect object for a list operator. */
7785
7786 if (PL_oldoldbufptr
7787 && PL_oldoldbufptr < PL_bufptr
7788 && (PL_oldoldbufptr == PL_last_lop
7789 || PL_oldoldbufptr == PL_last_uni)
7790 && /* NO SKIPSPACE BEFORE HERE! */
7791 (PL_expect == XREF
7792 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7793 == OA_FILEREF))
7794 {
7795 bool immediate_paren = *s == '(';
7796 SSize_t s_off;
7797
7798 /* (Now we can afford to cross potential line boundary.) */
7799 s = skipspace(s);
7800
7801 /* intuit_method() can indirectly call lex_next_chunk(),
7802 * invalidating s
7803 */
7804 s_off = s - SvPVX(PL_linestr);
7805 /* Two barewords in a row may indicate method call. */
7806 if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7807 || *s == '$')
7808 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7809 {
7810 /* the code at method: doesn't use s */
7811 goto method;
7812 }
7813 s = SvPVX(PL_linestr) + s_off;
7814
7815 /* If not a declared subroutine, it's an indirect object. */
7816 /* (But it's an indir obj regardless for sort.) */
7817 /* Also, if "_" follows a filetest operator, it's a bareword */
7818
7819 if (
7820 ( !immediate_paren && (PL_last_lop_op == OP_SORT
7821 || (!c.cv
7822 && (PL_last_lop_op != OP_MAPSTART
7823 && PL_last_lop_op != OP_GREPSTART))))
7824 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7825 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7826 == OA_FILESTATOP))
7827 )
7828 {
7829 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7830 yyl_strictwarn_bareword(aTHX_ lastchar);
7831 op_free(c.rv2cv_op);
7832 return yyl_safe_bareword(aTHX_ s, lastchar);
7833 }
7834 }
7835
7836 PL_expect = XOPERATOR;
7837 s = skipspace(s);
7838
7839 /* Is this a word before a => operator? */
7840 if (*s == '=' && s[1] == '>' && !pkgname) {
7841 op_free(c.rv2cv_op);
7842 CLINE;
7843 if (c.gvp || (c.lex && !c.off)) {
7844 assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7845 /* This is our own scalar, created a few lines
7846 above, so this is safe. */
7847 SvREADONLY_off(c.sv);
7848 sv_setpv(c.sv, PL_tokenbuf);
7849 if (UTF && !IN_BYTES
7850 && is_utf8_string((U8*)PL_tokenbuf, len))
7851 SvUTF8_on(c.sv);
7852 SvREADONLY_on(c.sv);
7853 }
7854 TERM(BAREWORD);
7855 }
7856
7857 /* If followed by a paren, it's certainly a subroutine. */
7858 if (*s == '(') {
7859 CLINE;
7860 if (c.cv) {
7861 char *d = s + 1;
7862 while (SPACE_OR_TAB(*d))
7863 d++;
7864 if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7865 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7866 }
7867 NEXTVAL_NEXTTOKE.opval =
7868 c.off ? c.rv2cv_op : pl_yylval.opval;
7869 if (c.off)
7870 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7871 else op_free(c.rv2cv_op), force_next(BAREWORD);
7872 pl_yylval.ival = 0;
7873 TOKEN(PERLY_AMPERSAND);
7874 }
7875
7876 /* If followed by var or block, call it a method (unless sub) */
7877
7878 if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7879 op_free(c.rv2cv_op);
7880 PL_last_lop = PL_oldbufptr;
7881 PL_last_lop_op = OP_METHOD;
7882 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7883 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7884 PL_expect = XBLOCKTERM;
7885 PL_bufptr = s;
7886 return REPORT(METHCALL0);
7887 }
7888
7889 /* If followed by a bareword, see if it looks like indir obj. */
7890
7891 if ( key == 1
7892 && !orig_keyword
7893 && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7894 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7895 {
7896 method:
7897 if (c.lex && !c.off) {
7898 assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7899 SvREADONLY_off(c.sv);
7900 sv_setpvn(c.sv, PL_tokenbuf, len);
7901 if (UTF && !IN_BYTES
7902 && is_utf8_string((U8*)PL_tokenbuf, len))
7903 SvUTF8_on(c.sv);
7904 else SvUTF8_off(c.sv);
7905 }
7906 op_free(c.rv2cv_op);
7907 if (key == METHCALL0 && !PL_lex_allbrackets
7908 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7909 {
7910 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7911 }
7912 return REPORT(key);
7913 }
7914
7915 /* Not a method, so call it a subroutine (if defined) */
7916
7917 if (c.cv) {
7918 /* Check for a constant sub */
7919 c.sv = cv_const_sv_or_av(c.cv);
7920 return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7921 }
7922
7923 /* Call it a bare word */
7924
7925 if (PL_hints & HINT_STRICT_SUBS)
7926 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7927 else
7928 yyl_strictwarn_bareword(aTHX_ lastchar);
7929
7930 op_free(c.rv2cv_op);
7931
7932 return yyl_safe_bareword(aTHX_ s, lastchar);
7933}
7934
7935static int
7936yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7937{
7938 switch (key) {
7939 default: /* not a keyword */
7940 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7941
7942 case KEY___FILE__:
7943 FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7944
7945 case KEY___LINE__:
7946 FUN0OP(
7947 newSVOP(OP_CONST, 0,
7948 Perl_newSVpvf(aTHX_ "%" LINE_Tf, CopLINE(PL_curcop)))
7949 );
7950
7951 case KEY___PACKAGE__:
7952 FUN0OP(
7953 newSVOP(OP_CONST, 0, (PL_curstash
7954 ? newSVhek(HvNAME_HEK(PL_curstash))
7955 : &PL_sv_undef))
7956 );
7957
7958 case KEY___DATA__:
7959 case KEY___END__:
7960 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7961 yyl_data_handle(aTHX);
7962 return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7963
7964 case KEY___SUB__:
7965 /* If !CvCLONE(PL_compcv) then rpeep will probably turn this into an
7966 * OP_CONST. We need to make it big enough to allow room for that if
7967 * so */
7968 FUN0OP(CvCLONE(PL_compcv)
7969 ? newOP(OP_RUNCV, 0)
7970 : newSVOP(OP_RUNCV, 0, &PL_sv_undef));
7971
7972 case KEY___CLASS__:
7973 FUN0(OP_CLASSNAME);
7974
7975 case KEY_AUTOLOAD:
7976 case KEY_DESTROY:
7977 case KEY_BEGIN:
7978 case KEY_UNITCHECK:
7979 case KEY_CHECK:
7980 case KEY_INIT:
7981 case KEY_END:
7982 if (PL_expect == XSTATE)
7983 return yyl_sub(aTHX_ PL_bufptr, key);
7984 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7985
7986 case KEY_ADJUST:
7987 Perl_ck_warner_d(aTHX_
7988 packWARN(WARN_EXPERIMENTAL__CLASS), "ADJUST is experimental");
7989
7990 /* The way that KEY_CHECK et.al. are handled currently are nothing
7991 * short of crazy. We won't copy that model for new phasers, but use
7992 * this as an experiment to test if this will work
7993 */
7994 PHASERBLOCK(KEY_ADJUST);
7995
7996 case KEY_abs:
7997 UNI(OP_ABS);
7998
7999 case KEY_alarm:
8000 UNI(OP_ALARM);
8001
8002 case KEY_accept:
8003 LOP(OP_ACCEPT,XTERM);
8004
8005 case KEY_and:
8006 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8007 return REPORT(0);
8008 OPERATOR(ANDOP);
8009
8010 case KEY_atan2:
8011 LOP(OP_ATAN2,XTERM);
8012
8013 case KEY_bind:
8014 LOP(OP_BIND,XTERM);
8015
8016 case KEY_binmode:
8017 LOP(OP_BINMODE,XTERM);
8018
8019 case KEY_bless:
8020 LOP(OP_BLESS,XTERM);
8021
8022 case KEY_break:
8023 FUN0(OP_BREAK);
8024
8025 case KEY_catch:
8026 Perl_ck_warner_d(aTHX_
8027 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
8028 PREBLOCK(KW_CATCH);
8029
8030 case KEY_chop:
8031 UNI(OP_CHOP);
8032
8033 case KEY_class:
8034 Perl_ck_warner_d(aTHX_
8035 packWARN(WARN_EXPERIMENTAL__CLASS), "class is experimental");
8036
8037 s = force_word(s,BAREWORD,FALSE,TRUE);
8038 s = skipspace(s);
8039 s = force_strict_version(s);
8040 PL_expect = XATTRBLOCK;
8041 TOKEN(KW_CLASS);
8042
8043 case KEY_continue:
8044 /* We have to disambiguate the two senses of
8045 "continue". If the next token is a '{' then
8046 treat it as the start of a continue block;
8047 otherwise treat it as a control operator.
8048 */
8049 s = skipspace(s);
8050 if (*s == '{')
8051 PREBLOCK(KW_CONTINUE);
8052 else
8053 FUN0(OP_CONTINUE);
8054
8055 case KEY_chdir:
8056 /* may use HOME */
8057 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
8058 UNI(OP_CHDIR);
8059
8060 case KEY_close:
8061 UNI(OP_CLOSE);
8062
8063 case KEY_closedir:
8064 UNI(OP_CLOSEDIR);
8065
8066 case KEY_cmp:
8067 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8068 return REPORT(0);
8069 NCEop(OP_SCMP);
8070
8071 case KEY_caller:
8072 UNI(OP_CALLER);
8073
8074 case KEY_crypt:
8075
8076 LOP(OP_CRYPT,XTERM);
8077
8078 case KEY_chmod:
8079 LOP(OP_CHMOD,XTERM);
8080
8081 case KEY_chown:
8082 LOP(OP_CHOWN,XTERM);
8083
8084 case KEY_connect:
8085 LOP(OP_CONNECT,XTERM);
8086
8087 case KEY_chr:
8088 UNI(OP_CHR);
8089
8090 case KEY_cos:
8091 UNI(OP_COS);
8092
8093 case KEY_chroot:
8094 UNI(OP_CHROOT);
8095
8096 case KEY_default:
8097 PREBLOCK(KW_DEFAULT);
8098
8099 case KEY_defer:
8100 Perl_ck_warner_d(aTHX_
8101 packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental");
8102 PREBLOCK(KW_DEFER);
8103
8104 case KEY_do:
8105 return yyl_do(aTHX_ s, orig_keyword);
8106
8107 case KEY_die:
8108 PL_hints |= HINT_BLOCK_SCOPE;
8109 LOP(OP_DIE,XTERM);
8110
8111 case KEY_defined:
8112 UNI(OP_DEFINED);
8113
8114 case KEY_delete:
8115 UNI(OP_DELETE);
8116
8117 case KEY_dbmopen:
8118 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
8119 STR_WITH_LEN("NDBM_File::"),
8120 STR_WITH_LEN("DB_File::"),
8121 STR_WITH_LEN("GDBM_File::"),
8122 STR_WITH_LEN("SDBM_File::"),
8123 STR_WITH_LEN("ODBM_File::"),
8124 NULL);
8125 LOP(OP_DBMOPEN,XTERM);
8126
8127 case KEY_dbmclose:
8128 UNI(OP_DBMCLOSE);
8129
8130 case KEY_dump:
8131 LOOPX(OP_DUMP);
8132
8133 case KEY_else:
8134 PREBLOCK(KW_ELSE);
8135
8136 case KEY_elsif:
8137 pl_yylval.ival = CopLINE(PL_curcop);
8138 OPERATOR(KW_ELSIF);
8139
8140 case KEY_eq:
8141 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8142 return REPORT(0);
8143 ChEop(OP_SEQ);
8144
8145 case KEY_exists:
8146 UNI(OP_EXISTS);
8147
8148 case KEY_exit:
8149 UNI(OP_EXIT);
8150
8151 case KEY_eval:
8152 s = skipspace(s);
8153 if (*s == '{') { /* block eval */
8154 PL_expect = XTERMBLOCK;
8155 UNIBRACK(OP_ENTERTRY);
8156 }
8157 else { /* string eval */
8158 PL_expect = XTERM;
8159 UNIBRACK(OP_ENTEREVAL);
8160 }
8161
8162 case KEY_evalbytes:
8163 PL_expect = XTERM;
8164 UNIBRACK(-OP_ENTEREVAL);
8165
8166 case KEY_eof:
8167 UNI(OP_EOF);
8168
8169 case KEY_exp:
8170 UNI(OP_EXP);
8171
8172 case KEY_each:
8173 UNI(OP_EACH);
8174
8175 case KEY_exec:
8176 LOP(OP_EXEC,XREF);
8177
8178 case KEY_endhostent:
8179 FUN0(OP_EHOSTENT);
8180
8181 case KEY_endnetent:
8182 FUN0(OP_ENETENT);
8183
8184 case KEY_endservent:
8185 FUN0(OP_ESERVENT);
8186
8187 case KEY_endprotoent:
8188 FUN0(OP_EPROTOENT);
8189
8190 case KEY_endpwent:
8191 FUN0(OP_EPWENT);
8192
8193 case KEY_endgrent:
8194 FUN0(OP_EGRENT);
8195
8196 case KEY_field:
8197 /* TODO: maybe this should use the same parser/grammar structures as
8198 * `my`, but it's also rather messy because of the `our` conflation
8199 */
8200 Perl_ck_warner_d(aTHX_
8201 packWARN(WARN_EXPERIMENTAL__CLASS), "field is experimental");
8202
8203 croak_kw_unless_class("field");
8204
8205 PL_parser->in_my = KEY_field;
8206 OPERATOR(KW_FIELD);
8207
8208 case KEY_finally:
8209 Perl_ck_warner_d(aTHX_
8210 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental");
8211 PREBLOCK(KW_FINALLY);
8212
8213 case KEY_for:
8214 case KEY_foreach:
8215 return yyl_foreach(aTHX_ s);
8216
8217 case KEY_formline:
8218 LOP(OP_FORMLINE,XTERM);
8219
8220 case KEY_fork:
8221 FUN0(OP_FORK);
8222
8223 case KEY_fc:
8224 UNI(OP_FC);
8225
8226 case KEY_fcntl:
8227 LOP(OP_FCNTL,XTERM);
8228
8229 case KEY_fileno:
8230 UNI(OP_FILENO);
8231
8232 case KEY_flock:
8233 LOP(OP_FLOCK,XTERM);
8234
8235 case KEY_gt:
8236 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8237 return REPORT(0);
8238 ChRop(OP_SGT);
8239
8240 case KEY_ge:
8241 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8242 return REPORT(0);
8243 ChRop(OP_SGE);
8244
8245 case KEY_grep:
8246 LOP(OP_GREPSTART, XREF);
8247
8248 case KEY_goto:
8249 LOOPX(OP_GOTO);
8250
8251 case KEY_gmtime:
8252 UNI(OP_GMTIME);
8253
8254 case KEY_getc:
8255 UNIDOR(OP_GETC);
8256
8257 case KEY_getppid:
8258 FUN0(OP_GETPPID);
8259
8260 case KEY_getpgrp:
8261 UNI(OP_GETPGRP);
8262
8263 case KEY_getpriority:
8264 LOP(OP_GETPRIORITY,XTERM);
8265
8266 case KEY_getprotobyname:
8267 UNI(OP_GPBYNAME);
8268
8269 case KEY_getprotobynumber:
8270 LOP(OP_GPBYNUMBER,XTERM);
8271
8272 case KEY_getprotoent:
8273 FUN0(OP_GPROTOENT);
8274
8275 case KEY_getpwent:
8276 FUN0(OP_GPWENT);
8277
8278 case KEY_getpwnam:
8279 UNI(OP_GPWNAM);
8280
8281 case KEY_getpwuid:
8282 UNI(OP_GPWUID);
8283
8284 case KEY_getpeername:
8285 UNI(OP_GETPEERNAME);
8286
8287 case KEY_gethostbyname:
8288 UNI(OP_GHBYNAME);
8289
8290 case KEY_gethostbyaddr:
8291 LOP(OP_GHBYADDR,XTERM);
8292
8293 case KEY_gethostent:
8294 FUN0(OP_GHOSTENT);
8295
8296 case KEY_getnetbyname:
8297 UNI(OP_GNBYNAME);
8298
8299 case KEY_getnetbyaddr:
8300 LOP(OP_GNBYADDR,XTERM);
8301
8302 case KEY_getnetent:
8303 FUN0(OP_GNETENT);
8304
8305 case KEY_getservbyname:
8306 LOP(OP_GSBYNAME,XTERM);
8307
8308 case KEY_getservbyport:
8309 LOP(OP_GSBYPORT,XTERM);
8310
8311 case KEY_getservent:
8312 FUN0(OP_GSERVENT);
8313
8314 case KEY_getsockname:
8315 UNI(OP_GETSOCKNAME);
8316
8317 case KEY_getsockopt:
8318 LOP(OP_GSOCKOPT,XTERM);
8319
8320 case KEY_getgrent:
8321 FUN0(OP_GGRENT);
8322
8323 case KEY_getgrnam:
8324 UNI(OP_GGRNAM);
8325
8326 case KEY_getgrgid:
8327 UNI(OP_GGRGID);
8328
8329 case KEY_getlogin:
8330 FUN0(OP_GETLOGIN);
8331
8332 case KEY_given:
8333 pl_yylval.ival = CopLINE(PL_curcop);
8334 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__SMARTMATCH),
8335 "given is deprecated");
8336 OPERATOR(KW_GIVEN);
8337
8338 case KEY_glob:
8339 LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
8340
8341 case KEY_hex:
8342 UNI(OP_HEX);
8343
8344 case KEY_if:
8345 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8346 return REPORT(0);
8347 pl_yylval.ival = CopLINE(PL_curcop);
8348 OPERATOR(KW_IF);
8349
8350 case KEY_index:
8351 LOP(OP_INDEX,XTERM);
8352
8353 case KEY_int:
8354 UNI(OP_INT);
8355
8356 case KEY_ioctl:
8357 LOP(OP_IOCTL,XTERM);
8358
8359 case KEY_isa:
8360 NCRop(OP_ISA);
8361
8362 case KEY_join:
8363 LOP(OP_JOIN,XTERM);
8364
8365 case KEY_keys:
8366 UNI(OP_KEYS);
8367
8368 case KEY_kill:
8369 LOP(OP_KILL,XTERM);
8370
8371 case KEY_last:
8372 LOOPX(OP_LAST);
8373
8374 case KEY_lc:
8375 UNI(OP_LC);
8376
8377 case KEY_lcfirst:
8378 UNI(OP_LCFIRST);
8379
8380 case KEY_local:
8381 OPERATOR(KW_LOCAL);
8382
8383 case KEY_length:
8384 UNI(OP_LENGTH);
8385
8386 case KEY_lt:
8387 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8388 return REPORT(0);
8389 ChRop(OP_SLT);
8390
8391 case KEY_le:
8392 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8393 return REPORT(0);
8394 ChRop(OP_SLE);
8395
8396 case KEY_localtime:
8397 UNI(OP_LOCALTIME);
8398
8399 case KEY_log:
8400 UNI(OP_LOG);
8401
8402 case KEY_link:
8403 LOP(OP_LINK,XTERM);
8404
8405 case KEY_listen:
8406 LOP(OP_LISTEN,XTERM);
8407
8408 case KEY_lock:
8409 UNI(OP_LOCK);
8410
8411 case KEY_lstat:
8412 UNI(OP_LSTAT);
8413
8414 case KEY_m:
8415 s = scan_pat(s,OP_MATCH);
8416 TERM(sublex_start());
8417
8418 case KEY_map:
8419 LOP(OP_MAPSTART, XREF);
8420
8421 case KEY_mkdir:
8422 LOP(OP_MKDIR,XTERM);
8423
8424 case KEY_msgctl:
8425 LOP(OP_MSGCTL,XTERM);
8426
8427 case KEY_msgget:
8428 LOP(OP_MSGGET,XTERM);
8429
8430 case KEY_msgrcv:
8431 LOP(OP_MSGRCV,XTERM);
8432
8433 case KEY_msgsnd:
8434 LOP(OP_MSGSND,XTERM);
8435
8436 case KEY_our:
8437 case KEY_my:
8438 case KEY_state:
8439 return yyl_my(aTHX_ s, key);
8440
8441 case KEY_next:
8442 LOOPX(OP_NEXT);
8443
8444 case KEY_ne:
8445 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8446 return REPORT(0);
8447 ChEop(OP_SNE);
8448
8449 case KEY_no:
8450 s = tokenize_use(0, s);
8451 TOKEN(KW_USE_or_NO);
8452
8453 case KEY_not:
8454 if (*s == '(' || (s = skipspace(s), *s == '('))
8455 FUN1(OP_NOT);
8456 else {
8457 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8458 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8459 OPERATOR(NOTOP);
8460 }
8461
8462 case KEY_open:
8463 s = skipspace(s);
8464 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8465 const char *t;
8466 char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8467 for (t=d; isSPACE(*t);)
8468 t++;
8469 if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8470 /* [perl #16184] */
8471 && !(t[0] == '=' && t[1] == '>')
8472 && !(t[0] == ':' && t[1] == ':')
8473 && !keyword(s, d-s, 0)
8474 ) {
8475 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8476 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8477 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8478 }
8479 }
8480 LOP(OP_OPEN,XTERM);
8481
8482 case KEY_or:
8483 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8484 return REPORT(0);
8485 pl_yylval.ival = OP_OR;
8486 OPERATOR(OROP);
8487
8488 case KEY_ord:
8489 UNI(OP_ORD);
8490
8491 case KEY_oct:
8492 UNI(OP_OCT);
8493
8494 case KEY_opendir:
8495 LOP(OP_OPEN_DIR,XTERM);
8496
8497 case KEY_print:
8498 checkcomma(s,PL_tokenbuf,"filehandle");
8499 LOP(OP_PRINT,XREF);
8500
8501 case KEY_printf:
8502 checkcomma(s,PL_tokenbuf,"filehandle");
8503 LOP(OP_PRTF,XREF);
8504
8505 case KEY_prototype:
8506 UNI(OP_PROTOTYPE);
8507
8508 case KEY_push:
8509 LOP(OP_PUSH,XTERM);
8510
8511 case KEY_pop:
8512 UNIDOR(OP_POP);
8513
8514 case KEY_pos:
8515 UNIDOR(OP_POS);
8516
8517 case KEY_pack:
8518 LOP(OP_PACK,XTERM);
8519
8520 case KEY_package:
8521 s = force_word(s,BAREWORD,FALSE,TRUE);
8522 s = skipspace(s);
8523 s = force_strict_version(s);
8524 PREBLOCK(KW_PACKAGE);
8525
8526 case KEY_pipe:
8527 LOP(OP_PIPE_OP,XTERM);
8528
8529 case KEY_q:
8530 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8531 if (!s)
8532 missingterm(NULL, 0);
8533 COPLINE_SET_FROM_MULTI_END;
8534 pl_yylval.ival = OP_CONST;
8535 TERM(sublex_start());
8536
8537 case KEY_quotemeta:
8538 UNI(OP_QUOTEMETA);
8539
8540 case KEY_qw:
8541 return yyl_qw(aTHX_ s, len);
8542
8543 case KEY_qq:
8544 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8545 if (!s)
8546 missingterm(NULL, 0);
8547 pl_yylval.ival = OP_STRINGIFY;
8548 if (SvIVX(PL_lex_stuff) == '\'')
8549 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8550 TERM(sublex_start());
8551
8552 case KEY_qr:
8553 s = scan_pat(s,OP_QR);
8554 TERM(sublex_start());
8555
8556 case KEY_qx:
8557 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8558 if (!s)
8559 missingterm(NULL, 0);
8560 pl_yylval.ival = OP_BACKTICK;
8561 TERM(sublex_start());
8562
8563 case KEY_return:
8564 OLDLOP(OP_RETURN);
8565
8566 case KEY_require:
8567 return yyl_require(aTHX_ s, orig_keyword);
8568
8569 case KEY_reset:
8570 UNI(OP_RESET);
8571
8572 case KEY_redo:
8573 LOOPX(OP_REDO);
8574
8575 case KEY_rename:
8576 LOP(OP_RENAME,XTERM);
8577
8578 case KEY_rand:
8579 UNI(OP_RAND);
8580
8581 case KEY_rmdir:
8582 UNI(OP_RMDIR);
8583
8584 case KEY_rindex:
8585 LOP(OP_RINDEX,XTERM);
8586
8587 case KEY_read:
8588 LOP(OP_READ,XTERM);
8589
8590 case KEY_readdir:
8591 UNI(OP_READDIR);
8592
8593 case KEY_readline:
8594 UNIDOR(OP_READLINE);
8595
8596 case KEY_readpipe:
8597 UNIDOR(OP_BACKTICK);
8598
8599 case KEY_rewinddir:
8600 UNI(OP_REWINDDIR);
8601
8602 case KEY_recv:
8603 LOP(OP_RECV,XTERM);
8604
8605 case KEY_reverse:
8606 LOP(OP_REVERSE,XTERM);
8607
8608 case KEY_readlink:
8609 UNIDOR(OP_READLINK);
8610
8611 case KEY_ref:
8612 UNI(OP_REF);
8613
8614 case KEY_s:
8615 s = scan_subst(s);
8616 if (pl_yylval.opval)
8617 TERM(sublex_start());
8618 else
8619 TOKEN(1); /* force error */
8620
8621 case KEY_say:
8622 checkcomma(s,PL_tokenbuf,"filehandle");
8623 LOP(OP_SAY,XREF);
8624
8625 case KEY_chomp:
8626 UNI(OP_CHOMP);
8627
8628 case KEY_scalar:
8629 UNI(OP_SCALAR);
8630
8631 case KEY_select:
8632 LOP(OP_SELECT,XTERM);
8633
8634 case KEY_seek:
8635 LOP(OP_SEEK,XTERM);
8636
8637 case KEY_semctl:
8638 LOP(OP_SEMCTL,XTERM);
8639
8640 case KEY_semget:
8641 LOP(OP_SEMGET,XTERM);
8642
8643 case KEY_semop:
8644 LOP(OP_SEMOP,XTERM);
8645
8646 case KEY_send:
8647 LOP(OP_SEND,XTERM);
8648
8649 case KEY_setpgrp:
8650 LOP(OP_SETPGRP,XTERM);
8651
8652 case KEY_setpriority:
8653 LOP(OP_SETPRIORITY,XTERM);
8654
8655 case KEY_sethostent:
8656 UNI(OP_SHOSTENT);
8657
8658 case KEY_setnetent:
8659 UNI(OP_SNETENT);
8660
8661 case KEY_setservent:
8662 UNI(OP_SSERVENT);
8663
8664 case KEY_setprotoent:
8665 UNI(OP_SPROTOENT);
8666
8667 case KEY_setpwent:
8668 FUN0(OP_SPWENT);
8669
8670 case KEY_setgrent:
8671 FUN0(OP_SGRENT);
8672
8673 case KEY_seekdir:
8674 LOP(OP_SEEKDIR,XTERM);
8675
8676 case KEY_setsockopt:
8677 LOP(OP_SSOCKOPT,XTERM);
8678
8679 case KEY_shift:
8680 UNIDOR(OP_SHIFT);
8681
8682 case KEY_shmctl:
8683 LOP(OP_SHMCTL,XTERM);
8684
8685 case KEY_shmget:
8686 LOP(OP_SHMGET,XTERM);
8687
8688 case KEY_shmread:
8689 LOP(OP_SHMREAD,XTERM);
8690
8691 case KEY_shmwrite:
8692 LOP(OP_SHMWRITE,XTERM);
8693
8694 case KEY_shutdown:
8695 LOP(OP_SHUTDOWN,XTERM);
8696
8697 case KEY_sin:
8698 UNI(OP_SIN);
8699
8700 case KEY_sleep:
8701 UNI(OP_SLEEP);
8702
8703 case KEY_socket:
8704 LOP(OP_SOCKET,XTERM);
8705
8706 case KEY_socketpair:
8707 LOP(OP_SOCKPAIR,XTERM);
8708
8709 case KEY_sort:
8710 checkcomma(s,PL_tokenbuf,"subroutine name");
8711 s = skipspace(s);
8712 PL_expect = XTERM;
8713 s = force_word(s,BAREWORD,TRUE,TRUE);
8714 LOP(OP_SORT,XREF);
8715
8716 case KEY_split:
8717 LOP(OP_SPLIT,XTERM);
8718
8719 case KEY_sprintf:
8720 LOP(OP_SPRINTF,XTERM);
8721
8722 case KEY_splice:
8723 LOP(OP_SPLICE,XTERM);
8724
8725 case KEY_sqrt:
8726 UNI(OP_SQRT);
8727
8728 case KEY_srand:
8729 UNI(OP_SRAND);
8730
8731 case KEY_stat:
8732 UNI(OP_STAT);
8733
8734 case KEY_study:
8735 UNI(OP_STUDY);
8736
8737 case KEY_substr:
8738 LOP(OP_SUBSTR,XTERM);
8739
8740 case KEY_method:
8741 /* For now we just treat 'method' identical to 'sub' plus a warning */
8742 Perl_ck_warner_d(aTHX_
8743 packWARN(WARN_EXPERIMENTAL__CLASS), "method is experimental");
8744 return yyl_sub(aTHX_ s, KEY_method);
8745
8746 case KEY_format:
8747 case KEY_sub:
8748 return yyl_sub(aTHX_ s, key);
8749
8750 case KEY_system:
8751 LOP(OP_SYSTEM,XREF);
8752
8753 case KEY_symlink:
8754 LOP(OP_SYMLINK,XTERM);
8755
8756 case KEY_syscall:
8757 LOP(OP_SYSCALL,XTERM);
8758
8759 case KEY_sysopen:
8760 LOP(OP_SYSOPEN,XTERM);
8761
8762 case KEY_sysseek:
8763 LOP(OP_SYSSEEK,XTERM);
8764
8765 case KEY_sysread:
8766 LOP(OP_SYSREAD,XTERM);
8767
8768 case KEY_syswrite:
8769 LOP(OP_SYSWRITE,XTERM);
8770
8771 case KEY_tr:
8772 case KEY_y:
8773 s = scan_trans(s);
8774 TERM(sublex_start());
8775
8776 case KEY_tell:
8777 UNI(OP_TELL);
8778
8779 case KEY_telldir:
8780 UNI(OP_TELLDIR);
8781
8782 case KEY_tie:
8783 LOP(OP_TIE,XTERM);
8784
8785 case KEY_tied:
8786 UNI(OP_TIED);
8787
8788 case KEY_time:
8789 FUN0(OP_TIME);
8790
8791 case KEY_times:
8792 FUN0(OP_TMS);
8793
8794 case KEY_truncate:
8795 LOP(OP_TRUNCATE,XTERM);
8796
8797 case KEY_try:
8798 pl_yylval.ival = CopLINE(PL_curcop);
8799 Perl_ck_warner_d(aTHX_
8800 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
8801 PREBLOCK(KW_TRY);
8802
8803 case KEY_uc:
8804 UNI(OP_UC);
8805
8806 case KEY_ucfirst:
8807 UNI(OP_UCFIRST);
8808
8809 case KEY_untie:
8810 UNI(OP_UNTIE);
8811
8812 case KEY_until:
8813 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8814 return REPORT(0);
8815 pl_yylval.ival = CopLINE(PL_curcop);
8816 OPERATOR(KW_UNTIL);
8817
8818 case KEY_unless:
8819 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8820 return REPORT(0);
8821 pl_yylval.ival = CopLINE(PL_curcop);
8822 OPERATOR(KW_UNLESS);
8823
8824 case KEY_unlink:
8825 LOP(OP_UNLINK,XTERM);
8826
8827 case KEY_undef:
8828 UNIDOR(OP_UNDEF);
8829
8830 case KEY_unpack:
8831 LOP(OP_UNPACK,XTERM);
8832
8833 case KEY_utime:
8834 LOP(OP_UTIME,XTERM);
8835
8836 case KEY_umask:
8837 UNIDOR(OP_UMASK);
8838
8839 case KEY_unshift:
8840 LOP(OP_UNSHIFT,XTERM);
8841
8842 case KEY_use:
8843 s = tokenize_use(1, s);
8844 TOKEN(KW_USE_or_NO);
8845
8846 case KEY_values:
8847 UNI(OP_VALUES);
8848
8849 case KEY_vec:
8850 LOP(OP_VEC,XTERM);
8851
8852 case KEY_when:
8853 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8854 return REPORT(0);
8855 pl_yylval.ival = CopLINE(PL_curcop);
8856 Perl_ck_warner_d(aTHX_
8857 packWARN(WARN_DEPRECATED__SMARTMATCH),
8858 "when is deprecated");
8859 OPERATOR(KW_WHEN);
8860
8861 case KEY_while:
8862 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8863 return REPORT(0);
8864 pl_yylval.ival = CopLINE(PL_curcop);
8865 OPERATOR(KW_WHILE);
8866
8867 case KEY_warn:
8868 PL_hints |= HINT_BLOCK_SCOPE;
8869 LOP(OP_WARN,XTERM);
8870
8871 case KEY_wait:
8872 FUN0(OP_WAIT);
8873
8874 case KEY_waitpid:
8875 LOP(OP_WAITPID,XTERM);
8876
8877 case KEY_wantarray:
8878 FUN0(OP_WANTARRAY);
8879
8880 case KEY_write:
8881 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8882 * we use the same number on EBCDIC */
8883 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8884 UNI(OP_ENTERWRITE);
8885
8886 case KEY_x:
8887 if (PL_expect == XOPERATOR) {
8888 if (*s == '=' && !PL_lex_allbrackets
8889 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8890 {
8891 return REPORT(0);
8892 }
8893 Mop(OP_REPEAT);
8894 }
8895 check_uni();
8896 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8897
8898 case KEY_xor:
8899 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8900 return REPORT(0);
8901 pl_yylval.ival = OP_XOR;
8902 OPERATOR(OROP);
8903 }
8904}
8905
8906static int
8907yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8908{
8909 I32 key = 0;
8910 I32 orig_keyword = 0;
8911 STRLEN olen = len;
8912 char *d = s;
8913 s += 2;
8914 s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8915 if ((*s == ':' && s[1] == ':')
8916 || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8917 {
8918 Copy(PL_bufptr, PL_tokenbuf, olen, char);
8919 return yyl_just_a_word(aTHX_ d, olen, 0, c);
8920 }
8921 if (!key)
8922 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8923 UTF8fARG(UTF, len, PL_tokenbuf));
8924 if (key < 0)
8925 key = -key;
8926 else if (key == KEY_require || key == KEY_do
8927 || key == KEY_glob)
8928 /* that's a way to remember we saw "CORE::" */
8929 orig_keyword = key;
8930
8931 /* Known to be a reserved word at this point */
8932 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8933}
8934
8935struct Perl_custom_infix_result {
8936 struct Perl_custom_infix *def;
8937 SV *parsedata;
8938};
8939
8940static enum yytokentype tokentype_for_plugop(struct Perl_custom_infix *def)
8941{
8942 enum Perl_custom_infix_precedence prec = def->prec;
8943 if(prec <= INFIX_PREC_LOW)
8944 return PLUGIN_LOW_OP;
8945 if(prec <= INFIX_PREC_LOGICAL_OR_LOW)
8946 return PLUGIN_LOGICAL_OR_LOW_OP;
8947 if(prec <= INFIX_PREC_LOGICAL_AND_LOW)
8948 return PLUGIN_LOGICAL_AND_LOW_OP;
8949 if(prec <= INFIX_PREC_ASSIGN)
8950 return PLUGIN_ASSIGN_OP;
8951 if(prec <= INFIX_PREC_LOGICAL_OR)
8952 return PLUGIN_LOGICAL_OR_OP;
8953 if(prec <= INFIX_PREC_LOGICAL_AND)
8954 return PLUGIN_LOGICAL_AND_OP;
8955 if(prec <= INFIX_PREC_REL)
8956 return PLUGIN_REL_OP;
8957 if(prec <= INFIX_PREC_ADD)
8958 return PLUGIN_ADD_OP;
8959 if(prec <= INFIX_PREC_MUL)
8960 return PLUGIN_MUL_OP;
8961 if(prec <= INFIX_PREC_POW)
8962 return PLUGIN_POW_OP;
8963 return PLUGIN_HIGH_OP;
8964}
8965
8966OP *
8967Perl_build_infix_plugin(pTHX_ OP *lhs, OP *rhs, void *tokendata)
8968{
8969 PERL_ARGS_ASSERT_BUILD_INFIX_PLUGIN;
8970
8971 struct Perl_custom_infix_result *result = (struct Perl_custom_infix_result *)tokendata;
8972 SAVEFREEPV(result);
8973 if(result->parsedata)
8974 SAVEFREESV(result->parsedata);
8975
8976 return (*result->def->build_op)(aTHX_
8977 &result->parsedata, lhs, rhs, result->def);
8978}
8979
8980static int
8981yyl_keylookup(pTHX_ char *s, GV *gv)
8982{
8983 STRLEN len;
8984 bool anydelim;
8985 I32 key;
8986 struct code c = no_code;
8987 I32 orig_keyword = 0;
8988 char *d;
8989
8990 c.gv = gv;
8991
8992 PL_bufptr = s;
8993 s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8994
8995 /* Some keywords can be followed by any delimiter, including ':' */
8996 anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8997
8998 /* x::* is just a word, unless x is "CORE" */
8999 if (!anydelim && *s == ':' && s[1] == ':') {
9000 if (memEQs(PL_tokenbuf, len, "CORE"))
9001 return yyl_key_core(aTHX_ s, len, c);
9002 return yyl_just_a_word(aTHX_ s, len, 0, c);
9003 }
9004
9005 d = s;
9006 while (d < PL_bufend && isSPACE(*d))
9007 d++; /* no comments skipped here, or s### is misparsed */
9008
9009 /* Is this a word before a => operator? */
9010 if (*d == '=' && d[1] == '>') {
9011 return yyl_fatcomma(aTHX_ s, len);
9012 }
9013
9014 /* Check for plugged-in keyword */
9015 {
9016 OP *o;
9017 int result;
9018 char *saved_bufptr = PL_bufptr;
9019 PL_bufptr = s;
9020 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
9021 s = PL_bufptr;
9022 if (result == KEYWORD_PLUGIN_DECLINE) {
9023 /* not a plugged-in keyword */
9024 PL_bufptr = saved_bufptr;
9025 } else if (result == KEYWORD_PLUGIN_STMT) {
9026 pl_yylval.opval = o;
9027 CLINE;
9028 if (!PL_nexttoke) PL_expect = XSTATE;
9029 return REPORT(PLUGSTMT);
9030 } else if (result == KEYWORD_PLUGIN_EXPR) {
9031 pl_yylval.opval = o;
9032 CLINE;
9033 if (!PL_nexttoke) PL_expect = XOPERATOR;
9034 return REPORT(PLUGEXPR);
9035 } else {
9036 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
9037 }
9038 }
9039
9040 /* Check for plugged-in named operator */
9041 if(PLUGINFIX_IS_ENABLED) {
9042 struct Perl_custom_infix *def;
9043 STRLEN result;
9044 result = PL_infix_plugin(aTHX_ PL_tokenbuf, len, &def);
9045 if(result) {
9046 if(result != len)
9047 Perl_croak(aTHX_ "Bad infix plugin result (%zd) - did not consume entire identifier <%s>\n",
9048 result, PL_tokenbuf);
9049 PL_bufptr = s = d;
9050 struct Perl_custom_infix_result *result;
9051 Newx(result, 1, struct Perl_custom_infix_result);
9052 result->def = def;
9053 result->parsedata = NULL;
9054 if(def->parse) {
9055 (*def->parse)(aTHX_ &result->parsedata, def);
9056 s = PL_bufptr; /* restore local s variable */
9057 }
9058 pl_yylval.pval = result;
9059 CLINE;
9060 OPERATOR(tokentype_for_plugop(def));
9061 }
9062 }
9063
9064 /* Is this a label? */
9065 if (!anydelim && PL_expect == XSTATE
9066 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
9067 s = d + 1;
9068 pl_yylval.opval =
9069 newSVOP(OP_CONST, 0,
9070 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
9071 CLINE;
9072 TOKEN(LABEL);
9073 }
9074
9075 /* Check for lexical sub */
9076 if (PL_expect != XOPERATOR) {
9077 char tmpbuf[sizeof PL_tokenbuf + 1];
9078 *tmpbuf = '&';
9079 Copy(PL_tokenbuf, tmpbuf+1, len, char);
9080 c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
9081 if (c.off != NOT_IN_PAD) {
9082 assert(c.off); /* we assume this is boolean-true below */
9083 if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
9084 HV * const stash = PAD_COMPNAME_OURSTASH(c.off);
9085 HEK * const stashname = HvNAME_HEK(stash);
9086 c.sv = newSVhek(stashname);
9087 sv_catpvs(c.sv, "::");
9088 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
9089 (UTF ? SV_CATUTF8 : SV_CATBYTES));
9090 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
9091 SVt_PVCV);
9092 c.off = 0;
9093 if (!c.gv) {
9094 ASSUME(c.sv && SvREFCNT(c.sv) == 1);
9095 SvREFCNT_dec(c.sv);
9096 c.sv = NULL;
9097 return yyl_just_a_word(aTHX_ s, len, 0, c);
9098 }
9099 }
9100 else {
9101 c.rv2cv_op = newOP(OP_PADANY, 0);
9102 c.rv2cv_op->op_targ = c.off;
9103 c.cv = find_lexical_cv(c.off);
9104 }
9105 c.lex = TRUE;
9106 return yyl_just_a_word(aTHX_ s, len, 0, c);
9107 }
9108 c.off = 0;
9109 }
9110
9111 /* Check for built-in keyword */
9112 key = keyword(PL_tokenbuf, len, 0);
9113
9114 if (key < 0)
9115 key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
9116
9117 if (key && key != KEY___DATA__ && key != KEY___END__
9118 && (!anydelim || *s != '#')) {
9119 /* no override, and not s### either; skipspace is safe here
9120 * check for => on following line */
9121 bool arrow;
9122 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
9123 STRLEN soff = s - SvPVX(PL_linestr);
9124 s = peekspace(s);
9125 arrow = *s == '=' && s[1] == '>';
9126 PL_bufptr = SvPVX(PL_linestr) + bufoff;
9127 s = SvPVX(PL_linestr) + soff;
9128 if (arrow)
9129 return yyl_fatcomma(aTHX_ s, len);
9130 }
9131
9132 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
9133}
9134
9135static int
9136yyl_try(pTHX_ char *s)
9137{
9138 char *d;
9139 GV *gv = NULL;
9140 int tok;
9141
9142 retry:
9143 /* Check for plugged-in symbolic operator */
9144 if(PLUGINFIX_IS_ENABLED && isPLUGINFIX_FIRST(*s)) {
9145 struct Perl_custom_infix *def;
9146 char *s_end = s, *d = PL_tokenbuf;
9147 STRLEN len;
9148
9149 /* Copy the longest sequence of isPLUGINFIX() chars into PL_tokenbuf */
9150 while(s_end < PL_bufend && d < PL_tokenbuf+sizeof(PL_tokenbuf)-1 && isPLUGINFIX(*s_end))
9151 *d++ = *s_end++;
9152 *d = '\0';
9153
9154 if((len = (*PL_infix_plugin)(aTHX_ PL_tokenbuf, s_end - s, &def))) {
9155 s += len;
9156 struct Perl_custom_infix_result *result;
9157 Newx(result, 1, struct Perl_custom_infix_result);
9158 result->def = def;
9159 result->parsedata = NULL;
9160 if(def->parse) {
9161 PL_bufptr = s;
9162 (*def->parse)(aTHX_ &result->parsedata, def);
9163 s = PL_bufptr; /* restore local s variable */
9164 }
9165 pl_yylval.pval = result;
9166 CLINE;
9167 OPERATOR(tokentype_for_plugop(def));
9168 }
9169 }
9170
9171 switch (*s) {
9172 default:
9173 if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
9174 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9175 return tok;
9176 goto retry_bufptr;
9177 }
9178 yyl_croak_unrecognised(aTHX_ s);
9179
9180 case 4:
9181 case 26:
9182 /* emulate EOF on ^D or ^Z */
9183 if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
9184 return tok;
9185 retry_bufptr:
9186 s = PL_bufptr;
9187 goto retry;
9188
9189 case 0:
9190 if ((!PL_rsfp || PL_lex_inwhat)
9191 && (!PL_parser->filtered || s+1 < PL_bufend)) {
9192 PL_last_uni = 0;
9193 PL_last_lop = 0;
9194 if (PL_lex_brackets
9195 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
9196 {
9197 yyerror((const char *)
9198 (PL_lex_formbrack
9199 ? "Format not terminated"
9200 : "Missing right curly or square bracket"));
9201 }
9202 DEBUG_T({
9203 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
9204 });
9205 TOKEN(0);
9206 }
9207 if (s++ < PL_bufend)
9208 goto retry; /* ignore stray nulls */
9209 PL_last_uni = 0;
9210 PL_last_lop = 0;
9211 if (!PL_in_eval && !PL_preambled) {
9212 PL_preambled = TRUE;
9213 if (PL_perldb) {
9214 /* Generate a string of Perl code to load the debugger.
9215 * If PERL5DB is set, it will return the contents of that,
9216 * otherwise a compile-time require of perl5db.pl. */
9217
9218 const char * const pdb = PerlEnv_getenv("PERL5DB");
9219
9220 if (pdb) {
9221 sv_setpv(PL_linestr, pdb);
9222 sv_catpvs(PL_linestr,";");
9223 } else {
9224 SETERRNO(0,SS_NORMAL);
9225 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
9226 }
9227 PL_parser->preambling = CopLINE(PL_curcop);
9228 } else
9229 SvPVCLEAR(PL_linestr);
9230 if (PL_preambleav) {
9231 SV **svp = AvARRAY(PL_preambleav);
9232 SV **const end = svp + AvFILLp(PL_preambleav);
9233 while(svp <= end) {
9234 sv_catsv(PL_linestr, *svp);
9235 ++svp;
9236 sv_catpvs(PL_linestr, ";");
9237 }
9238 SvREFCNT_dec(MUTABLE_SV(PL_preambleav));
9239 PL_preambleav = NULL;
9240 }
9241 if (PL_minus_E)
9242 sv_catpvs(PL_linestr,
9243 "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
9244 if (PL_minus_n || PL_minus_p) {
9245 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
9246 if (PL_minus_l)
9247 sv_catpvs(PL_linestr,"chomp;");
9248 if (PL_minus_a) {
9249 if (PL_minus_F) {
9250 if ( ( *PL_splitstr == '/'
9251 || *PL_splitstr == '\''
9252 || *PL_splitstr == '"')
9253 && strchr(PL_splitstr + 1, *PL_splitstr))
9254 {
9255 /* strchr is ok, because -F pattern can't contain
9256 * embedded NULs */
9257 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
9258 }
9259 else {
9260 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
9261 bytes can be used as quoting characters. :-) */
9262 const char *splits = PL_splitstr;
9263 sv_catpvs(PL_linestr, "our @F=split(q\0");
9264 do {
9265 /* Need to \ \s */
9266 if (*splits == '\\')
9267 sv_catpvn(PL_linestr, splits, 1);
9268 sv_catpvn(PL_linestr, splits, 1);
9269 } while (*splits++);
9270 /* This loop will embed the trailing NUL of
9271 PL_linestr as the last thing it does before
9272 terminating. */
9273 sv_catpvs(PL_linestr, ");");
9274 }
9275 }
9276 else
9277 sv_catpvs(PL_linestr,"our @F=split(' ');");
9278 }
9279 }
9280 sv_catpvs(PL_linestr, "\n");
9281 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
9282 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9283 PL_last_lop = PL_last_uni = NULL;
9284 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
9285 update_debugger_info(PL_linestr, NULL, 0);
9286 goto retry;
9287 }
9288 if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
9289 return tok;
9290 goto retry_bufptr;
9291
9292 case '\r':
9293#ifdef PERL_STRICT_CR
9294 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
9295 Perl_croak(aTHX_
9296 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
9297#endif
9298 case ' ': case '\t': case '\f': case '\v':
9299 s++;
9300 goto retry;
9301
9302 case '#':
9303 case '\n': {
9304 const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
9305 if (needs_semicolon)
9306 TOKEN(PERLY_SEMICOLON);
9307 else
9308 goto retry;
9309 }
9310
9311 case '-':
9312 return yyl_hyphen(aTHX_ s);
9313
9314 case '+':
9315 return yyl_plus(aTHX_ s);
9316
9317 case '*':
9318 return yyl_star(aTHX_ s);
9319
9320 case '%':
9321 return yyl_percent(aTHX_ s);
9322
9323 case '^':
9324 return yyl_caret(aTHX_ s);
9325
9326 case '[':
9327 return yyl_leftsquare(aTHX_ s);
9328
9329 case '~':
9330 return yyl_tilde(aTHX_ s);
9331
9332 case ',':
9333 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9334 TOKEN(0);
9335 s++;
9336 OPERATOR(PERLY_COMMA);
9337 case ':':
9338 if (s[1] == ':')
9339 return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
9340 return yyl_colon(aTHX_ s + 1);
9341
9342 case '(':
9343 return yyl_leftparen(aTHX_ s + 1);
9344
9345 case ';':
9346 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
9347 TOKEN(0);
9348 CLINE;
9349 s++;
9350 PL_expect = XSTATE;
9351 TOKEN(PERLY_SEMICOLON);
9352
9353 case ')':
9354 return yyl_rightparen(aTHX_ s);
9355
9356 case ']':
9357 return yyl_rightsquare(aTHX_ s);
9358
9359 case '{':
9360 return yyl_leftcurly(aTHX_ s + 1, 0);
9361
9362 case '}':
9363 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
9364 TOKEN(0);
9365 return yyl_rightcurly(aTHX_ s, 0);
9366
9367 case '&':
9368 return yyl_ampersand(aTHX_ s);
9369
9370 case '|':
9371 return yyl_verticalbar(aTHX_ s);
9372
9373 case '=':
9374 if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
9375 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
9376 {
9377 s = vcs_conflict_marker(s + 7);
9378 goto retry;
9379 }
9380
9381 s++;
9382 {
9383 const char tmp = *s++;
9384 if (tmp == '=') {
9385 if (!PL_lex_allbrackets
9386 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
9387 {
9388 s -= 2;
9389 TOKEN(0);
9390 }
9391 ChEop(OP_EQ);
9392 }
9393 if (tmp == '>') {
9394 if (!PL_lex_allbrackets
9395 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9396 {
9397 s -= 2;
9398 TOKEN(0);
9399 }
9400 OPERATOR(PERLY_COMMA);
9401 }
9402 if (tmp == '~')
9403 PMop(OP_MATCH);
9404 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
9405 && memCHRs("+-*/%.^&|<",tmp))
9406 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9407 "Reversed %c= operator",(int)tmp);
9408 s--;
9409 if (PL_expect == XSTATE
9410 && isALPHA(tmp)
9411 && (s == PL_linestart+1 || s[-2] == '\n') )
9412 {
9413 if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
9414 || PL_lex_state != LEX_NORMAL)
9415 {
9416 d = PL_bufend;
9417 while (s < d) {
9418 if (*s++ == '\n') {
9419 incline(s, PL_bufend);
9420 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
9421 {
9422 s = (char *) memchr(s,'\n', d - s);
9423 if (s)
9424 s++;
9425 else
9426 s = d;
9427 incline(s, PL_bufend);
9428 goto retry;
9429 }
9430 }
9431 }
9432 goto retry;
9433 }
9434 s = PL_bufend;
9435 PL_parser->in_pod = 1;
9436 goto retry;
9437 }
9438 }
9439 if (PL_expect == XBLOCK) {
9440 const char *t = s;
9441#ifdef PERL_STRICT_CR
9442 while (SPACE_OR_TAB(*t))
9443#else
9444 while (SPACE_OR_TAB(*t) || *t == '\r')
9445#endif
9446 t++;
9447 if (*t == '\n' || *t == '#') {
9448 ENTER_with_name("lex_format");
9449 SAVEI8(PL_parser->form_lex_state);
9450 SAVEI32(PL_lex_formbrack);
9451 PL_parser->form_lex_state = PL_lex_state;
9452 PL_lex_formbrack = PL_lex_brackets + 1;
9453 PL_parser->sub_error_count = PL_error_count;
9454 return yyl_leftcurly(aTHX_ s, 1);
9455 }
9456 }
9457 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
9458 s--;
9459 TOKEN(0);
9460 }
9461 pl_yylval.ival = 0;
9462 OPERATOR(ASSIGNOP);
9463
9464 case '!':
9465 return yyl_bang(aTHX_ s + 1);
9466
9467 case '<':
9468 if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
9469 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
9470 {
9471 s = vcs_conflict_marker(s + 7);
9472 goto retry;
9473 }
9474 return yyl_leftpointy(aTHX_ s);
9475
9476 case '>':
9477 if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
9478 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
9479 {
9480 s = vcs_conflict_marker(s + 7);
9481 goto retry;
9482 }
9483 return yyl_rightpointy(aTHX_ s + 1);
9484
9485 case '$':
9486 return yyl_dollar(aTHX_ s);
9487
9488 case '@':
9489 return yyl_snail(aTHX_ s);
9490
9491 case '/': /* may be division, defined-or, or pattern */
9492 return yyl_slash(aTHX_ s);
9493
9494 case '?': /* conditional */
9495 s++;
9496 if (!PL_lex_allbrackets
9497 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
9498 {
9499 s--;
9500 TOKEN(0);
9501 }
9502 PL_lex_allbrackets++;
9503 OPERATOR(PERLY_QUESTION_MARK);
9504
9505 case '.':
9506 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
9507#ifdef PERL_STRICT_CR
9508 && s[1] == '\n'
9509#else
9510 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
9511#endif
9512 && (s == PL_linestart || s[-1] == '\n') )
9513 {
9514 PL_expect = XSTATE;
9515 /* formbrack==2 means dot seen where arguments expected */
9516 return yyl_rightcurly(aTHX_ s, 2);
9517 }
9518 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9519 s += 3;
9520 OPERATOR(YADAYADA);
9521 }
9522 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9523 char tmp = *s++;
9524 if (*s == tmp) {
9525 if (!PL_lex_allbrackets
9526 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9527 {
9528 s--;
9529 TOKEN(0);
9530 }
9531 s++;
9532 if (*s == tmp) {
9533 s++;
9534 pl_yylval.ival = OPf_SPECIAL;
9535 }
9536 else
9537 pl_yylval.ival = 0;
9538 OPERATOR(DOTDOT);
9539 }
9540 if (*s == '=' && !PL_lex_allbrackets
9541 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9542 {
9543 s--;
9544 TOKEN(0);
9545 }
9546 Aop(OP_CONCAT);
9547 }
9548 /* FALLTHROUGH */
9549 case '0': case '1': case '2': case '3': case '4':
9550 case '5': case '6': case '7': case '8': case '9':
9551 s = scan_num(s, &pl_yylval);
9552 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9553 if (PL_expect == XOPERATOR)
9554 no_op("Number",s);
9555 TERM(THING);
9556
9557 case '\'':
9558 return yyl_sglquote(aTHX_ s);
9559
9560 case '"':
9561 return yyl_dblquote(aTHX_ s);
9562
9563 case '`':
9564 return yyl_backtick(aTHX_ s);
9565
9566 case '\\':
9567 return yyl_backslash(aTHX_ s + 1);
9568
9569 case 'v':
9570 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9571 char *start = s + 2;
9572 while (isDIGIT(*start) || *start == '_')
9573 start++;
9574 if (*start == '.' && isDIGIT(start[1])) {
9575 s = scan_num(s, &pl_yylval);
9576 TERM(THING);
9577 }
9578 else if ((*start == ':' && start[1] == ':')
9579 || (PL_expect == XSTATE && *start == ':')) {
9580 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9581 return tok;
9582 goto retry_bufptr;
9583 }
9584 else if (PL_expect == XSTATE) {
9585 d = start;
9586 while (d < PL_bufend && isSPACE(*d)) d++;
9587 if (*d == ':') {
9588 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9589 return tok;
9590 goto retry_bufptr;
9591 }
9592 }
9593 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9594 if (!isALPHA(*start) && (PL_expect == XTERM
9595 || PL_expect == XREF || PL_expect == XSTATE
9596 || PL_expect == XTERMORDORDOR)) {
9597 GV *const gv = gv_fetchpvn_flags(s, start - s,
9598 UTF ? SVf_UTF8 : 0, SVt_PVCV);
9599 if (!gv) {
9600 s = scan_num(s, &pl_yylval);
9601 TERM(THING);
9602 }
9603 }
9604 }
9605 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9606 return tok;
9607 goto retry_bufptr;
9608
9609 case 'x':
9610 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9611 s++;
9612 Mop(OP_REPEAT);
9613 }
9614 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9615 return tok;
9616 goto retry_bufptr;
9617
9618 case '_':
9619 case 'a': case 'A':
9620 case 'b': case 'B':
9621 case 'c': case 'C':
9622 case 'd': case 'D':
9623 case 'e': case 'E':
9624 case 'f': case 'F':
9625 case 'g': case 'G':
9626 case 'h': case 'H':
9627 case 'i': case 'I':
9628 case 'j': case 'J':
9629 case 'k': case 'K':
9630 case 'l': case 'L':
9631 case 'm': case 'M':
9632 case 'n': case 'N':
9633 case 'o': case 'O':
9634 case 'p': case 'P':
9635 case 'q': case 'Q':
9636 case 'r': case 'R':
9637 case 's': case 'S':
9638 case 't': case 'T':
9639 case 'u': case 'U':
9640 case 'V':
9641 case 'w': case 'W':
9642 case 'X':
9643 case 'y': case 'Y':
9644 case 'z': case 'Z':
9645 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9646 return tok;
9647 goto retry_bufptr;
9648 }
9649}
9650
9651
9652/*
9653 yylex
9654
9655 Works out what to call the token just pulled out of the input
9656 stream. The yacc parser takes care of taking the ops we return and
9657 stitching them into a tree.
9658
9659 Returns:
9660 The type of the next token
9661
9662 Structure:
9663 Check if we have already built the token; if so, use it.
9664 Switch based on the current state:
9665 - if we have a case modifier in a string, deal with that
9666 - handle other cases of interpolation inside a string
9667 - scan the next line if we are inside a format
9668 In the normal state, switch on the next character:
9669 - default:
9670 if alphabetic, go to key lookup
9671 unrecognized character - croak
9672 - 0/4/26: handle end-of-line or EOF
9673 - cases for whitespace
9674 - \n and #: handle comments and line numbers
9675 - various operators, brackets and sigils
9676 - numbers
9677 - quotes
9678 - 'v': vstrings (or go to key lookup)
9679 - 'x' repetition operator (or go to key lookup)
9680 - other ASCII alphanumerics (key lookup begins here):
9681 word before => ?
9682 keyword plugin
9683 scan built-in keyword (but do nothing with it yet)
9684 check for statement label
9685 check for lexical subs
9686 return yyl_just_a_word if there is one
9687 see whether built-in keyword is overridden
9688 switch on keyword number:
9689 - default: return yyl_just_a_word:
9690 not a built-in keyword; handle bareword lookup
9691 disambiguate between method and sub call
9692 fall back to bareword
9693 - cases for built-in keywords
9694*/
9695
9696int
9697Perl_yylex(pTHX)
9698{
9699 char *s = PL_bufptr;
9700
9701 if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9702 const U8* first_bad_char_loc;
9703 if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9704 PL_bufend - PL_bufptr,
9705 &first_bad_char_loc)))
9706 {
9707 _force_out_malformed_utf8_message(first_bad_char_loc,
9708 (U8 *) PL_bufend,
9709 0,
9710 1 /* 1 means die */ );
9711 NOT_REACHED; /* NOTREACHED */
9712 }
9713 PL_parser->recheck_utf8_validity = FALSE;
9714 }
9715 DEBUG_T( {
9716 SV* tmp = newSVpvs("");
9717 PerlIO_printf(Perl_debug_log, "### %" LINE_Tf ":LEX_%s/X%s %s\n",
9718 CopLINE(PL_curcop),
9719 lex_state_names[PL_lex_state],
9720 exp_name[PL_expect],
9721 pv_display(tmp, s, strlen(s), 0, 60));
9722 SvREFCNT_dec(tmp);
9723 } );
9724
9725 /* when we've already built the next token, just pull it out of the queue */
9726 if (PL_nexttoke) {
9727 PL_nexttoke--;
9728 pl_yylval = PL_nextval[PL_nexttoke];
9729 {
9730 I32 next_type;
9731 next_type = PL_nexttype[PL_nexttoke];
9732 if (next_type & (7<<24)) {
9733 if (next_type & (1<<24)) {
9734 if (PL_lex_brackets > 100)
9735 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9736 PL_lex_brackstack[PL_lex_brackets++] =
9737 (char) ((U8) (next_type >> 16));
9738 }
9739 if (next_type & (2<<24))
9740 PL_lex_allbrackets++;
9741 if (next_type & (4<<24))
9742 PL_lex_allbrackets--;
9743 next_type &= 0xffff;
9744 }
9745 return REPORT(next_type == 'p' ? pending_ident() : next_type);
9746 }
9747 }
9748
9749 switch (PL_lex_state) {
9750 case LEX_NORMAL:
9751 case LEX_INTERPNORMAL:
9752 break;
9753
9754 /* interpolated case modifiers like \L \U, including \Q and \E.
9755 when we get here, PL_bufptr is at the \
9756 */
9757 case LEX_INTERPCASEMOD:
9758 /* handle \E or end of string */
9759 return yyl_interpcasemod(aTHX_ s);
9760
9761 case LEX_INTERPPUSH:
9762 return REPORT(sublex_push());
9763
9764 case LEX_INTERPSTART:
9765 if (PL_bufptr == PL_bufend)
9766 return REPORT(sublex_done());
9767 DEBUG_T({
9768 if(*PL_bufptr != '(')
9769 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9770 });
9771 PL_expect = XTERM;
9772 /* for /@a/, we leave the joining for the regex engine to do
9773 * (unless we're within \Q etc) */
9774 PL_lex_dojoin = (*PL_bufptr == '@'
9775 && (!PL_lex_inpat || PL_lex_casemods));
9776 PL_lex_state = LEX_INTERPNORMAL;
9777 if (PL_lex_dojoin) {
9778 NEXTVAL_NEXTTOKE.ival = 0;
9779 force_next(PERLY_COMMA);
9780 force_ident("\"", PERLY_DOLLAR);
9781 NEXTVAL_NEXTTOKE.ival = 0;
9782 force_next(PERLY_DOLLAR);
9783 NEXTVAL_NEXTTOKE.ival = 0;
9784 force_next((2<<24)|PERLY_PAREN_OPEN);
9785 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
9786 force_next(FUNC);
9787 }
9788 /* Convert (?{...}) or (*{...}) and friends to 'do {...}' */
9789 if (PL_lex_inpat && *PL_bufptr == '(') {
9790 PL_parser->lex_shared->re_eval_start = PL_bufptr;
9791 PL_bufptr += 2;
9792 if (*PL_bufptr != '{')
9793 PL_bufptr++;
9794 PL_expect = XTERMBLOCK;
9795 force_next(KW_DO);
9796 }
9797
9798 if (PL_lex_starts++) {
9799 s = PL_bufptr;
9800 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9801 if (!PL_lex_casemods && PL_lex_inpat)
9802 TOKEN(PERLY_COMMA);
9803 else
9804 AopNOASSIGN(OP_CONCAT);
9805 }
9806 return yylex();
9807
9808 case LEX_INTERPENDMAYBE:
9809 if (intuit_more(PL_bufptr, PL_bufend)) {
9810 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
9811 break;
9812 }
9813 /* FALLTHROUGH */
9814
9815 case LEX_INTERPEND:
9816 if (PL_lex_dojoin) {
9817 const U8 dojoin_was = PL_lex_dojoin;
9818 PL_lex_dojoin = FALSE;
9819 PL_lex_state = LEX_INTERPCONCAT;
9820 PL_lex_allbrackets--;
9821 return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN);
9822 }
9823 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9824 && SvEVALED(PL_lex_repl))
9825 {
9826 if (PL_bufptr != PL_bufend)
9827 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9828 PL_lex_repl = NULL;
9829 }
9830 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
9831 re_eval_str. If the here-doc body's length equals the previous
9832 value of re_eval_start, re_eval_start will now be null. So
9833 check re_eval_str as well. */
9834 if (PL_parser->lex_shared->re_eval_start
9835 || PL_parser->lex_shared->re_eval_str) {
9836 SV *sv;
9837 if (*PL_bufptr != ')')
9838 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9839 PL_bufptr++;
9840 /* having compiled a (?{..}) expression, return the original
9841 * text too, as a const */
9842 if (PL_parser->lex_shared->re_eval_str) {
9843 sv = PL_parser->lex_shared->re_eval_str;
9844 PL_parser->lex_shared->re_eval_str = NULL;
9845 SvCUR_set(sv,
9846 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9847 SvPV_shrink_to_cur(sv);
9848 }
9849 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9850 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9851 NEXTVAL_NEXTTOKE.opval =
9852 newSVOP(OP_CONST, 0,
9853 sv);
9854 force_next(THING);
9855 PL_parser->lex_shared->re_eval_start = NULL;
9856 PL_expect = XTERM;
9857 return REPORT(PERLY_COMMA);
9858 }
9859
9860 /* FALLTHROUGH */
9861 case LEX_INTERPCONCAT:
9862#ifdef DEBUGGING
9863 if (PL_lex_brackets)
9864 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9865 (long) PL_lex_brackets);
9866#endif
9867 if (PL_bufptr == PL_bufend)
9868 return REPORT(sublex_done());
9869
9870 /* m'foo' still needs to be parsed for possible (?{...}) */
9871 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9872 SV *sv = newSVsv(PL_linestr);
9873 sv = tokeq(sv);
9874 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9875 s = PL_bufend;
9876 }
9877 else {
9878 int save_error_count = PL_error_count;
9879
9880 s = scan_const(PL_bufptr);
9881
9882 /* Set flag if this was a pattern and there were errors. op.c will
9883 * refuse to compile a pattern with this flag set. Otherwise, we
9884 * could get segfaults, etc. */
9885 if (PL_lex_inpat && PL_error_count > save_error_count) {
9886 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9887 }
9888 if (*s == '\\')
9889 PL_lex_state = LEX_INTERPCASEMOD;
9890 else
9891 PL_lex_state = LEX_INTERPSTART;
9892 }
9893
9894 if (s != PL_bufptr) {
9895 NEXTVAL_NEXTTOKE = pl_yylval;
9896 PL_expect = XTERM;
9897 force_next(THING);
9898 if (PL_lex_starts++) {
9899 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9900 if (!PL_lex_casemods && PL_lex_inpat)
9901 TOKEN(PERLY_COMMA);
9902 else
9903 AopNOASSIGN(OP_CONCAT);
9904 }
9905 else {
9906 PL_bufptr = s;
9907 return yylex();
9908 }
9909 }
9910
9911 return yylex();
9912 case LEX_FORMLINE:
9913 if (PL_parser->sub_error_count != PL_error_count) {
9914 /* There was an error parsing a formline, which tends to
9915 mess up the parser.
9916 Unlike interpolated sub-parsing, we can't treat any of
9917 these as recoverable, so no need to check sub_no_recover.
9918 */
9919 yyquit();
9920 }
9921 assert(PL_lex_formbrack);
9922 s = scan_formline(PL_bufptr);
9923 if (!PL_lex_formbrack)
9924 return yyl_rightcurly(aTHX_ s, 1);
9925 PL_bufptr = s;
9926 return yylex();
9927 }
9928
9929 /* We really do *not* want PL_linestr ever becoming a COW. */
9930 assert (!SvIsCOW(PL_linestr));
9931 s = PL_bufptr;
9932 PL_oldoldbufptr = PL_oldbufptr;
9933 PL_oldbufptr = s;
9934
9935 if (PL_in_my == KEY_sigvar) {
9936 PL_parser->saw_infix_sigil = 0;
9937 return yyl_sigvar(aTHX_ s);
9938 }
9939
9940 {
9941 /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9942 On its return, we then need to set it to indicate whether the token
9943 we just encountered was an infix operator that (if we hadn't been
9944 expecting an operator) have been a sigil.
9945 */
9946 bool expected_operator = (PL_expect == XOPERATOR);
9947 int ret = yyl_try(aTHX_ s);
9948 switch (pl_yylval.ival) {
9949 case OP_BIT_AND:
9950 case OP_MODULO:
9951 case OP_MULTIPLY:
9952 case OP_NBIT_AND:
9953 if (expected_operator) {
9954 PL_parser->saw_infix_sigil = 1;
9955 break;
9956 }
9957 /* FALLTHROUGH */
9958 default:
9959 PL_parser->saw_infix_sigil = 0;
9960 }
9961 return ret;
9962 }
9963}
9964
9965
9966/*
9967 S_pending_ident
9968
9969 Looks up an identifier in the pad or in a package
9970
9971 PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9972 rather than a plain pad var.
9973
9974 Returns:
9975 PRIVATEREF if this is a lexical name.
9976 BAREWORD if this belongs to a package.
9977
9978 Structure:
9979 if we're in a my declaration
9980 croak if they tried to say my($foo::bar)
9981 build the ops for a my() declaration
9982 if it's an access to a my() variable
9983 build ops for access to a my() variable
9984 if in a dq string, and they've said @foo and we can't find @foo
9985 warn
9986 build ops for a bareword
9987*/
9988
9989static int
9990S_pending_ident(pTHX)
9991{
9992 PADOFFSET tmp = 0;
9993 const char pit = (char)pl_yylval.ival;
9994 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9995 /* All routes through this function want to know if there is a colon. */
9996 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9997
9998 DEBUG_T({ PerlIO_printf(Perl_debug_log,
9999 "### Pending identifier '%s'\n", PL_tokenbuf); });
10000 assert(tokenbuf_len >= 2);
10001
10002 /* if we're in a my(), we can't allow dynamics here.
10003 $foo'bar has already been turned into $foo::bar, so
10004 just check for colons.
10005
10006 if it's a legal name, the OP is a PADANY.
10007 */
10008 if (PL_in_my) {
10009 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
10010 if (has_colon)
10011 /* diag_listed_as: No package name allowed for variable %s
10012 in "our" */
10013 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
10014 "%s %s in \"our\"",
10015 *PL_tokenbuf=='&' ? "subroutine" : "variable",
10016 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
10017 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
10018 }
10019 else {
10020 OP *o;
10021 if (has_colon) {
10022 /* "my" variable %s can't be in a package */
10023 /* PL_no_myglob is constant */
10024 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
10025 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
10026 PL_in_my == KEY_my ? "my" :
10027 PL_in_my == KEY_field ? "field" : "state",
10028 *PL_tokenbuf == '&' ? "subroutine" : "variable",
10029 PL_tokenbuf),
10030 UTF ? SVf_UTF8 : 0);
10031 GCC_DIAG_RESTORE_STMT;
10032 }
10033
10034 if (PL_in_my == KEY_sigvar) {
10035 /* A signature 'padop' needs in addition, an op_first to
10036 * point to a child sigdefelem, and an extra field to hold
10037 * the signature index. We can achieve both by using an
10038 * UNOP_AUX and (ab)using the op_aux field to hold the
10039 * index. If we ever need more fields, use a real malloced
10040 * aux strut instead.
10041 */
10042 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
10043 INT2PTR(UNOP_AUX_item *,
10044 (PL_parser->sig_elems)));
10045 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
10046 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
10047 : OPpARGELEM_HV);
10048 }
10049 else
10050 o = newOP(OP_PADANY, 0);
10051 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
10052 UTF ? SVf_UTF8 : 0);
10053 if (PL_in_my == KEY_sigvar)
10054 PL_in_my = 0;
10055
10056 pl_yylval.opval = o;
10057 return PRIVATEREF;
10058 }
10059 }
10060
10061 /*
10062 build the ops for accesses to a my() variable.
10063 */
10064
10065 if (!has_colon) {
10066 if (!PL_in_my)
10067 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
10068 0);
10069 if (tmp != NOT_IN_PAD) {
10070 /* might be an "our" variable" */
10071 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10072 /* build ops for a bareword */
10073 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10074 HEK * const stashname = HvNAME_HEK(stash);
10075 SV * const sym = newSVhek(stashname);
10076 sv_catpvs(sym, "::");
10077 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
10078 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
10079 pl_yylval.opval->op_private = OPpCONST_ENTERED;
10080 if (pit != '&')
10081 gv_fetchsv(sym,
10082 GV_ADDMULTI,
10083 ((PL_tokenbuf[0] == '$') ? SVt_PV
10084 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
10085 : SVt_PVHV));
10086 return BAREWORD;
10087 }
10088
10089 pl_yylval.opval = newOP(OP_PADANY, 0);
10090 pl_yylval.opval->op_targ = tmp;
10091 return PRIVATEREF;
10092 }
10093 }
10094
10095 /*
10096 Whine if they've said @foo or @foo{key} in a doublequoted string,
10097 and @foo (or %foo) isn't a variable we can find in the symbol
10098 table.
10099 */
10100 if (ckWARN(WARN_AMBIGUOUS)
10101 && pit == '@'
10102 && PL_lex_state != LEX_NORMAL
10103 && !PL_lex_brackets)
10104 {
10105 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
10106 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
10107 SVt_PVAV);
10108 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
10109 )
10110 {
10111 /* Downgraded from fatal to warning 20000522 mjd */
10112 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10113 "Possible unintended interpolation of %" UTF8f
10114 " in string",
10115 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
10116 }
10117 }
10118
10119 /* build ops for a bareword */
10120 pl_yylval.opval = newSVOP(OP_CONST, 0,
10121 newSVpvn_flags(PL_tokenbuf + 1,
10122 tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
10123 UTF ? SVf_UTF8 : 0 ));
10124 pl_yylval.opval->op_private = OPpCONST_ENTERED;
10125 if (pit != '&')
10126 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
10127 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
10128 | ( UTF ? SVf_UTF8 : 0 ),
10129 ((PL_tokenbuf[0] == '$') ? SVt_PV
10130 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
10131 : SVt_PVHV));
10132 return BAREWORD;
10133}
10134
10135STATIC void
10136S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10137{
10138 PERL_ARGS_ASSERT_CHECKCOMMA;
10139
10140 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10141 if (ckWARN(WARN_SYNTAX)) {
10142 int level = 1;
10143 const char *w;
10144 for (w = s+2; *w && level; w++) {
10145 if (*w == '(')
10146 ++level;
10147 else if (*w == ')')
10148 --level;
10149 }
10150 while (isSPACE(*w))
10151 ++w;
10152 /* the list of chars below is for end of statements or
10153 * block / parens, boolean operators (&&, ||, //) and branch
10154 * constructs (or, and, if, until, unless, while, err, for).
10155 * Not a very solid hack... */
10156 if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
10157 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10158 "%s (...) interpreted as function",name);
10159 }
10160 }
10161 while (s < PL_bufend && isSPACE(*s))
10162 s++;
10163 if (*s == '(')
10164 s++;
10165 while (s < PL_bufend && isSPACE(*s))
10166 s++;
10167 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
10168 const char * const w = s;
10169 s += UTF ? UTF8SKIP(s) : 1;
10170 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10171 s += UTF ? UTF8SKIP(s) : 1;
10172 while (s < PL_bufend && isSPACE(*s))
10173 s++;
10174 if (*s == ',') {
10175 GV* gv;
10176 if (keyword(w, s - w, 0))
10177 return;
10178
10179 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
10180 if (gv && GvCVu(gv))
10181 return;
10182 if (s - w <= 254) {
10183 PADOFFSET off;
10184 char tmpbuf[256];
10185 Copy(w, tmpbuf+1, s - w, char);
10186 *tmpbuf = '&';
10187 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
10188 if (off != NOT_IN_PAD) return;
10189 }
10190 Perl_croak(aTHX_ "No comma allowed after %s", what);
10191 }
10192 }
10193}
10194
10195/* S_new_constant(): do any overload::constant lookup.
10196
10197 Either returns sv, or mortalizes/frees sv and returns a new SV*.
10198 Best used as sv=new_constant(..., sv, ...).
10199 If s, pv are NULL, calls subroutine with one argument,
10200 and <type> is used with error messages only.
10201 <type> is assumed to be well formed UTF-8.
10202
10203 If error_msg is not NULL, *error_msg will be set to any error encountered.
10204 Otherwise yyerror() will be used to output it */
10205
10206STATIC SV *
10207S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10208 SV *sv, SV *pv, const char *type, STRLEN typelen,
10209 const char ** error_msg)
10210{
10211 dSP;
10212 HV * table = GvHV(PL_hintgv); /* ^H */
10213 SV *res;
10214 SV *errsv = NULL;
10215 SV **cvp;
10216 SV *cv, *typesv;
10217 const char *why1 = "", *why2 = "", *why3 = "";
10218 const char * optional_colon = ":"; /* Only some messages have a colon */
10219 char *msg;
10220
10221 PERL_ARGS_ASSERT_NEW_CONSTANT;
10222 /* We assume that this is true: */
10223 assert(type || s);
10224
10225 sv_2mortal(sv); /* Parent created it permanently */
10226
10227 if ( ! table
10228 || ! (PL_hints & HINT_LOCALIZE_HH))
10229 {
10230 why1 = "unknown";
10231 optional_colon = "";
10232 goto report;
10233 }
10234
10235 cvp = hv_fetch(table, key, keylen, FALSE);
10236 if (!cvp || !SvOK(*cvp)) {
10237 why1 = "$^H{";
10238 why2 = key;
10239 why3 = "} is not defined";
10240 goto report;
10241 }
10242
10243 cv = *cvp;
10244 if (!pv && s)
10245 pv = newSVpvn_flags(s, len, SVs_TEMP);
10246 if (type && pv)
10247 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
10248 else
10249 typesv = &PL_sv_undef;
10250
10251 PUSHSTACKi(PERLSI_OVERLOAD);
10252 ENTER ;
10253 SAVETMPS;
10254
10255 PUSHMARK(SP) ;
10256 EXTEND(sp, 3);
10257 if (pv)
10258 PUSHs(pv);
10259 PUSHs(sv);
10260 if (pv)
10261 PUSHs(typesv);
10262 PUTBACK;
10263 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10264
10265 SPAGAIN ;
10266
10267 /* Check the eval first */
10268 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
10269 STRLEN errlen;
10270 const char * errstr;
10271 sv_catpvs(errsv, "Propagated");
10272 errstr = SvPV_const(errsv, errlen);
10273 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
10274 (void)POPs;
10275 res = SvREFCNT_inc_simple_NN(sv);
10276 }
10277 else {
10278 res = POPs;
10279 SvREFCNT_inc_simple_void_NN(res);
10280 }
10281
10282 PUTBACK ;
10283 FREETMPS ;
10284 LEAVE ;
10285 POPSTACK;
10286
10287 if (SvOK(res)) {
10288 return res;
10289 }
10290
10291 sv = res;
10292 (void)sv_2mortal(sv);
10293
10294 why1 = "Call to &{$^H{";
10295 why2 = key;
10296 why3 = "}} did not return a defined value";
10297
10298 report:
10299
10300 msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
10301 (int)(type ? typelen : len),
10302 (type ? type: s),
10303 optional_colon,
10304 why1, why2, why3);
10305 if (error_msg) {
10306 *error_msg = msg;
10307 }
10308 else {
10309 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
10310 }
10311 return SvREFCNT_inc_simple_NN(sv);
10312}
10313
10314PERL_STATIC_INLINE void
10315S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
10316 bool is_utf8, bool check_dollar, bool tick_warn)
10317{
10318 int saw_tick = 0;
10319 const char *olds = *s;
10320 PERL_ARGS_ASSERT_PARSE_IDENT;
10321
10322 while (*s < PL_bufend) {
10323 if (*d >= e)
10324 Perl_croak(aTHX_ "%s", ident_too_long);
10325 if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
10326 /* The UTF-8 case must come first, otherwise things
10327 * like c\N{COMBINING TILDE} would start failing, as the
10328 * isWORDCHAR_A case below would gobble the 'c' up.
10329 */
10330
10331 char *t = *s + UTF8SKIP(*s);
10332 while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
10333 t += UTF8SKIP(t);
10334 }
10335 if (*d + (t - *s) > e)
10336 Perl_croak(aTHX_ "%s", ident_too_long);
10337 Copy(*s, *d, t - *s, char);
10338 *d += t - *s;
10339 *s = t;
10340 }
10341 else if ( isWORDCHAR_A(**s) ) {
10342 do {
10343 *(*d)++ = *(*s)++;
10344 } while (isWORDCHAR_A(**s) && *d < e);
10345 }
10346 else if ( allow_package
10347 && **s == '\''
10348 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
10349 {
10350 *(*d)++ = ':';
10351 *(*d)++ = ':';
10352 (*s)++;
10353 saw_tick++;
10354 }
10355 else if (allow_package && **s == ':' && (*s)[1] == ':'
10356 /* Disallow things like Foo::$bar. For the curious, this is
10357 * the code path that triggers the "Bad name after" warning
10358 * when looking for barewords.
10359 */
10360 && !(check_dollar && (*s)[2] == '$')) {
10361 *(*d)++ = *(*s)++;
10362 *(*d)++ = *(*s)++;
10363 }
10364 else
10365 break;
10366 }
10367 if (UNLIKELY(saw_tick && tick_warn && ckWARN2_d(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR))) {
10368 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10369 char *this_d;
10370 char *d2;
10371 Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
10372 d2 = this_d;
10373 SAVEFREEPV(this_d);
10374
10375 Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR),
10376 "Old package separator used in string");
10377 if (olds[-1] == '#')
10378 *d2++ = olds[-2];
10379 *d2++ = olds[-1];
10380 while (olds < *s) {
10381 if (*olds == '\'') {
10382 *d2++ = '\\';
10383 *d2++ = *olds++;
10384 }
10385 else
10386 *d2++ = *olds++;
10387 }
10388 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10389 "\t(Did you mean \"%" UTF8f "\" instead?)\n",
10390 UTF8fARG(is_utf8, d2-this_d, this_d));
10391 }
10392 else {
10393 Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR),
10394 "Old package separator \"'\" deprecated");
10395 }
10396 }
10397 return;
10398}
10399
10400/* Returns a NUL terminated string, with the length of the string written to
10401 *slp
10402
10403 scan_word6() may be removed once ' in names is removed.
10404 */
10405char *
10406Perl_scan_word6(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp, bool warn_tick)
10407{
10408 char *d = dest;
10409 char * const e = d + destlen - 3; /* two-character token, ending NUL */
10410 bool is_utf8 = cBOOL(UTF);
10411
10412 PERL_ARGS_ASSERT_SCAN_WORD6;
10413
10414 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, warn_tick);
10415 *d = '\0';
10416 *slp = d - dest;
10417 return s;
10418}
10419
10420char *
10421Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10422{
10423 PERL_ARGS_ASSERT_SCAN_WORD;
10424 return scan_word6(s, dest, destlen, allow_package, slp, FALSE);
10425}
10426
10427/* scan s and extract an identifier ($var) from it if possible
10428 * into dest.
10429 * XXX: This function has subtle implications on parsing, and
10430 * changing how it behaves can cause a variable to change from
10431 * being a run time rv2sv call or a compile time binding to a
10432 * specific variable name.
10433 */
10434STATIC char *
10435S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
10436{
10437 I32 herelines = PL_parser->herelines;
10438 SSize_t bracket = -1;
10439 char funny = *s++;
10440 char *d = dest;
10441 char * const e = d + destlen - 3; /* two-character token, ending NUL */
10442 bool is_utf8 = cBOOL(UTF);
10443 line_t orig_copline = 0, tmp_copline = 0;
10444
10445 PERL_ARGS_ASSERT_SCAN_IDENT;
10446
10447 if (isSPACE(*s) || !*s)
10448 s = skipspace(s);
10449 if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
10450 bool is_zero= *s == '0' ? TRUE : FALSE;
10451 char *digit_start= d;
10452 *d++ = *s++;
10453 while (s < PL_bufend && isDIGIT(*s)) {
10454 if (d >= e)
10455 Perl_croak(aTHX_ "%s", ident_too_long);
10456 *d++ = *s++;
10457 }
10458 if (is_zero && d - digit_start > 1)
10459 Perl_croak(aTHX_ ident_var_zero_multi_digit);
10460 }
10461 else { /* See if it is a "normal" identifier */
10462 parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
10463 }
10464 *d = '\0';
10465 d = dest;
10466 if (*d) {
10467 /* Either a digit variable, or parse_ident() found an identifier
10468 (anything valid as a bareword), so job done and return. */
10469 if (PL_lex_state != LEX_NORMAL)
10470 PL_lex_state = LEX_INTERPENDMAYBE;
10471 return s;
10472 }
10473
10474 /* Here, it is not a run-of-the-mill identifier name */
10475
10476 if (*s == '$' && s[1]
10477 && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
10478 || isDIGIT_A((U8)s[1])
10479 || s[1] == '$'
10480 || s[1] == '{'
10481 || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
10482 {
10483 /* Dereferencing a value in a scalar variable.
10484 The alternatives are different syntaxes for a scalar variable.
10485 Using ' as a leading package separator isn't allowed. :: is. */
10486 return s;
10487 }
10488 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
10489 if (*s == '{') {
10490 bracket = s - SvPVX(PL_linestr);
10491 s++;
10492 orig_copline = CopLINE(PL_curcop);
10493 if (s < PL_bufend && isSPACE(*s)) {
10494 s = skipspace(s);
10495 }
10496 }
10497
10498
10499 /* Extract the first character of the variable name from 's' and
10500 * copy it, null terminated into 'd'. Note that this does not
10501 * involve checking for just IDFIRST characters, as it allows the
10502 * '^' for ${^FOO} type variable names, and it allows all the
10503 * characters that are legal in a single character variable name.
10504 *
10505 * The legal ones are any of:
10506 * a) all ASCII characters except:
10507 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
10508 * 2) '{'
10509 * The final case currently doesn't get this far in the program, so we
10510 * don't test for it. If that were to change, it would be ok to allow it.
10511 * b) When not under Unicode rules, any upper Latin1 character
10512 * c) Otherwise, when unicode rules are used, all XIDS characters.
10513 *
10514 * Because all ASCII characters have the same representation whether
10515 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
10516 * '{' without knowing if is UTF-8 or not. */
10517
10518 if ((s <= PL_bufend - ((is_utf8)
10519 ? UTF8SKIP(s)
10520 : 1))
10521 && (
10522 isGRAPH_A(*s)
10523 ||
10524 ( is_utf8
10525 ? isIDFIRST_utf8_safe(s, PL_bufend)
10526 : (isGRAPH_L1(*s)
10527 && LIKELY((U8) *s != LATIN1_TO_NATIVE(0xAD))
10528 )
10529 )
10530 )
10531 ){
10532 if (is_utf8) {
10533 const STRLEN skip = UTF8SKIP(s);
10534 STRLEN i;
10535 d[skip] = '\0';
10536 for ( i = 0; i < skip; i++ )
10537 d[i] = *s++;
10538 }
10539 else {
10540 *d = *s++;
10541 d[1] = '\0';
10542 }
10543 }
10544
10545 /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
10546 if (isDIGIT(*d)) {
10547 bool is_zero= *d == '0' ? TRUE : FALSE;
10548 char *digit_start= d;
10549 while (s < PL_bufend && isDIGIT(*s)) {
10550 d++;
10551 if (d >= e)
10552 Perl_croak(aTHX_ "%s", ident_too_long);
10553 *d= *s++;
10554 }
10555 if (is_zero && d - digit_start >= 1) /* d points at the last digit */
10556 Perl_croak(aTHX_ ident_var_zero_multi_digit);
10557 d[1] = '\0';
10558 }
10559
10560 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10561 else if (*d == '^' && *s && isCONTROLVAR(*s)) {
10562 *d = toCTRL(*s);
10563 s++;
10564 }
10565 /* Warn about ambiguous code after unary operators if {...} notation isn't
10566 used. There's no difference in ambiguity; it's merely a heuristic
10567 about when not to warn. */
10568 else if (ck_uni && bracket == -1)
10569 check_uni();
10570
10571 if (bracket != -1) {
10572 bool skip;
10573 char *s2;
10574 /* If we were processing {...} notation then... */
10575 if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10576 || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10577 && isWORDCHAR(*s))
10578 ) {
10579 /* note we have to check for a normal identifier first,
10580 * as it handles utf8 symbols, and only after that has
10581 * been ruled out can we look at the caret words */
10582 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10583 /* if it starts as a valid identifier, assume that it is one.
10584 (the later check for } being at the expected point will trap
10585 cases where this doesn't pan out.) */
10586 d += is_utf8 ? UTF8SKIP(d) : 1;
10587 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10588 *d = '\0';
10589 }
10590 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10591 d++;
10592 while (isWORDCHAR(*s) && d < e) {
10593 *d++ = *s++;
10594 }
10595 if (d >= e)
10596 Perl_croak(aTHX_ "%s", ident_too_long);
10597 *d = '\0';
10598 }
10599 tmp_copline = CopLINE(PL_curcop);
10600 if (s < PL_bufend && isSPACE(*s)) {
10601 s = skipspace(s);
10602 }
10603 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10604 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */
10605 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10606 const char * const brack =
10607 (const char *)
10608 ((*s == '[') ? "[...]" : "{...}");
10609 orig_copline = CopLINE(PL_curcop);
10610 CopLINE_set(PL_curcop, tmp_copline);
10611 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10612 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10613 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10614 funny, dest, brack, funny, dest, brack);
10615 CopLINE_set(PL_curcop, orig_copline);
10616 }
10617 bracket++;
10618 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10619 PL_lex_allbrackets++;
10620 return s;
10621 }
10622 }
10623
10624 if ( !tmp_copline )
10625 tmp_copline = CopLINE(PL_curcop);
10626 if ((skip = s < PL_bufend && isSPACE(*s))) {
10627 /* Avoid incrementing line numbers or resetting PL_linestart,
10628 in case we have to back up. */
10629 STRLEN s_off = s - SvPVX(PL_linestr);
10630 s2 = peekspace(s);
10631 s = SvPVX(PL_linestr) + s_off;
10632 }
10633 else
10634 s2 = s;
10635
10636 /* Expect to find a closing } after consuming any trailing whitespace.
10637 */
10638 if (*s2 == '}') {
10639 /* Now increment line numbers if applicable. */
10640 if (skip)
10641 s = skipspace(s);
10642 s++;
10643 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10644 PL_lex_state = LEX_INTERPEND;
10645 PL_expect = XREF;
10646 }
10647 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10648 if (ckWARN(WARN_AMBIGUOUS)
10649 && (keyword(dest, d - dest, 0)
10650 || get_cvn_flags(dest, d - dest, is_utf8
10651 ? SVf_UTF8
10652 : 0)))
10653 {
10654 SV *tmp = newSVpvn_flags( dest, d - dest,
10655 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10656 if (funny == '#')
10657 funny = '@';
10658 orig_copline = CopLINE(PL_curcop);
10659 CopLINE_set(PL_curcop, tmp_copline);
10660 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10661 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10662 funny, SVfARG(tmp), funny, SVfARG(tmp));
10663 CopLINE_set(PL_curcop, orig_copline);
10664 }
10665 }
10666 }
10667 else {
10668 /* Didn't find the closing } at the point we expected, so restore
10669 state such that the next thing to process is the opening { and */
10670 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10671 CopLINE_set(PL_curcop, orig_copline);
10672 PL_parser->herelines = herelines;
10673 *dest = '\0';
10674 PL_parser->sub_no_recover = TRUE;
10675 }
10676 }
10677 else if ( PL_lex_state == LEX_INTERPNORMAL
10678 && !PL_lex_brackets
10679 && !intuit_more(s, PL_bufend))
10680 PL_lex_state = LEX_INTERPEND;
10681 return s;
10682}
10683
10684static bool
10685S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10686
10687 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10688 * found in the parse starting at 's', based on the subset that are valid
10689 * in this context input to this routine in 'valid_flags'. Advances s.
10690 * Returns TRUE if the input should be treated as a valid flag, so the next
10691 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10692 * upon first call on the current regex. This routine will set it to any
10693 * charset modifier found. The caller shouldn't change it. This way,
10694 * another charset modifier encountered in the parse can be detected as an
10695 * error, as we have decided to allow only one */
10696
10697 const char c = **s;
10698 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10699
10700 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10701 if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10702 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10703 UTF ? SVf_UTF8 : 0);
10704 (*s) += charlen;
10705 /* Pretend that it worked, so will continue processing before
10706 * dieing */
10707 return TRUE;
10708 }
10709 return FALSE;
10710 }
10711
10712 switch (c) {
10713
10714 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10715 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10716 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10717 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10718 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
10719 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10720 case LOCALE_PAT_MOD:
10721 if (*charset) {
10722 goto multiple_charsets;
10723 }
10724 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10725 *charset = c;
10726 break;
10727 case UNICODE_PAT_MOD:
10728 if (*charset) {
10729 goto multiple_charsets;
10730 }
10731 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10732 *charset = c;
10733 break;
10734 case ASCII_RESTRICT_PAT_MOD:
10735 if (! *charset) {
10736 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10737 }
10738 else {
10739
10740 /* Error if previous modifier wasn't an 'a', but if it was, see
10741 * if, and accept, a second occurrence (only) */
10742 if (*charset != 'a'
10743 || get_regex_charset(*pmfl)
10744 != REGEX_ASCII_RESTRICTED_CHARSET)
10745 {
10746 goto multiple_charsets;
10747 }
10748 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10749 }
10750 *charset = c;
10751 break;
10752 case DEPENDS_PAT_MOD:
10753 if (*charset) {
10754 goto multiple_charsets;
10755 }
10756 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10757 *charset = c;
10758 break;
10759 }
10760
10761 (*s)++;
10762 return TRUE;
10763
10764 multiple_charsets:
10765 if (*charset != c) {
10766 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10767 }
10768 else if (c == 'a') {
10769 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10770 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10771 }
10772 else {
10773 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10774 }
10775
10776 /* Pretend that it worked, so will continue processing before dieing */
10777 (*s)++;
10778 return TRUE;
10779}
10780
10781STATIC char *
10782S_scan_pat(pTHX_ char *start, I32 type)
10783{
10784 PMOP *pm;
10785 char *s;
10786 const char * const valid_flags =
10787 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10788 char charset = '\0'; /* character set modifier */
10789 unsigned int x_mod_count = 0;
10790
10791 PERL_ARGS_ASSERT_SCAN_PAT;
10792
10793 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10794 if (!s)
10795 Perl_croak(aTHX_ "Search pattern not terminated");
10796
10797 pm = (PMOP*)newPMOP(type, 0);
10798 if (PL_multi_open == '?') {
10799 /* This is the only point in the code that sets PMf_ONCE: */
10800 pm->op_pmflags |= PMf_ONCE;
10801
10802 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10803 allows us to restrict the list needed by reset to just the ??
10804 matches. */
10805 assert(type != OP_TRANS);
10806 if (PL_curstash) {
10807 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10808 U32 elements;
10809 if (!mg) {
10810 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10811 0);
10812 }
10813 elements = mg->mg_len / sizeof(PMOP**);
10814 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10815 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10816 mg->mg_len = elements * sizeof(PMOP**);
10817 PmopSTASH_set(pm,PL_curstash);
10818 }
10819 }
10820
10821 /* if qr/...(?{..}).../, then need to parse the pattern within a new
10822 * anon CV. False positives like qr/[(?{]/ are harmless */
10823
10824 if (type == OP_QR) {
10825 STRLEN len;
10826 char *e, *p = SvPV(PL_lex_stuff, len);
10827 e = p + len;
10828 for (; p < e; p++) {
10829 if (p[0] == '(' && (
10830 (p[1] == '?' && (p[2] == '{' ||
10831 (p[2] == '?' && p[3] == '{'))) ||
10832 (p[1] == '*' && (p[2] == '{' ||
10833 (p[2] == '*' && p[3] == '{')))
10834 )){
10835 pm->op_pmflags |= PMf_HAS_CV;
10836 break;
10837 }
10838 }
10839 pm->op_pmflags |= PMf_IS_QR;
10840 }
10841
10842 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10843 &s, &charset, &x_mod_count))
10844 {};
10845 /* issue a warning if /c is specified,but /g is not */
10846 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10847 {
10848 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10849 "Use of /c modifier is meaningless without /g" );
10850 }
10851
10852 PL_lex_op = (OP*)pm;
10853 pl_yylval.ival = OP_MATCH;
10854 return s;
10855}
10856
10857STATIC char *
10858S_scan_subst(pTHX_ char *start)
10859{
10860 char *s;
10861 PMOP *pm;
10862 I32 first_start;
10863 line_t first_line;
10864 line_t linediff = 0;
10865 I32 es = 0;
10866 char charset = '\0'; /* character set modifier */
10867 unsigned int x_mod_count = 0;
10868 char *t;
10869
10870 PERL_ARGS_ASSERT_SCAN_SUBST;
10871
10872 pl_yylval.ival = OP_NULL;
10873
10874 s = scan_str(start, TRUE, FALSE, FALSE, &t);
10875
10876 if (!s)
10877 Perl_croak(aTHX_ "Substitution pattern not terminated");
10878
10879 s = t;
10880
10881 first_start = PL_multi_start;
10882 first_line = CopLINE(PL_curcop);
10883 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10884 if (!s) {
10885 SvREFCNT_dec_NN(PL_lex_stuff);
10886 PL_lex_stuff = NULL;
10887 Perl_croak(aTHX_ "Substitution replacement not terminated");
10888 }
10889 PL_multi_start = first_start; /* so whole substitution is taken together */
10890
10891 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10892
10893
10894 while (*s) {
10895 if (*s == EXEC_PAT_MOD) {
10896 s++;
10897 es++;
10898 }
10899 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10900 &s, &charset, &x_mod_count))
10901 {
10902 break;
10903 }
10904 }
10905
10906 if ((pm->op_pmflags & PMf_CONTINUE)) {
10907 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10908 }
10909
10910 if (es) {
10911 SV * const repl = newSVpvs("");
10912
10913 PL_multi_end = 0;
10914 pm->op_pmflags |= PMf_EVAL;
10915 for (; es > 1; es--) {
10916 sv_catpvs(repl, "eval ");
10917 }
10918 sv_catpvs(repl, "do {");
10919 sv_catsv(repl, PL_parser->lex_sub_repl);
10920 sv_catpvs(repl, "}");
10921 SvREFCNT_dec(PL_parser->lex_sub_repl);
10922 PL_parser->lex_sub_repl = repl;
10923 }
10924
10925
10926 linediff = CopLINE(PL_curcop) - first_line;
10927 if (linediff)
10928 CopLINE_set(PL_curcop, first_line);
10929
10930 if (linediff || es) {
10931 /* the IVX field indicates that the replacement string is a s///e;
10932 * the NVX field indicates how many src code lines the replacement
10933 * spreads over */
10934 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10935 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10936 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10937 cBOOL(es);
10938 }
10939
10940 PL_lex_op = (OP*)pm;
10941 pl_yylval.ival = OP_SUBST;
10942 return s;
10943}
10944
10945STATIC char *
10946S_scan_trans(pTHX_ char *start)
10947{
10948 char* s;
10949 OP *o;
10950 U8 squash;
10951 U8 del;
10952 U8 complement;
10953 bool nondestruct = 0;
10954 char *t;
10955
10956 PERL_ARGS_ASSERT_SCAN_TRANS;
10957
10958 pl_yylval.ival = OP_NULL;
10959
10960 s = scan_str(start,FALSE,FALSE,FALSE,&t);
10961 if (!s)
10962 Perl_croak(aTHX_ "Transliteration pattern not terminated");
10963
10964 s = t;
10965
10966 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10967 if (!s) {
10968 SvREFCNT_dec_NN(PL_lex_stuff);
10969 PL_lex_stuff = NULL;
10970 Perl_croak(aTHX_ "Transliteration replacement not terminated");
10971 }
10972
10973 complement = del = squash = 0;
10974 while (1) {
10975 switch (*s) {
10976 case 'c':
10977 complement = OPpTRANS_COMPLEMENT;
10978 break;
10979 case 'd':
10980 del = OPpTRANS_DELETE;
10981 break;
10982 case 's':
10983 squash = OPpTRANS_SQUASH;
10984 break;
10985 case 'r':
10986 nondestruct = 1;
10987 break;
10988 default:
10989 goto no_more;
10990 }
10991 s++;
10992 }
10993 no_more:
10994
10995 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10996 o->op_private &= ~OPpTRANS_ALL;
10997 o->op_private |= del|squash|complement;
10998
10999 PL_lex_op = o;
11000 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
11001
11002
11003 return s;
11004}
11005
11006/* scan_heredoc
11007 Takes a pointer to the first < in <<FOO.
11008 Returns a pointer to the byte following <<FOO.
11009
11010 This function scans a heredoc, which involves different methods
11011 depending on whether we are in a string eval, quoted construct, etc.
11012 This is because PL_linestr could containing a single line of input, or
11013 a whole string being evalled, or the contents of the current quote-
11014 like operator.
11015
11016 The two basic methods are:
11017 - Steal lines from the input stream
11018 - Scan the heredoc in PL_linestr and remove it therefrom
11019
11020 In a file scope or filtered eval, the first method is used; in a
11021 string eval, the second.
11022
11023 In a quote-like operator, we have to choose between the two,
11024 depending on where we can find a newline. We peek into outer lex-
11025 ing scopes until we find one with a newline in it. If we reach the
11026 outermost lexing scope and it is a file, we use the stream method.
11027 Otherwise it is treated as an eval.
11028*/
11029
11030STATIC char *
11031S_scan_heredoc(pTHX_ char *s)
11032{
11033 I32 op_type = OP_SCALAR;
11034 I32 len;
11035 SV *tmpstr;
11036 char term;
11037 char *d;
11038 char *e;
11039 char *peek;
11040 char *indent = 0;
11041 I32 indent_len = 0;
11042 bool indented = FALSE;
11043 const bool infile = PL_rsfp || PL_parser->filtered;
11044 const line_t origline = CopLINE(PL_curcop);
11045 LEXSHARED *shared = PL_parser->lex_shared;
11046
11047 PERL_ARGS_ASSERT_SCAN_HEREDOC;
11048
11049 s += 2;
11050 d = PL_tokenbuf + 1;
11051 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11052 *PL_tokenbuf = '\n';
11053 peek = s;
11054
11055 if (*peek == '~') {
11056 indented = TRUE;
11057 peek++; s++;
11058 }
11059
11060 while (SPACE_OR_TAB(*peek))
11061 peek++;
11062
11063 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11064 s = peek;
11065 term = *s++;
11066 s = delimcpy(d, e, s, PL_bufend, term, &len);
11067 if (s == PL_bufend)
11068 Perl_croak(aTHX_ "Unterminated delimiter for here document");
11069 d += len;
11070 s++;
11071 }
11072 else {
11073 if (*s == '\\')
11074 /* <<\FOO is equivalent to <<'FOO' */
11075 s++, term = '\'';
11076 else
11077 term = '"';
11078
11079 if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
11080 Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
11081
11082 peek = s;
11083
11084 while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
11085 peek += UTF ? UTF8SKIP(peek) : 1;
11086 }
11087
11088 len = (peek - s >= e - d) ? (e - d) : (peek - s);
11089 Copy(s, d, len, char);
11090 s += len;
11091 d += len;
11092 }
11093
11094 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11095 Perl_croak(aTHX_ "Delimiter for here document is too long");
11096
11097 *d++ = '\n';
11098 *d = '\0';
11099 len = d - PL_tokenbuf;
11100
11101#ifndef PERL_STRICT_CR
11102 d = (char *) memchr(s, '\r', PL_bufend - s);
11103 if (d) {
11104 char * const olds = s;
11105 s = d;
11106 while (s < PL_bufend) {
11107 if (*s == '\r') {
11108 *d++ = '\n';
11109 if (*++s == '\n')
11110 s++;
11111 }
11112 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11113 *d++ = *s++;
11114 s++;
11115 }
11116 else
11117 *d++ = *s++;
11118 }
11119 *d = '\0';
11120 PL_bufend = d;
11121 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11122 s = olds;
11123 }
11124#endif
11125
11126 tmpstr = newSV_type(SVt_PVIV);
11127 if (term == '\'') {
11128 op_type = OP_CONST;
11129 SvIV_set(tmpstr, -1);
11130 }
11131 else if (term == '`') {
11132 op_type = OP_BACKTICK;
11133 SvIV_set(tmpstr, '\\');
11134 }
11135
11136 PL_multi_start = origline + 1 + PL_parser->herelines;
11137 PL_multi_open = PL_multi_close = '<';
11138
11139 /* inside a string eval or quote-like operator */
11140 if (!infile || PL_lex_inwhat) {
11141 SV *linestr;
11142 char *bufend;
11143 char * const olds = s;
11144 PERL_CONTEXT * const cx = CX_CUR();
11145 /* These two fields are not set until an inner lexing scope is
11146 entered. But we need them set here. */
11147 shared->ls_bufptr = s;
11148 shared->ls_linestr = PL_linestr;
11149
11150 if (PL_lex_inwhat) {
11151 /* Look for a newline. If the current buffer does not have one,
11152 peek into the line buffer of the parent lexing scope, going
11153 up as many levels as necessary to find one with a newline
11154 after bufptr.
11155 */
11156 while (!(s = (char *)memchr(
11157 (void *)shared->ls_bufptr, '\n',
11158 SvEND(shared->ls_linestr)-shared->ls_bufptr
11159 )))
11160 {
11161 shared = shared->ls_prev;
11162 /* shared is only null if we have gone beyond the outermost
11163 lexing scope. In a file, we will have broken out of the
11164 loop in the previous iteration. In an eval, the string buf-
11165 fer ends with "\n;", so the while condition above will have
11166 evaluated to false. So shared can never be null. Or so you
11167 might think. Odd syntax errors like s;@{<<; can gobble up
11168 the implicit semicolon at the end of a flie, causing the
11169 file handle to be closed even when we are not in a string
11170 eval. So shared may be null in that case.
11171 (Closing '>>}' here to balance the earlier open brace for
11172 editors that look for matched pairs.) */
11173 if (UNLIKELY(!shared))
11174 goto interminable;
11175 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
11176 most lexing scope. In a file, shared->ls_linestr at that
11177 level is just one line, so there is no body to steal. */
11178 if (infile && !shared->ls_prev) {
11179 s = olds;
11180 goto streaming;
11181 }
11182 }
11183 }
11184 else { /* eval or we've already hit EOF */
11185 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
11186 if (!s)
11187 goto interminable;
11188 }
11189
11190 linestr = shared->ls_linestr;
11191 bufend = SvEND(linestr);
11192 d = s;
11193 if (indented) {
11194 char *myolds = s;
11195
11196 while (s < bufend - len + 1) {
11197 if (*s++ == '\n')
11198 ++PL_parser->herelines;
11199
11200 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
11201 char *backup = s;
11202 indent_len = 0;
11203
11204 /* Only valid if it's preceded by whitespace only */
11205 while (backup != myolds && --backup >= myolds) {
11206 if (! SPACE_OR_TAB(*backup)) {
11207 break;
11208 }
11209 indent_len++;
11210 }
11211
11212 /* No whitespace or all! */
11213 if (backup == s || *backup == '\n') {
11214 Newx(indent, indent_len + 1, char);
11215 memcpy(indent, backup + 1, indent_len);
11216 indent[indent_len] = 0;
11217 s--; /* before our delimiter */
11218 PL_parser->herelines--; /* this line doesn't count */
11219 break;
11220 }
11221 }
11222 }
11223 }
11224 else {
11225 while (s < bufend - len + 1
11226 && memNE(s,PL_tokenbuf,len) )
11227 {
11228 if (*s++ == '\n')
11229 ++PL_parser->herelines;
11230 }
11231 }
11232
11233 if (s >= bufend - len + 1) {
11234 goto interminable;
11235 }
11236
11237 sv_setpvn_fresh(tmpstr,d+1,s-d);
11238 s += len - 1;
11239 /* the preceding stmt passes a newline */
11240 PL_parser->herelines++;
11241
11242 /* s now points to the newline after the heredoc terminator.
11243 d points to the newline before the body of the heredoc.
11244 */
11245
11246 /* We are going to modify linestr in place here, so set
11247 aside copies of the string if necessary for re-evals or
11248 (caller $n)[6]. */
11249 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
11250 check shared->re_eval_str. */
11251 if (shared->re_eval_start || shared->re_eval_str) {
11252 /* Set aside the rest of the regexp */
11253 if (!shared->re_eval_str)
11254 shared->re_eval_str =
11255 newSVpvn(shared->re_eval_start,
11256 bufend - shared->re_eval_start);
11257 shared->re_eval_start -= s-d;
11258 }
11259
11260 if (cxstack_ix >= 0
11261 && CxTYPE(cx) == CXt_EVAL
11262 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
11263 && cx->blk_eval.cur_text == linestr)
11264 {
11265 cx->blk_eval.cur_text = newSVsv(linestr);
11266 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
11267 }
11268
11269 /* Copy everything from s onwards back to d. */
11270 Move(s,d,bufend-s + 1,char);
11271 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
11272 /* Setting PL_bufend only applies when we have not dug deeper
11273 into other scopes, because sublex_done sets PL_bufend to
11274 SvEND(PL_linestr). */
11275 if (shared == PL_parser->lex_shared)
11276 PL_bufend = SvEND(linestr);
11277 s = olds;
11278 }
11279 else {
11280 SV *linestr_save;
11281 char *oldbufptr_save;
11282 char *oldoldbufptr_save;
11283 streaming:
11284 sv_grow_fresh(tmpstr, 80);
11285 SvPVCLEAR_FRESH(tmpstr); /* avoid "uninitialized" warning */
11286 term = PL_tokenbuf[1];
11287 len--;
11288 linestr_save = PL_linestr; /* must restore this afterwards */
11289 d = s; /* and this */
11290 oldbufptr_save = PL_oldbufptr;
11291 oldoldbufptr_save = PL_oldoldbufptr;
11292 PL_linestr = newSVpvs("");
11293 PL_bufend = SvPVX(PL_linestr);
11294
11295 while (1) {
11296 PL_bufptr = PL_bufend;
11297 CopLINE_set(PL_curcop,
11298 origline + 1 + PL_parser->herelines);
11299
11300 if ( !lex_next_chunk(LEX_NO_TERM)
11301 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
11302 {
11303 /* Simply freeing linestr_save might seem simpler here, as it
11304 does not matter what PL_linestr points to, since we are
11305 about to croak; but in a quote-like op, linestr_save
11306 will have been prospectively freed already, via
11307 SAVEFREESV(PL_linestr) in sublex_push, so it's easier to
11308 restore PL_linestr. */
11309 SvREFCNT_dec_NN(PL_linestr);
11310 PL_linestr = linestr_save;
11311 PL_oldbufptr = oldbufptr_save;
11312 PL_oldoldbufptr = oldoldbufptr_save;
11313 goto interminable;
11314 }
11315
11316 CopLINE_set(PL_curcop, origline);
11317
11318 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
11319 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
11320 /* ^That should be enough to avoid this needing to grow: */
11321 sv_catpvs(PL_linestr, "\n\0");
11322 assert(s == SvPVX(PL_linestr));
11323 PL_bufend = SvEND(PL_linestr);
11324 }
11325
11326 s = PL_bufptr;
11327 PL_parser->herelines++;
11328 PL_last_lop = PL_last_uni = NULL;
11329
11330#ifndef PERL_STRICT_CR
11331 if (PL_bufend - PL_linestart >= 2) {
11332 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
11333 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11334 {
11335 PL_bufend[-2] = '\n';
11336 PL_bufend--;
11337 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11338 }
11339 else if (PL_bufend[-1] == '\r')
11340 PL_bufend[-1] = '\n';
11341 }
11342 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11343 PL_bufend[-1] = '\n';
11344#endif
11345
11346 if (indented && (PL_bufend-s) >= len) {
11347 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
11348
11349 if (found) {
11350 char *backup = found;
11351 indent_len = 0;
11352
11353 /* Only valid if it's preceded by whitespace only */
11354 while (backup != s && --backup >= s) {
11355 if (! SPACE_OR_TAB(*backup)) {
11356 break;
11357 }
11358 indent_len++;
11359 }
11360
11361 /* All whitespace or none! */
11362 if (backup == found || SPACE_OR_TAB(*backup)) {
11363 Newx(indent, indent_len + 1, char);
11364 memcpy(indent, backup, indent_len);
11365 indent[indent_len] = 0;
11366 SvREFCNT_dec(PL_linestr);
11367 PL_linestr = linestr_save;
11368 PL_linestart = SvPVX(linestr_save);
11369 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11370 PL_oldbufptr = oldbufptr_save;
11371 PL_oldoldbufptr = oldoldbufptr_save;
11372 s = d;
11373 break;
11374 }
11375 }
11376
11377 /* Didn't find it */
11378 sv_catsv(tmpstr,PL_linestr);
11379 }
11380 else {
11381 if (*s == term && PL_bufend-s >= len
11382 && memEQ(s,PL_tokenbuf + 1,len))
11383 {
11384 SvREFCNT_dec(PL_linestr);
11385 PL_linestr = linestr_save;
11386 PL_linestart = SvPVX(linestr_save);
11387 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11388 PL_oldbufptr = oldbufptr_save;
11389 PL_oldoldbufptr = oldoldbufptr_save;
11390 s = d;
11391 break;
11392 }
11393 else {
11394 sv_catsv(tmpstr,PL_linestr);
11395 }
11396 }
11397 } /* while (1) */
11398 }
11399
11400 PL_multi_end = origline + PL_parser->herelines;
11401
11402 if (indented && indent) {
11403 STRLEN linecount = 1;
11404 STRLEN herelen = SvCUR(tmpstr);
11405 char *ss = SvPVX(tmpstr);
11406 char *se = ss + herelen;
11407 SV *newstr = newSV(herelen+1);
11408 SvPOK_on(newstr);
11409
11410 /* Trim leading whitespace */
11411 while (ss < se) {
11412 /* newline only? Copy and move on */
11413 if (*ss == '\n') {
11414 sv_catpvs(newstr,"\n");
11415 ss++;
11416 linecount++;
11417
11418 /* Found our indentation? Strip it */
11419 }
11420 else if (se - ss >= indent_len
11421 && memEQ(ss, indent, indent_len))
11422 {
11423 STRLEN le = 0;
11424 ss += indent_len;
11425
11426 while ((ss + le) < se && *(ss + le) != '\n')
11427 le++;
11428
11429 sv_catpvn(newstr, ss, le);
11430 ss += le;
11431
11432 /* Line doesn't begin with our indentation? Croak */
11433 }
11434 else {
11435 Safefree(indent);
11436 Perl_croak(aTHX_
11437 "Indentation on line %d of here-doc doesn't match delimiter",
11438 (int)linecount
11439 );
11440 }
11441 } /* while */
11442
11443 /* avoid sv_setsv() as we don't want to COW here */
11444 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
11445 Safefree(indent);
11446 SvREFCNT_dec_NN(newstr);
11447 }
11448
11449 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11450 SvPV_shrink_to_cur(tmpstr);
11451 }
11452
11453 if (!IN_BYTES) {
11454 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11455 SvUTF8_on(tmpstr);
11456 }
11457
11458 PL_lex_stuff = tmpstr;
11459 pl_yylval.ival = op_type;
11460 return s;
11461
11462 interminable:
11463 if (indent)
11464 Safefree(indent);
11465 SvREFCNT_dec(tmpstr);
11466 CopLINE_set(PL_curcop, origline);
11467 missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
11468}
11469
11470
11471/* scan_inputsymbol
11472 takes: position of first '<' in input buffer
11473 returns: position of first char following the matching '>' in
11474 input buffer
11475 side-effects: pl_yylval and lex_op are set.
11476
11477 This code handles:
11478
11479 <> read from ARGV
11480 <<>> read from ARGV without magic open
11481 <FH> read from filehandle
11482 <pkg::FH> read from package qualified filehandle
11483 <pkg'FH> read from package qualified filehandle
11484 <$fh> read from filehandle in $fh
11485 <*.h> filename glob
11486
11487*/
11488
11489STATIC char *
11490S_scan_inputsymbol(pTHX_ char *start)
11491{
11492 char *s = start; /* current position in buffer */
11493 char *end;
11494 I32 len;
11495 bool nomagicopen = FALSE;
11496 char *d = PL_tokenbuf; /* start of temp holding space */
11497 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11498
11499 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11500
11501 end = (char *) memchr(s, '\n', PL_bufend - s);
11502 if (!end)
11503 end = PL_bufend;
11504 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
11505 nomagicopen = TRUE;
11506 *d = '\0';
11507 len = 0;
11508 s += 3;
11509 }
11510 else
11511 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11512
11513 /* die if we didn't have space for the contents of the <>,
11514 or if it didn't end, or if we see a newline
11515 */
11516
11517 if (len >= (I32)sizeof PL_tokenbuf)
11518 Perl_croak(aTHX_ "Excessively long <> operator");
11519 if (s >= end)
11520 Perl_croak(aTHX_ "Unterminated <> operator");
11521
11522 s++;
11523
11524 /* check for <$fh>
11525 Remember, only scalar variables are interpreted as filehandles by
11526 this code. Anything more complex (e.g., <$fh{$num}>) will be
11527 treated as a glob() call.
11528 This code makes use of the fact that except for the $ at the front,
11529 a scalar variable and a filehandle look the same.
11530 */
11531 if (*d == '$' && d[1]) d++;
11532
11533 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11534 while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
11535 d += UTF ? UTF8SKIP(d) : 1;
11536 }
11537
11538 /* If we've tried to read what we allow filehandles to look like, and
11539 there's still text left, then it must be a glob() and not a getline.
11540 Use scan_str to pull out the stuff between the <> and treat it
11541 as nothing more than a string.
11542 */
11543
11544 if (d - PL_tokenbuf != len) {
11545 pl_yylval.ival = OP_GLOB;
11546 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
11547 if (!s)
11548 Perl_croak(aTHX_ "Glob not terminated");
11549 return s;
11550 }
11551 else {
11552 bool readline_overridden = FALSE;
11553 GV *gv_readline;
11554 /* we're in a filehandle read situation */
11555 d = PL_tokenbuf;
11556
11557 /* turn <> into <ARGV> */
11558 if (!len)
11559 Copy("ARGV",d,5,char);
11560
11561 /* Check whether readline() is overridden */
11562 if ((gv_readline = gv_override("readline",8)))
11563 readline_overridden = TRUE;
11564
11565 /* if <$fh>, create the ops to turn the variable into a
11566 filehandle
11567 */
11568 if (*d == '$') {
11569 /* try to find it in the pad for this block, otherwise find
11570 add symbol table ops
11571 */
11572 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11573 if (tmp != NOT_IN_PAD) {
11574 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11575 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11576 HEK * const stashname = HvNAME_HEK(stash);
11577 SV * const sym = newSVhek_mortal(stashname);
11578 sv_catpvs(sym, "::");
11579 sv_catpv(sym, d+1);
11580 d = SvPVX(sym);
11581 goto intro_sym;
11582 }
11583 else {
11584 OP * const o = newPADxVOP(OP_PADSV, 0, tmp);
11585 PL_lex_op = readline_overridden
11586 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11587 op_append_elem(OP_LIST, o,
11588 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11589 : newUNOP(OP_READLINE, 0, o);
11590 }
11591 }
11592 else {
11593 GV *gv;
11594 ++d;
11595 intro_sym:
11596 gv = gv_fetchpv(d,
11597 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11598 SVt_PV);
11599 PL_lex_op = readline_overridden
11600 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11601 op_append_elem(OP_LIST,
11602 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11603 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11604 : newUNOP(OP_READLINE, 0,
11605 newUNOP(OP_RV2SV, 0,
11606 newGVOP(OP_GV, 0, gv)));
11607 }
11608 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11609 pl_yylval.ival = OP_NULL;
11610 }
11611
11612 /* If it's none of the above, it must be a literal filehandle
11613 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11614 else {
11615 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11616 PL_lex_op = readline_overridden
11617 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11618 op_append_elem(OP_LIST,
11619 newGVOP(OP_GV, 0, gv),
11620 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11621 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11622 pl_yylval.ival = OP_NULL;
11623
11624 /* leave the token generation above to avoid confusing the parser */
11625 if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
11626 no_bareword_filehandle(d);
11627 }
11628 }
11629 }
11630
11631 return s;
11632}
11633
11634
11635/* scan_str
11636 takes:
11637 start position in buffer
11638 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
11639 only if they are of the open/close form
11640 keep_delims preserve the delimiters around the string
11641 re_reparse compiling a run-time /(?{})/:
11642 collapse // to /, and skip encoding src
11643 delimp if non-null, this is set to the position of
11644 the closing delimiter, or just after it if
11645 the closing and opening delimiters differ
11646 (i.e., the opening delimiter of a substitu-
11647 tion replacement)
11648 returns: position to continue reading from buffer
11649 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11650 updates the read buffer.
11651
11652 This subroutine pulls a string out of the input. It is called for:
11653 q single quotes q(literal text)
11654 ' single quotes 'literal text'
11655 qq double quotes qq(interpolate $here please)
11656 " double quotes "interpolate $here please"
11657 qx backticks qx(/bin/ls -l)
11658 ` backticks `/bin/ls -l`
11659 qw quote words @EXPORT_OK = qw( func() $spam )
11660 m// regexp match m/this/
11661 s/// regexp substitute s/this/that/
11662 tr/// string transliterate tr/this/that/
11663 y/// string transliterate y/this/that/
11664 ($*@) sub prototypes sub foo ($)
11665 (stuff) sub attr parameters sub foo : attr(stuff)
11666 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11667
11668 In most of these cases (all but <>, patterns and transliterate)
11669 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11670 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11671 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11672 calls scan_str().
11673
11674 It skips whitespace before the string starts, and treats the first
11675 character as the delimiter. If the delimiter is one of ([{< then
11676 the corresponding "close" character )]}> is used as the closing
11677 delimiter. It allows quoting of delimiters, and if the string has
11678 balanced delimiters ([{<>}]) it allows nesting.
11679
11680 On success, the SV with the resulting string is put into lex_stuff or,
11681 if that is already non-NULL, into lex_repl. The second case occurs only
11682 when parsing the RHS of the special constructs s/// and tr/// (y///).
11683 For convenience, the terminating delimiter character is stuffed into
11684 SvIVX of the SV.
11685*/
11686
11687char *
11688Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11689 char **delimp
11690 )
11691{
11692 SV *sv; /* scalar value: string */
11693 char *s = start; /* current position in the buffer */
11694 char *to; /* current position in the sv's data */
11695 int brackets = 1; /* bracket nesting level */
11696 bool d_is_utf8 = FALSE; /* is there any utf8 content? */
11697 UV open_delim_code; /* code point */
11698 char open_delim_str[UTF8_MAXBYTES+1];
11699 STRLEN delim_byte_len; /* each delimiter currently is the same number
11700 of bytes */
11701 line_t herelines;
11702
11703 /* The only non-UTF character that isn't a stand alone grapheme is
11704 * white-space, hence can't be a delimiter. */
11705 const char * non_grapheme_msg = "Use of unassigned code point or"
11706 " non-standalone grapheme for a delimiter"
11707 " is not allowed";
11708 PERL_ARGS_ASSERT_SCAN_STR;
11709
11710 /* skip space before the delimiter */
11711 if (isSPACE(*s)) { /* skipspace can change the buffer 's' is in, so
11712 'start' also has to change */
11713 s = start = skipspace(s);
11714 }
11715
11716 /* mark where we are, in case we need to report errors */
11717 CLINE;
11718
11719 /* after skipping whitespace, the next character is the delimiter */
11720 if (! UTF || UTF8_IS_INVARIANT(*s)) {
11721 open_delim_code = (U8) *s;
11722 open_delim_str[0] = *s;
11723 delim_byte_len = 1;
11724 }
11725 else {
11726 open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend,
11727 &delim_byte_len);
11728 if (UNLIKELY(! is_grapheme((U8 *) start,
11729 (U8 *) s,
11730 (U8 *) PL_bufend,
11731 open_delim_code)))
11732 {
11733 yyerror(non_grapheme_msg);
11734 }
11735
11736 Copy(s, open_delim_str, delim_byte_len, char);
11737 }
11738 open_delim_str[delim_byte_len] = '\0'; /* Only for safety */
11739
11740
11741 /* mark where we are */
11742 PL_multi_start = CopLINE(PL_curcop);
11743 PL_multi_open = open_delim_code;
11744 herelines = PL_parser->herelines;
11745
11746 const char * legal_paired_opening_delims;
11747 const char * legal_paired_closing_delims;
11748 const char * deprecated_opening_delims;
11749 if (FEATURE_MORE_DELIMS_IS_ENABLED) {
11750 if (UTF) {
11751 legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS;
11752 legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS;
11753
11754 /* We are deprecating using a closing delimiter as the opening, in
11755 * case we want in the future to accept them reversed. The string
11756 * may include ones that are legal, but the code below won't look
11757 * at this string unless it didn't find a legal opening one */
11758 deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
11759 }
11760 else {
11761 legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS;
11762 legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS;
11763 deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11764 }
11765 }
11766 else {
11767 legal_paired_opening_delims = "([{<";
11768 legal_paired_closing_delims = ")]}>";
11769 deprecated_opening_delims = (UTF)
11770 ? DEPRECATED_OPENING_UTF8_BRACKETS
11771 : DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11772 }
11773
11774 const char * legal_paired_opening_delims_end = legal_paired_opening_delims
11775 + strlen(legal_paired_opening_delims);
11776 const char * deprecated_delims_end = deprecated_opening_delims
11777 + strlen(deprecated_opening_delims);
11778
11779 const char * close_delim_str = open_delim_str;
11780 UV close_delim_code = open_delim_code;
11781
11782 /* If the delimiter has a mirror-image closing one, get it */
11783 const char *tmps = ninstr(legal_paired_opening_delims,
11784 legal_paired_opening_delims_end,
11785 open_delim_str, open_delim_str + delim_byte_len);
11786 if (tmps) {
11787 /* Here, there is a paired delimiter, and tmps points to its position
11788 in the string of the accepted opening paired delimiters. The
11789 corresponding position in the string of closing ones is the
11790 beginning of the paired mate. Both contain the same number of
11791 bytes. */
11792 close_delim_str = legal_paired_closing_delims
11793 + (tmps - legal_paired_opening_delims);
11794
11795 /* The list of paired delimiters contains all the ASCII ones that have
11796 * always been legal, and no other ASCIIs. Don't raise a message if
11797 * using one of these */
11798 if (! isASCII(open_delim_code)) {
11799 Perl_ck_warner_d(aTHX_
11800 packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS),
11801 "Use of '%" UTF8f "' is experimental as a string delimiter",
11802 UTF8fARG(UTF, delim_byte_len, open_delim_str));
11803 }
11804
11805 close_delim_code = (UTF)
11806 ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL)
11807 : * (U8 *) close_delim_str;
11808 }
11809 else { /* Here, the delimiter isn't paired, hence the close is the same as
11810 the open; and has already been set up. But make sure it isn't
11811 deprecated to use this particular delimiter, as we plan
11812 eventually to make it paired. */
11813 if (ninstr(deprecated_opening_delims, deprecated_delims_end,
11814 open_delim_str, open_delim_str + delim_byte_len))
11815 {
11816 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__DELIMITER_WILL_BE_PAIRED),
11817 "Use of '%" UTF8f "' is deprecated as a string delimiter",
11818 UTF8fARG(UTF, delim_byte_len, open_delim_str));
11819 }
11820
11821 /* Note that a NUL may be used as a delimiter, and this happens when
11822 * delimiting an empty string, and no special handling for it is
11823 * needed, as ninstr() calls are used */
11824 }
11825
11826 PL_multi_close = close_delim_code;
11827
11828 if (PL_multi_open == PL_multi_close) {
11829 keep_bracketed_quoted = FALSE;
11830 }
11831
11832 /* create a new SV to hold the contents. 79 is the SV's initial length.
11833 What a random number. */
11834 sv = newSV_type(SVt_PVIV);
11835 sv_grow_fresh(sv, 79);
11836 SvIV_set(sv, close_delim_code);
11837 (void)SvPOK_only(sv); /* validate pointer */
11838
11839 /* move past delimiter and try to read a complete string */
11840 if (keep_delims)
11841 sv_catpvn(sv, s, delim_byte_len);
11842 s += delim_byte_len;
11843 for (;;) {
11844 /* extend sv if need be */
11845 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11846 /* set 'to' to the next character in the sv's string */
11847 to = SvPVX(sv)+SvCUR(sv);
11848
11849 /* read until we run out of string, or we find the closing delimiter */
11850 while (s < PL_bufend) {
11851 /* embedded newlines increment the line count */
11852 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11853 COPLINE_INC_WITH_HERELINES;
11854
11855 /* backslashes can escape the closing delimiter */
11856 if ( *s == '\\' && s < PL_bufend - delim_byte_len
11857
11858 /* ... but not if the delimiter itself is a backslash */
11859 && close_delim_code != '\\')
11860 {
11861 /* Here, we have an escaping backslash. If we're supposed to
11862 * discard those that escape the closing delimiter, just
11863 * discard this one */
11864 if ( ! keep_bracketed_quoted
11865 && ( memEQ(s + 1, open_delim_str, delim_byte_len)
11866 || ( PL_multi_open == PL_multi_close
11867 && re_reparse && s[1] == '\\')
11868 || memEQ(s + 1, close_delim_str, delim_byte_len)))
11869 {
11870 s++;
11871 }
11872 else /* any other escapes are simply copied straight through */
11873 *to++ = *s++;
11874 }
11875 else if ( s < PL_bufend - (delim_byte_len - 1)
11876 && memEQ(s, close_delim_str, delim_byte_len)
11877 && --brackets <= 0)
11878 {
11879 /* Found unescaped closing delimiter, unnested if we care about
11880 * that; so are done.
11881 *
11882 * In the case of the opening and closing delimiters being
11883 * different, we have to deal with nesting; the conditional
11884 * above makes sure we don't get here until the nesting level,
11885 * 'brackets', is back down to zero. In the other case,
11886 * nesting isn't an issue, and 'brackets' never can get
11887 * incremented above 0, so will come here at the first closing
11888 * delimiter.
11889 *
11890 * Only grapheme delimiters are legal. */
11891 if ( UTF /* All Non-UTF-8's are graphemes */
11892 && UNLIKELY(! is_grapheme((U8 *) start,
11893 (U8 *) s,
11894 (U8 *) PL_bufend,
11895 close_delim_code)))
11896 {
11897 yyerror(non_grapheme_msg);
11898 }
11899
11900 break;
11901 }
11902 /* No nesting if open eq close */
11903 else if ( PL_multi_open != PL_multi_close
11904 && s < PL_bufend - (delim_byte_len - 1)
11905 && memEQ(s, open_delim_str, delim_byte_len))
11906 {
11907 brackets++;
11908 }
11909
11910 /* Here, still in the middle of the string; copy this character */
11911 if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
11912 *to++ = *s++;
11913 }
11914 else {
11915 size_t this_char_len = UTF8SKIP(s);
11916 Copy(s, to, this_char_len, char);
11917 s += this_char_len;
11918 to += this_char_len;
11919
11920 d_is_utf8 = TRUE;
11921 }
11922 } /* End of loop through buffer */
11923
11924 /* Here, found end of the string, OR ran out of buffer: terminate the
11925 * copied string and update the sv's end-of-string */
11926 *to = '\0';
11927 SvCUR_set(sv, to - SvPVX_const(sv));
11928
11929 /*
11930 * this next chunk reads more into the buffer if we're not done yet
11931 */
11932
11933 if (s < PL_bufend)
11934 break; /* handle case where we are done yet :-) */
11935
11936#ifndef PERL_STRICT_CR
11937 if (to - SvPVX_const(sv) >= 2) {
11938 if ( (to[-2] == '\r' && to[-1] == '\n')
11939 || (to[-2] == '\n' && to[-1] == '\r'))
11940 {
11941 to[-2] = '\n';
11942 to--;
11943 SvCUR_set(sv, to - SvPVX_const(sv));
11944 }
11945 else if (to[-1] == '\r')
11946 to[-1] = '\n';
11947 }
11948 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11949 to[-1] = '\n';
11950#endif
11951
11952 /* if we're out of file, or a read fails, bail and reset the current
11953 line marker so we can report where the unterminated string began
11954 */
11955 COPLINE_INC_WITH_HERELINES;
11956 PL_bufptr = PL_bufend;
11957 if (!lex_next_chunk(0)) {
11958 ASSUME(sv);
11959 SvREFCNT_dec(sv);
11960 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11961 return NULL;
11962 }
11963 s = start = PL_bufptr;
11964 } /* End of infinite loop */
11965
11966 /* at this point, we have successfully read the delimited string */
11967
11968 if (keep_delims)
11969 sv_catpvn(sv, s, delim_byte_len);
11970 s += delim_byte_len;
11971
11972 if (d_is_utf8)
11973 SvUTF8_on(sv);
11974
11975 PL_multi_end = CopLINE(PL_curcop);
11976 CopLINE_set(PL_curcop, PL_multi_start);
11977 PL_parser->herelines = herelines;
11978
11979 /* if we allocated too much space, give some back */
11980 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11981 SvLEN_set(sv, SvCUR(sv) + 1);
11982 SvPV_shrink_to_cur(sv);
11983 }
11984
11985 /* decide whether this is the first or second quoted string we've read
11986 for this op
11987 */
11988
11989 if (PL_lex_stuff)
11990 PL_parser->lex_sub_repl = sv;
11991 else
11992 PL_lex_stuff = sv;
11993 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-delim_byte_len : s;
11994 return s;
11995}
11996
11997/*
11998 scan_num
11999 takes: pointer to position in buffer
12000 returns: pointer to new position in buffer
12001 side-effects: builds ops for the constant in pl_yylval.op
12002
12003 Read a number in any of the formats that Perl accepts:
12004
12005 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12006 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
12007 0b[01](_?[01])* binary integers
12008 0o?[0-7](_?[0-7])* octal integers
12009 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
12010 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
12011
12012 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12013 thing it reads.
12014
12015 If it reads a number without a decimal point or an exponent, it will
12016 try converting the number to an integer and see if it can do so
12017 without loss of precision.
12018*/
12019
12020char *
12021Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12022{
12023 const char *s = start; /* current position in buffer */
12024 char *d; /* destination in temp buffer */
12025 char *e; /* end of temp buffer */
12026 NV nv; /* number read, as a double */
12027 SV *sv = NULL; /* place to put the converted number */
12028 bool floatit; /* boolean: int or float? */
12029 const char *lastub = NULL; /* position of last underbar */
12030 static const char* const number_too_long = "Number too long";
12031 bool warned_about_underscore = 0;
12032 I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
12033#define WARN_ABOUT_UNDERSCORE() \
12034 do { \
12035 if (!warned_about_underscore) { \
12036 warned_about_underscore = 1; \
12037 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
12038 "Misplaced _ in number"); \
12039 } \
12040 } while(0)
12041 /* Hexadecimal floating point.
12042 *
12043 * In many places (where we have quads and NV is IEEE 754 double)
12044 * we can fit the mantissa bits of a NV into an unsigned quad.
12045 * (Note that UVs might not be quads even when we have quads.)
12046 * This will not work everywhere, though (either no quads, or
12047 * using long doubles), in which case we have to resort to NV,
12048 * which will probably mean horrible loss of precision due to
12049 * multiple fp operations. */
12050 bool hexfp = FALSE;
12051 int total_bits = 0;
12052 int significant_bits = 0;
12053#if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
12054# define HEXFP_UQUAD
12055 Uquad_t hexfp_uquad = 0;
12056 int hexfp_frac_bits = 0;
12057#else
12058# define HEXFP_NV
12059 NV hexfp_nv = 0.0;
12060#endif
12061 NV hexfp_mult = 1.0;
12062 UV high_non_zero = 0; /* highest digit */
12063 int non_zero_integer_digits = 0;
12064 bool new_octal = FALSE; /* octal with "0o" prefix */
12065
12066 PERL_ARGS_ASSERT_SCAN_NUM;
12067
12068 /* We use the first character to decide what type of number this is */
12069
12070 switch (*s) {
12071 default:
12072 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
12073
12074 /* if it starts with a 0, it could be an octal number, a decimal in
12075 0.13 disguise, or a hexadecimal number, or a binary number. */
12076 case '0':
12077 {
12078 /* variables:
12079 u holds the "number so far"
12080 overflowed was the number more than we can hold?
12081
12082 Shift is used when we add a digit. It also serves as an "are
12083 we in octal/hex/binary?" indicator to disallow hex characters
12084 when in octal mode.
12085 */
12086 NV n = 0.0;
12087 UV u = 0;
12088 bool overflowed = FALSE;
12089 bool just_zero = TRUE; /* just plain 0 or binary number? */
12090 bool has_digs = FALSE;
12091 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12092 static const char* const bases[5] =
12093 { "", "binary", "", "octal", "hexadecimal" };
12094 static const char* const Bases[5] =
12095 { "", "Binary", "", "Octal", "Hexadecimal" };
12096 static const char* const maxima[5] =
12097 { "",
12098 "0b11111111111111111111111111111111",
12099 "",
12100 "037777777777",
12101 "0xffffffff" };
12102
12103 /* check for hex */
12104 if (isALPHA_FOLD_EQ(s[1], 'x')) {
12105 shift = 4;
12106 s += 2;
12107 just_zero = FALSE;
12108 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
12109 shift = 1;
12110 s += 2;
12111 just_zero = FALSE;
12112 }
12113 /* check for a decimal in disguise */
12114 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
12115 goto decimal;
12116 /* so it must be octal */
12117 else {
12118 shift = 3;
12119 s++;
12120 if (isALPHA_FOLD_EQ(*s, 'o')) {
12121 s++;
12122 just_zero = FALSE;
12123 new_octal = TRUE;
12124 }
12125 }
12126
12127 if (*s == '_') {
12128 WARN_ABOUT_UNDERSCORE();
12129 lastub = s++;
12130 }
12131
12132 /* read the rest of the number */
12133 for (;;) {
12134 /* x is used in the overflow test,
12135 b is the digit we're adding on. */
12136 UV x, b;
12137
12138 switch (*s) {
12139
12140 /* if we don't mention it, we're done */
12141 default:
12142 goto out;
12143
12144 /* _ are ignored -- but warned about if consecutive */
12145 case '_':
12146 if (lastub && s == lastub + 1)
12147 WARN_ABOUT_UNDERSCORE();
12148 lastub = s++;
12149 break;
12150
12151 /* 8 and 9 are not octal */
12152 case '8': case '9':
12153 if (shift == 3)
12154 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12155 /* FALLTHROUGH */
12156
12157 /* octal digits */
12158 case '2': case '3': case '4':
12159 case '5': case '6': case '7':
12160 if (shift == 1)
12161 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12162 /* FALLTHROUGH */
12163
12164 case '0': case '1':
12165 b = *s++ & 15; /* ASCII digit -> value of digit */
12166 goto digit;
12167
12168 /* hex digits */
12169 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12170 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12171 /* make sure they said 0x */
12172 if (shift != 4)
12173 goto out;
12174 b = (*s++ & 7) + 9;
12175
12176 /* Prepare to put the digit we have onto the end
12177 of the number so far. We check for overflows.
12178 */
12179
12180 digit:
12181 just_zero = FALSE;
12182 has_digs = TRUE;
12183 if (!overflowed) {
12184 assert(shift >= 0);
12185 x = u << shift; /* make room for the digit */
12186
12187 total_bits += shift;
12188
12189 if ((x >> shift) != u
12190 && !(PL_hints & HINT_NEW_BINARY)) {
12191 overflowed = TRUE;
12192 n = (NV) u;
12193 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12194 "Integer overflow in %s number",
12195 bases[shift]);
12196 } else
12197 u = x | b; /* add the digit to the end */
12198 }
12199 if (overflowed) {
12200 n *= nvshift[shift];
12201 /* If an NV has not enough bits in its
12202 * mantissa to represent an UV this summing of
12203 * small low-order numbers is a waste of time
12204 * (because the NV cannot preserve the
12205 * low-order bits anyway): we could just
12206 * remember when did we overflow and in the
12207 * end just multiply n by the right
12208 * amount. */
12209 n += (NV) b;
12210 }
12211
12212 if (high_non_zero == 0 && b > 0)
12213 high_non_zero = b;
12214
12215 if (high_non_zero)
12216 non_zero_integer_digits++;
12217
12218 /* this could be hexfp, but peek ahead
12219 * to avoid matching ".." */
12220 if (UNLIKELY(HEXFP_PEEK(s))) {
12221 goto out;
12222 }
12223
12224 break;
12225 }
12226 }
12227
12228 /* if we get here, we had success: make a scalar value from
12229 the number.
12230 */
12231 out:
12232
12233 /* final misplaced underbar check */
12234 if (s[-1] == '_')
12235 WARN_ABOUT_UNDERSCORE();
12236
12237 if (UNLIKELY(HEXFP_PEEK(s))) {
12238 /* Do sloppy (on the underbars) but quick detection
12239 * (and value construction) for hexfp, the decimal
12240 * detection will shortly be more thorough with the
12241 * underbar checks. */
12242 const char* h = s;
12243 significant_bits = non_zero_integer_digits * shift;
12244#ifdef HEXFP_UQUAD
12245 hexfp_uquad = u;
12246#else /* HEXFP_NV */
12247 hexfp_nv = u;
12248#endif
12249 /* Ignore the leading zero bits of
12250 * the high (first) non-zero digit. */
12251 if (high_non_zero) {
12252 if (high_non_zero < 0x8)
12253 significant_bits--;
12254 if (high_non_zero < 0x4)
12255 significant_bits--;
12256 if (high_non_zero < 0x2)
12257 significant_bits--;
12258 }
12259
12260 if (*h == '.') {
12261#ifdef HEXFP_NV
12262 NV nv_mult = 1.0;
12263#endif
12264 bool accumulate = TRUE;
12265 U8 b = 0; /* silence compiler warning */
12266 int lim = 1 << shift;
12267 for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
12268 *h == '_'); h++) {
12269 if (isXDIGIT(*h)) {
12270 significant_bits += shift;
12271#ifdef HEXFP_UQUAD
12272 if (accumulate) {
12273 if (significant_bits < NV_MANT_DIG) {
12274 /* We are in the long "run" of xdigits,
12275 * accumulate the full four bits. */
12276 assert(shift >= 0);
12277 hexfp_uquad <<= shift;
12278 hexfp_uquad |= b;
12279 hexfp_frac_bits += shift;
12280 } else if (significant_bits - shift < NV_MANT_DIG) {
12281 /* We are at a hexdigit either at,
12282 * or straddling, the edge of mantissa.
12283 * We will try grabbing as many as
12284 * possible bits. */
12285 int tail =
12286 significant_bits - NV_MANT_DIG;
12287 if (tail <= 0)
12288 tail += shift;
12289 assert(tail >= 0);
12290 hexfp_uquad <<= tail;
12291 assert((shift - tail) >= 0);
12292 hexfp_uquad |= b >> (shift - tail);
12293 hexfp_frac_bits += tail;
12294
12295 /* Ignore the trailing zero bits
12296 * of the last non-zero xdigit.
12297 *
12298 * The assumption here is that if
12299 * one has input of e.g. the xdigit
12300 * eight (0x8), there is only one
12301 * bit being input, not the full
12302 * four bits. Conversely, if one
12303 * specifies a zero xdigit, the
12304 * assumption is that one really
12305 * wants all those bits to be zero. */
12306 if (b) {
12307 if ((b & 0x1) == 0x0) {
12308 significant_bits--;
12309 if ((b & 0x2) == 0x0) {
12310 significant_bits--;
12311 if ((b & 0x4) == 0x0) {
12312 significant_bits--;
12313 }
12314 }
12315 }
12316 }
12317
12318 accumulate = FALSE;
12319 }
12320 } else {
12321 /* Keep skipping the xdigits, and
12322 * accumulating the significant bits,
12323 * but do not shift the uquad
12324 * (which would catastrophically drop
12325 * high-order bits) or accumulate the
12326 * xdigits anymore. */
12327 }
12328#else /* HEXFP_NV */
12329 if (accumulate) {
12330 nv_mult /= nvshift[shift];
12331 if (nv_mult > 0.0)
12332 hexfp_nv += b * nv_mult;
12333 else
12334 accumulate = FALSE;
12335 }
12336#endif
12337 }
12338 if (significant_bits >= NV_MANT_DIG)
12339 accumulate = FALSE;
12340 }
12341 }
12342
12343 if ((total_bits > 0 || significant_bits > 0) &&
12344 isALPHA_FOLD_EQ(*h, 'p')) {
12345 bool negexp = FALSE;
12346 h++;
12347 if (*h == '+')
12348 h++;
12349 else if (*h == '-') {
12350 negexp = TRUE;
12351 h++;
12352 }
12353 if (isDIGIT(*h)) {
12354 I32 hexfp_exp = 0;
12355 while (isDIGIT(*h) || *h == '_') {
12356 if (isDIGIT(*h)) {
12357 hexfp_exp *= 10;
12358 hexfp_exp += *h - '0';
12359#ifdef NV_MIN_EXP
12360 if (negexp
12361 && -hexfp_exp < NV_MIN_EXP - 1) {
12362 /* NOTE: this means that the exponent
12363 * underflow warning happens for
12364 * the IEEE 754 subnormals (denormals),
12365 * because DBL_MIN_EXP etc are the lowest
12366 * possible binary (or, rather, DBL_RADIX-base)
12367 * exponent for normals, not subnormals.
12368 *
12369 * This may or may not be a good thing. */
12370 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12371 "Hexadecimal float: exponent underflow");
12372 break;
12373 }
12374#endif
12375#ifdef NV_MAX_EXP
12376 if (!negexp
12377 && hexfp_exp > NV_MAX_EXP - 1) {
12378 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12379 "Hexadecimal float: exponent overflow");
12380 break;
12381 }
12382#endif
12383 }
12384 h++;
12385 }
12386 if (negexp)
12387 hexfp_exp = -hexfp_exp;
12388#ifdef HEXFP_UQUAD
12389 hexfp_exp -= hexfp_frac_bits;
12390#endif
12391 hexfp_mult = Perl_pow(2.0, hexfp_exp);
12392 hexfp = TRUE;
12393 goto decimal;
12394 }
12395 }
12396 }
12397
12398 if (!just_zero && !has_digs) {
12399 /* 0x, 0o or 0b with no digits, treat it as an error.
12400 Originally this backed up the parse before the b or
12401 x, but that has the potential for silent changes in
12402 behaviour, like for: "0x.3" and "0x+$foo".
12403 */
12404 const char *d = s;
12405 char *oldbp = PL_bufptr;
12406 if (*d) ++d; /* so the user sees the bad non-digit */
12407 PL_bufptr = (char *)d; /* so yyerror reports the context */
12408 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
12409 bases[shift]));
12410 PL_bufptr = oldbp;
12411 }
12412
12413 if (overflowed) {
12414 if (n > 4294967295.0)
12415 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12416 "%s number > %s non-portable",
12417 Bases[shift],
12418 new_octal ? "0o37777777777" : maxima[shift]);
12419 sv = newSVnv(n);
12420 }
12421 else {
12422#if UVSIZE > 4
12423 if (u > 0xffffffff)
12424 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12425 "%s number > %s non-portable",
12426 Bases[shift],
12427 new_octal ? "0o37777777777" : maxima[shift]);
12428#endif
12429 sv = newSVuv(u);
12430 }
12431 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12432 sv = new_constant(start, s - start, "integer",
12433 sv, NULL, NULL, 0, NULL);
12434 else if (PL_hints & HINT_NEW_BINARY)
12435 sv = new_constant(start, s - start, "binary",
12436 sv, NULL, NULL, 0, NULL);
12437 }
12438 break;
12439
12440 /*
12441 handle decimal numbers.
12442 we're also sent here when we read a 0 as the first digit
12443 */
12444 case '1': case '2': case '3': case '4': case '5':
12445 case '6': case '7': case '8': case '9': case '.':
12446 decimal:
12447 d = PL_tokenbuf;
12448 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12449 floatit = FALSE;
12450 if (hexfp) {
12451 floatit = TRUE;
12452 *d++ = '0';
12453 switch (shift) {
12454 case 4:
12455 *d++ = 'x';
12456 s = start + 2;
12457 break;
12458 case 3:
12459 if (new_octal) {
12460 *d++ = 'o';
12461 s = start + 2;
12462 break;
12463 }
12464 s = start + 1;
12465 break;
12466 case 1:
12467 *d++ = 'b';
12468 s = start + 2;
12469 break;
12470 default:
12471 NOT_REACHED; /* NOTREACHED */
12472 }
12473 }
12474
12475 /* read next group of digits and _ and copy into d */
12476 while (isDIGIT(*s)
12477 || *s == '_'
12478 || UNLIKELY(hexfp && isXDIGIT(*s)))
12479 {
12480 /* skip underscores, checking for misplaced ones
12481 if -w is on
12482 */
12483 if (*s == '_') {
12484 if (lastub && s == lastub + 1)
12485 WARN_ABOUT_UNDERSCORE();
12486 lastub = s++;
12487 }
12488 else {
12489 /* check for end of fixed-length buffer */
12490 if (d >= e)
12491 Perl_croak(aTHX_ "%s", number_too_long);
12492 /* if we're ok, copy the character */
12493 *d++ = *s++;
12494 }
12495 }
12496
12497 /* final misplaced underbar check */
12498 if (lastub && s == lastub + 1)
12499 WARN_ABOUT_UNDERSCORE();
12500
12501 /* read a decimal portion if there is one. avoid
12502 3..5 being interpreted as the number 3. followed
12503 by .5
12504 */
12505 if (*s == '.' && s[1] != '.') {
12506 floatit = TRUE;
12507 *d++ = *s++;
12508
12509 if (*s == '_') {
12510 WARN_ABOUT_UNDERSCORE();
12511 lastub = s;
12512 }
12513
12514 /* copy, ignoring underbars, until we run out of digits.
12515 */
12516 for (; isDIGIT(*s)
12517 || *s == '_'
12518 || UNLIKELY(hexfp && isXDIGIT(*s));
12519 s++)
12520 {
12521 /* fixed length buffer check */
12522 if (d >= e)
12523 Perl_croak(aTHX_ "%s", number_too_long);
12524 if (*s == '_') {
12525 if (lastub && s == lastub + 1)
12526 WARN_ABOUT_UNDERSCORE();
12527 lastub = s;
12528 }
12529 else
12530 *d++ = *s;
12531 }
12532 /* fractional part ending in underbar? */
12533 if (s[-1] == '_')
12534 WARN_ABOUT_UNDERSCORE();
12535 if (*s == '.' && isDIGIT(s[1])) {
12536 /* oops, it's really a v-string, but without the "v" */
12537 s = start;
12538 goto vstring;
12539 }
12540 }
12541
12542 /* read exponent part, if present */
12543 if ((isALPHA_FOLD_EQ(*s, 'e')
12544 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
12545 && memCHRs("+-0123456789_", s[1]))
12546 {
12547 int exp_digits = 0;
12548 const char *save_s = s;
12549 char * save_d = d;
12550
12551 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
12552 ditto for p (hexfloats) */
12553 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
12554 /* At least some Mach atof()s don't grok 'E' */
12555 *d++ = 'e';
12556 }
12557 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
12558 *d++ = 'p';
12559 }
12560
12561 s++;
12562
12563
12564 /* stray preinitial _ */
12565 if (*s == '_') {
12566 WARN_ABOUT_UNDERSCORE();
12567 lastub = s++;
12568 }
12569
12570 /* allow positive or negative exponent */
12571 if (*s == '+' || *s == '-')
12572 *d++ = *s++;
12573
12574 /* stray initial _ */
12575 if (*s == '_') {
12576 WARN_ABOUT_UNDERSCORE();
12577 lastub = s++;
12578 }
12579
12580 /* read digits of exponent */
12581 while (isDIGIT(*s) || *s == '_') {
12582 if (isDIGIT(*s)) {
12583 ++exp_digits;
12584 if (d >= e)
12585 Perl_croak(aTHX_ "%s", number_too_long);
12586 *d++ = *s++;
12587 }
12588 else {
12589 if (((lastub && s == lastub + 1)
12590 || (!isDIGIT(s[1]) && s[1] != '_')))
12591 WARN_ABOUT_UNDERSCORE();
12592 lastub = s++;
12593 }
12594 }
12595
12596 if (!exp_digits) {
12597 /* no exponent digits, the [eEpP] could be for something else,
12598 * though in practice we don't get here for p since that's preparsed
12599 * earlier, and results in only the 0xX being consumed, so behave similarly
12600 * for decimal floats and consume only the D.DD, leaving the [eE] to the
12601 * next token.
12602 */
12603 s = save_s;
12604 d = save_d;
12605 }
12606 else {
12607 floatit = TRUE;
12608 }
12609 }
12610
12611
12612 /*
12613 We try to do an integer conversion first if no characters
12614 indicating "float" have been found.
12615 */
12616
12617 if (!floatit) {
12618 UV uv;
12619 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12620
12621 if (flags == IS_NUMBER_IN_UV) {
12622 if (uv <= IV_MAX)
12623 sv = newSViv(uv); /* Prefer IVs over UVs. */
12624 else
12625 sv = newSVuv(uv);
12626 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12627 if (uv <= (UV) IV_MIN)
12628 sv = newSViv(-(IV)uv);
12629 else
12630 floatit = TRUE;
12631 } else
12632 floatit = TRUE;
12633 }
12634 if (floatit) {
12635 /* terminate the string */
12636 *d = '\0';
12637 if (UNLIKELY(hexfp)) {
12638# ifdef NV_MANT_DIG
12639 if (significant_bits > NV_MANT_DIG)
12640 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12641 "Hexadecimal float: mantissa overflow");
12642# endif
12643#ifdef HEXFP_UQUAD
12644 nv = hexfp_uquad * hexfp_mult;
12645#else /* HEXFP_NV */
12646 nv = hexfp_nv * hexfp_mult;
12647#endif
12648 } else {
12649 nv = Atof(PL_tokenbuf);
12650 }
12651 sv = newSVnv(nv);
12652 }
12653
12654 if ( floatit
12655 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12656 const char *const key = floatit ? "float" : "integer";
12657 const STRLEN keylen = floatit ? 5 : 7;
12658 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12659 key, keylen, sv, NULL, NULL, 0, NULL);
12660 }
12661 break;
12662
12663 /* if it starts with a v, it could be a v-string */
12664 case 'v':
12665 vstring:
12666 sv = newSV(5); /* preallocate storage space */
12667 ENTER_with_name("scan_vstring");
12668 SAVEFREESV(sv);
12669 s = scan_vstring(s, PL_bufend, sv);
12670 SvREFCNT_inc_simple_void_NN(sv);
12671 LEAVE_with_name("scan_vstring");
12672 break;
12673 }
12674
12675 /* make the op for the constant and return */
12676
12677 if (sv)
12678 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12679 else
12680 lvalp->opval = NULL;
12681
12682 return (char *)s;
12683}
12684
12685STATIC char *
12686S_scan_formline(pTHX_ char *s)
12687{
12688 SV * const stuff = newSVpvs("");
12689 bool needargs = FALSE;
12690 bool eofmt = FALSE;
12691
12692 PERL_ARGS_ASSERT_SCAN_FORMLINE;
12693
12694 while (!needargs) {
12695 char *eol;
12696 if (*s == '.') {
12697 char *t = s+1;
12698#ifdef PERL_STRICT_CR
12699 while (SPACE_OR_TAB(*t))
12700 t++;
12701#else
12702 while (SPACE_OR_TAB(*t) || *t == '\r')
12703 t++;
12704#endif
12705 if (*t == '\n' || t == PL_bufend) {
12706 eofmt = TRUE;
12707 break;
12708 }
12709 }
12710 eol = (char *) memchr(s,'\n',PL_bufend-s);
12711 if (! eol) {
12712 eol = PL_bufend;
12713 }
12714 else {
12715 eol++;
12716 }
12717 if (*s != '#') {
12718 char *t;
12719 for (t = s; t < eol; t++) {
12720 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12721 needargs = FALSE;
12722 goto enough; /* ~~ must be first line in formline */
12723 }
12724 if (*t == '@' || *t == '^')
12725 needargs = TRUE;
12726 }
12727 if (eol > s) {
12728 sv_catpvn(stuff, s, eol-s);
12729#ifndef PERL_STRICT_CR
12730 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12731 char *end = SvPVX(stuff) + SvCUR(stuff);
12732 end[-2] = '\n';
12733 end[-1] = '\0';
12734 SvCUR_set(stuff, SvCUR(stuff) - 1);
12735 }
12736#endif
12737 }
12738 else
12739 break;
12740 }
12741 s = (char*)eol;
12742 if ((PL_rsfp || PL_parser->filtered)
12743 && PL_parser->form_lex_state == LEX_NORMAL) {
12744 bool got_some;
12745 PL_bufptr = PL_bufend;
12746 COPLINE_INC_WITH_HERELINES;
12747 got_some = lex_next_chunk(0);
12748 CopLINE_dec(PL_curcop);
12749 s = PL_bufptr;
12750 if (!got_some)
12751 break;
12752 }
12753 incline(s, PL_bufend);
12754 }
12755 enough:
12756 if (!SvCUR(stuff) || needargs)
12757 PL_lex_state = PL_parser->form_lex_state;
12758 if (SvCUR(stuff)) {
12759 PL_expect = XSTATE;
12760 if (needargs) {
12761 const char *s2 = s;
12762 while (isSPACE(*s2) && *s2 != '\n')
12763 s2++;
12764 if (*s2 == '{') {
12765 PL_expect = XTERMBLOCK;
12766 NEXTVAL_NEXTTOKE.ival = 0;
12767 force_next(KW_DO);
12768 }
12769 NEXTVAL_NEXTTOKE.ival = 0;
12770 force_next(FORMLBRACK);
12771 }
12772 if (!IN_BYTES) {
12773 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12774 SvUTF8_on(stuff);
12775 }
12776 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12777 force_next(THING);
12778 }
12779 else {
12780 SvREFCNT_dec(stuff);
12781 if (eofmt)
12782 PL_lex_formbrack = 0;
12783 }
12784 return s;
12785}
12786
12787/*
12788=for apidoc start_subparse
12789
12790Set things up for parsing a subroutine.
12791
12792If C<is_format> is non-zero, the input is to be considered a format sub
12793(a specialised sub used to implement perl's C<format> feature); else a
12794normal C<sub>.
12795
12796C<flags> are added to the flags for C<PL_compcv>. C<flags> may include the
12797C<CVf_IsMETHOD> bit, which causes the new subroutine to be a method.
12798
12799This returns the value of C<PL_savestack_ix> that was in effect upon entry to
12800the function;
12801
12802=cut
12803*/
12804
12805I32
12806Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12807{
12808 const I32 oldsavestack_ix = PL_savestack_ix;
12809 CV* const outsidecv = PL_compcv;
12810 bool is_method = flags & CVf_IsMETHOD;
12811
12812 if (is_method)
12813 croak_kw_unless_class("method");
12814
12815 SAVEI32(PL_subline);
12816 save_item(PL_subname);
12817 SAVESPTR(PL_compcv);
12818
12819 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12820 CvFLAGS(PL_compcv) |= flags;
12821
12822 PL_subline = CopLINE(PL_curcop);
12823 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12824 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12825 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12826 if (outsidecv && CvPADLIST(outsidecv))
12827 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12828 if (is_method)
12829 class_prepare_method_parse(PL_compcv);
12830
12831 return oldsavestack_ix;
12832}
12833
12834/* If o represents a builtin attribute, apply it to cv and returns true.
12835 * Otherwise does nothing and returns false
12836 */
12837
12838STATIC bool
12839S_apply_builtin_cv_attribute(pTHX_ CV *cv, OP *o)
12840{
12841 assert(o->op_type == OP_CONST);
12842 SV *sv = cSVOPo_sv;
12843 STRLEN len = SvCUR(sv);
12844
12845 /* NOTE: any CV attrs applied here need to be part of
12846 the CVf_BUILTIN_ATTRS define in cv.h! */
12847
12848 if(memEQs(SvPVX(sv), len, "lvalue"))
12849 CvLVALUE_on(cv);
12850 else if(memEQs(SvPVX(sv), len, "method"))
12851 CvNOWARN_AMBIGUOUS_on(cv);
12852 else if(memEQs(SvPVX(sv), len, "const")) {
12853 Perl_ck_warner_d(aTHX_
12854 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
12855 ":const is experimental"
12856 );
12857 CvANONCONST_on(cv);
12858 if (!CvANON(cv))
12859 yyerror(":const is not permitted on named subroutines");
12860 }
12861 else
12862 return false;
12863
12864 return true;
12865}
12866
12867/*
12868=for apidoc apply_builtin_cv_attributes
12869
12870Given an OP_LIST containing attribute definitions, filter it for known builtin
12871attributes to apply to the cv, returning a possibly-smaller list containing
12872just the remaining ones.
12873
12874=cut
12875*/
12876
12877OP *
12878Perl_apply_builtin_cv_attributes(pTHX_ CV *cv, OP *attrlist)
12879{
12880 PERL_ARGS_ASSERT_APPLY_BUILTIN_CV_ATTRIBUTES;
12881
12882 if(!attrlist)
12883 return attrlist;
12884
12885 if(attrlist->op_type != OP_LIST) {
12886 /* Not in fact a list but just a single attribute */
12887 if(S_apply_builtin_cv_attribute(aTHX_ cv, attrlist)) {
12888 op_free(attrlist);
12889 return NULL;
12890 }
12891
12892 return attrlist;
12893 }
12894
12895 OP *prev = cLISTOPx(attrlist)->op_first;
12896 assert(prev->op_type == OP_PUSHMARK);
12897 OP *o = OpSIBLING(prev);
12898
12899 OP *next;
12900 for(; o; o = next) {
12901 next = OpSIBLING(o);
12902
12903 if(S_apply_builtin_cv_attribute(aTHX_ cv, o)) {
12904 op_sibling_splice(attrlist, prev, 1, NULL);
12905 op_free(o);
12906 }
12907 else {
12908 prev = o;
12909 }
12910 }
12911
12912 if(OpHAS_SIBLING(cLISTOPx(attrlist)->op_first))
12913 return attrlist;
12914
12915 /* The list is now entirely empty, we might as well discard it */
12916 op_free(attrlist);
12917 return NULL;
12918}
12919
12920
12921/* Do extra initialisation of a CV (typically one just created by
12922 * start_subparse()) if that CV is for a named sub
12923 */
12924
12925void
12926Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12927{
12928 PERL_ARGS_ASSERT_INIT_NAMED_CV;
12929
12930 if (nameop->op_type == OP_CONST) {
12931 const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12932 if ( strEQ(name, "BEGIN")
12933 || strEQ(name, "END")
12934 || strEQ(name, "INIT")
12935 || strEQ(name, "CHECK")
12936 || strEQ(name, "UNITCHECK")
12937 )
12938 CvSPECIAL_on(cv);
12939 }
12940 else
12941 /* State subs inside anonymous subs need to be
12942 clonable themselves. */
12943 if ( CvANON(CvOUTSIDE(cv))
12944 || CvCLONE(CvOUTSIDE(cv))
12945 || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12946 CvOUTSIDE(cv)
12947 ))[nameop->op_targ])
12948 )
12949 CvCLONE_on(cv);
12950}
12951
12952
12953static int
12954S_yywarn(pTHX_ const char *const s, U32 flags)
12955{
12956 PERL_ARGS_ASSERT_YYWARN;
12957
12958 PL_in_eval |= EVAL_WARNONLY;
12959 yyerror_pv(s, flags);
12960 return 0;
12961}
12962
12963void
12964Perl_abort_execution(pTHX_ SV* msg_sv, const char * const name)
12965{
12966 PERL_ARGS_ASSERT_ABORT_EXECUTION;
12967
12968 if (msg_sv) {
12969 if (PL_minus_c)
12970 Perl_croak(aTHX_ "%" SVf "%s had compilation errors.\n", SVfARG(msg_sv), name);
12971 else {
12972 Perl_croak(aTHX_
12973 "%" SVf "Execution of %s aborted due to compilation errors.\n", SVfARG(msg_sv), name);
12974 }
12975 } else {
12976 if (PL_minus_c)
12977 Perl_croak(aTHX_ "%s had compilation errors.\n", name);
12978 else {
12979 Perl_croak(aTHX_
12980 "Execution of %s aborted due to compilation errors.\n", name);
12981 }
12982 }
12983
12984 NOT_REACHED; /* NOTREACHED */
12985}
12986
12987void
12988Perl_yyquit(pTHX)
12989{
12990 /* Called, after at least one error has been found, to abort the parse now,
12991 * instead of trying to forge ahead */
12992
12993 yyerror_pvn(NULL, 0, 0);
12994}
12995
12996int
12997Perl_yyerror(pTHX_ const char *const s)
12998{
12999 PERL_ARGS_ASSERT_YYERROR;
13000 int r = yyerror_pvn(s, strlen(s), 0);
13001 return r;
13002}
13003
13004int
13005Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
13006{
13007 PERL_ARGS_ASSERT_YYERROR_PV;
13008 int r = yyerror_pvn(s, strlen(s), flags);
13009 return r;
13010}
13011
13012int
13013Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
13014{
13015 const char *context = NULL;
13016 int contlen = -1;
13017 SV *msg;
13018 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
13019 int yychar = PL_parser->yychar;
13020
13021 /* Output error message 's' with length 'len'. 'flags' are SV flags that
13022 * apply. If the number of errors found is large enough, it abandons
13023 * parsing. If 's' is NULL, there is no message, and it abandons
13024 * processing unconditionally */
13025
13026 if (s != NULL) {
13027 if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp))
13028 sv_catpvs(where_sv, "at EOF");
13029 else if ( PL_oldoldbufptr
13030 && PL_bufptr > PL_oldoldbufptr
13031 && PL_bufptr - PL_oldoldbufptr < 200
13032 && PL_oldoldbufptr != PL_oldbufptr
13033 && PL_oldbufptr != PL_bufptr)
13034 {
13035 while (isSPACE(*PL_oldoldbufptr))
13036 PL_oldoldbufptr++;
13037 context = PL_oldoldbufptr;
13038 contlen = PL_bufptr - PL_oldoldbufptr;
13039 }
13040 else if ( PL_oldbufptr
13041 && PL_bufptr > PL_oldbufptr
13042 && PL_bufptr - PL_oldbufptr < 200
13043 && PL_oldbufptr != PL_bufptr)
13044 {
13045 while (isSPACE(*PL_oldbufptr))
13046 PL_oldbufptr++;
13047 context = PL_oldbufptr;
13048 contlen = PL_bufptr - PL_oldbufptr;
13049 }
13050 else if (yychar > 255)
13051 sv_catpvs(where_sv, "next token ???");
13052 else if (yychar == YYEMPTY) {
13053 if (PL_lex_state == LEX_NORMAL)
13054 sv_catpvs(where_sv, "at end of line");
13055 else if (PL_lex_inpat)
13056 sv_catpvs(where_sv, "within pattern");
13057 else
13058 sv_catpvs(where_sv, "within string");
13059 }
13060 else {
13061 sv_catpvs(where_sv, "next char ");
13062 if (yychar < 32)
13063 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13064 else if (isPRINT_LC(yychar)) {
13065 const char string = yychar;
13066 sv_catpvn(where_sv, &string, 1);
13067 }
13068 else
13069 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13070 }
13071 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
13072 Perl_sv_catpvf(aTHX_ msg, " at %s line %" LINE_Tf ", ",
13073 OutCopFILE(PL_curcop),
13074 (PL_parser->preambling == NOLINE
13075 ? CopLINE(PL_curcop)
13076 : PL_parser->preambling));
13077 if (context)
13078 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
13079 UTF8fARG(UTF, contlen, context));
13080 else
13081 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
13082 if ( PL_multi_start < PL_multi_end
13083 && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
13084 {
13085 Perl_sv_catpvf(aTHX_ msg,
13086 " (Might be a runaway multi-line %c%c string starting on"
13087 " line %" LINE_Tf ")\n",
13088 (int)PL_multi_open,(int)PL_multi_close,(line_t)PL_multi_start);
13089 PL_multi_end = 0;
13090 }
13091 if (PL_in_eval & EVAL_WARNONLY) {
13092 PL_in_eval &= ~EVAL_WARNONLY;
13093 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
13094 }
13095 else {
13096 qerror(msg);
13097 }
13098 }
13099 /* if there was no message then this is a yyquit(), which is actualy handled
13100 * by qerror() with a NULL argument */
13101 if (s == NULL)
13102 qerror(NULL);
13103
13104 PL_in_my = 0;
13105 PL_in_my_stash = NULL;
13106 return 0;
13107}
13108
13109STATIC char*
13110S_swallow_bom(pTHX_ U8 *s)
13111{
13112 const STRLEN slen = SvCUR(PL_linestr);
13113
13114 PERL_ARGS_ASSERT_SWALLOW_BOM;
13115
13116 switch (s[0]) {
13117 case 0xFF:
13118 if (s[1] == 0xFE) {
13119 /* UTF-16 little-endian? (or UTF-32LE?) */
13120 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
13121 /* diag_listed_as: Unsupported script encoding %s */
13122 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13123#ifndef PERL_NO_UTF16_FILTER
13124#ifdef DEBUGGING
13125 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13126#endif
13127 s += 2;
13128 if (PL_bufend > (char*)s) {
13129 s = add_utf16_textfilter(s, TRUE);
13130 }
13131#else
13132 /* diag_listed_as: Unsupported script encoding %s */
13133 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13134#endif
13135 }
13136 break;
13137 case 0xFE:
13138 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
13139#ifndef PERL_NO_UTF16_FILTER
13140#ifdef DEBUGGING
13141 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13142#endif
13143 s += 2;
13144 if (PL_bufend > (char *)s) {
13145 s = add_utf16_textfilter(s, FALSE);
13146 }
13147#else
13148 /* diag_listed_as: Unsupported script encoding %s */
13149 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13150#endif
13151 }
13152 break;
13153 case BOM_UTF8_FIRST_BYTE: {
13154 if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
13155#ifdef DEBUGGING
13156 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13157#endif
13158 s += sizeof(BOM_UTF8) - 1; /* UTF-8 */
13159 }
13160 break;
13161 }
13162 case 0:
13163 if (slen > 3) {
13164 if (s[1] == 0) {
13165 if (s[2] == 0xFE && s[3] == 0xFF) {
13166 /* UTF-32 big-endian */
13167 /* diag_listed_as: Unsupported script encoding %s */
13168 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13169 }
13170 }
13171 else if (s[2] == 0 && s[3] != 0) {
13172 /* Leading bytes
13173 * 00 xx 00 xx
13174 * are a good indicator of UTF-16BE. */
13175#ifndef PERL_NO_UTF16_FILTER
13176#ifdef DEBUGGING
13177 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13178#endif
13179 s = add_utf16_textfilter(s, FALSE);
13180#else
13181 /* diag_listed_as: Unsupported script encoding %s */
13182 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13183#endif
13184 }
13185 }
13186 break;
13187
13188 default:
13189 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13190 /* Leading bytes
13191 * xx 00 xx 00
13192 * are a good indicator of UTF-16LE. */
13193#ifndef PERL_NO_UTF16_FILTER
13194#ifdef DEBUGGING
13195 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13196#endif
13197 s = add_utf16_textfilter(s, TRUE);
13198#else
13199 /* diag_listed_as: Unsupported script encoding %s */
13200 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13201#endif
13202 }
13203 }
13204 return (char*)s;
13205}
13206
13207
13208#ifndef PERL_NO_UTF16_FILTER
13209static I32
13210S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13211{
13212 SV *const filter = FILTER_DATA(idx);
13213 /* We re-use this each time round, throwing the contents away before we
13214 return. */
13215 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13216 SV *const utf8_buffer = filter;
13217 IV status = IoPAGE(filter);
13218 const bool reverse = cBOOL(IoLINES(filter));
13219 I32 retval;
13220
13221 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13222
13223 /* As we're automatically added, at the lowest level, and hence only called
13224 from this file, we can be sure that we're not called in block mode. Hence
13225 don't bother writing code to deal with block mode. */
13226 if (maxlen) {
13227 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13228 }
13229 if (status < 0) {
13230 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
13231 }
13232 DEBUG_P(PerlIO_printf(Perl_debug_log,
13233 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
13234 FPTR2DPTR(void *, S_utf16_textfilter),
13235 reverse ? 'l' : 'b', idx, maxlen, status,
13236 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13237
13238 while (1) {
13239 STRLEN chars;
13240 STRLEN have;
13241 Size_t newlen;
13242 U8 *end;
13243 /* First, look in our buffer of existing UTF-8 data: */
13244 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13245
13246 if (nl) {
13247 ++nl;
13248 } else if (status == 0) {
13249 /* EOF */
13250 IoPAGE(filter) = 0;
13251 nl = SvEND(utf8_buffer);
13252 }
13253 if (nl) {
13254 STRLEN got = nl - SvPVX(utf8_buffer);
13255 /* Did we have anything to append? */
13256 retval = got != 0;
13257 sv_catpvn(sv, SvPVX(utf8_buffer), got);
13258 /* Everything else in this code works just fine if SVp_POK isn't
13259 set. This, however, needs it, and we need it to work, else
13260 we loop infinitely because the buffer is never consumed. */
13261 sv_chop(utf8_buffer, nl);
13262 break;
13263 }
13264
13265 /* OK, not a complete line there, so need to read some more UTF-16.
13266 Read an extra octect if the buffer currently has an odd number. */
13267 while (1) {
13268 if (status <= 0)
13269 break;
13270 if (SvCUR(utf16_buffer) >= 2) {
13271 /* Location of the high octet of the last complete code point.
13272 Gosh, UTF-16 is a pain. All the benefits of variable length,
13273 *coupled* with all the benefits of partial reads and
13274 endianness. */
13275 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13276 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13277
13278 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13279 break;
13280 }
13281
13282 /* We have the first half of a surrogate. Read more. */
13283 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13284 }
13285
13286 status = FILTER_READ(idx + 1, utf16_buffer,
13287 160 + (SvCUR(utf16_buffer) & 1));
13288 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
13289 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13290 if (status < 0) {
13291 /* Error */
13292 IoPAGE(filter) = status;
13293 return status;
13294 }
13295 }
13296
13297 /* 'chars' isn't quite the right name, as code points above 0xFFFF
13298 * require 4 bytes per char */
13299 chars = SvCUR(utf16_buffer) >> 1;
13300 have = SvCUR(utf8_buffer);
13301
13302 /* Assume the worst case size as noted by the functions: twice the
13303 * number of input bytes */
13304 SvGROW(utf8_buffer, have + chars * 4 + 1);
13305
13306 if (reverse) {
13307 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13308 (U8*)SvPVX_const(utf8_buffer) + have,
13309 chars * 2, &newlen);
13310 } else {
13311 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13312 (U8*)SvPVX_const(utf8_buffer) + have,
13313 chars * 2, &newlen);
13314 }
13315 SvCUR_set(utf8_buffer, have + newlen);
13316 *end = '\0';
13317
13318 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13319 it's private to us, and utf16_to_utf8{,reversed} take a
13320 (pointer,length) pair, rather than a NUL-terminated string. */
13321 if(SvCUR(utf16_buffer) & 1) {
13322 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13323 SvCUR_set(utf16_buffer, 1);
13324 } else {
13325 SvCUR_set(utf16_buffer, 0);
13326 }
13327 }
13328 DEBUG_P(PerlIO_printf(Perl_debug_log,
13329 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
13330 status,
13331 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13332 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13333 return retval;
13334}
13335
13336static U8 *
13337S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13338{
13339 SV *filter = filter_add(S_utf16_textfilter, NULL);
13340
13341 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13342
13343 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13344 SvPVCLEAR(filter);
13345 IoLINES(filter) = reversed;
13346 IoPAGE(filter) = 1; /* Not EOF */
13347
13348 /* Sadly, we have to return a valid pointer, come what may, so we have to
13349 ignore any error return from this. */
13350 SvCUR_set(PL_linestr, 0);
13351 if (FILTER_READ(0, PL_linestr, 0)) {
13352 SvUTF8_on(PL_linestr);
13353 } else {
13354 SvUTF8_on(PL_linestr);
13355 }
13356 PL_bufend = SvEND(PL_linestr);
13357 return (U8*)SvPVX(PL_linestr);
13358}
13359#endif
13360
13361/*
13362=for apidoc scan_vstring
13363
13364Returns a pointer to the next character after the parsed
13365vstring, as well as updating the passed in sv.
13366
13367Function must be called like
13368
13369 sv = sv_2mortal(newSV(5));
13370 s = scan_vstring(s,e,sv);
13371
13372where s and e are the start and end of the string.
13373The sv should already be large enough to store the vstring
13374passed in, for performance reasons.
13375
13376This function may croak if fatal warnings are enabled in the
13377calling scope, hence the sv_2mortal in the example (to prevent
13378a leak). Make sure to do SvREFCNT_inc afterwards if you use
13379sv_2mortal.
13380
13381=cut
13382*/
13383
13384char *
13385Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13386{
13387 const char *pos = s;
13388 const char *start = s;
13389
13390 PERL_ARGS_ASSERT_SCAN_VSTRING;
13391
13392 if (*pos == 'v') pos++; /* get past 'v' */
13393 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13394 pos++;
13395 if ( *pos != '.') {
13396 /* this may not be a v-string if followed by => */
13397 const char *next = pos;
13398 while (next < e && isSPACE(*next))
13399 ++next;
13400 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13401 /* return string not v-string */
13402 sv_setpvn(sv,(char *)s,pos-s);
13403 return (char *)pos;
13404 }
13405 }
13406
13407 if (!isALPHA(*pos)) {
13408 U8 tmpbuf[UTF8_MAXBYTES+1];
13409
13410 if (*s == 'v')
13411 s++; /* get past 'v' */
13412
13413 SvPVCLEAR(sv);
13414
13415 for (;;) {
13416 /* this is atoi() that tolerates underscores */
13417 U8 *tmpend;
13418 UV rev = 0;
13419 const char *end = pos;
13420 UV mult = 1;
13421 while (--end >= s) {
13422 if (*end != '_') {
13423 const UV orev = rev;
13424 rev += (*end - '0') * mult;
13425 mult *= 10;
13426 if (orev > rev)
13427 /* diag_listed_as: Integer overflow in %s number */
13428 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13429 "Integer overflow in decimal number");
13430 }
13431 }
13432
13433 /* Append native character for the rev point */
13434 tmpend = uvchr_to_utf8(tmpbuf, rev);
13435 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13436 if (!UVCHR_IS_INVARIANT(rev))
13437 SvUTF8_on(sv);
13438 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13439 s = ++pos;
13440 else {
13441 s = pos;
13442 break;
13443 }
13444 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13445 pos++;
13446 }
13447 SvPOK_on(sv);
13448 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13449 SvRMAGICAL_on(sv);
13450 }
13451 return (char *)s;
13452}
13453
13454int
13455Perl_keyword_plugin_standard(pTHX_
13456 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13457{
13458 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13459 PERL_UNUSED_CONTEXT;
13460 PERL_UNUSED_ARG(keyword_ptr);
13461 PERL_UNUSED_ARG(keyword_len);
13462 PERL_UNUSED_ARG(op_ptr);
13463 return KEYWORD_PLUGIN_DECLINE;
13464}
13465
13466STRLEN
13467Perl_infix_plugin_standard(pTHX_
13468 char *operator_ptr, STRLEN operator_len, struct Perl_custom_infix **def)
13469{
13470 PERL_ARGS_ASSERT_INFIX_PLUGIN_STANDARD;
13471 PERL_UNUSED_CONTEXT;
13472 PERL_UNUSED_ARG(operator_ptr);
13473 PERL_UNUSED_ARG(operator_len);
13474 PERL_UNUSED_ARG(def);
13475 return 0;
13476}
13477
13478/*
13479=for apidoc_section $lexer
13480=for apidoc wrap_keyword_plugin
13481
13482Puts a C function into the chain of keyword plugins. This is the
13483preferred way to manipulate the L</PL_keyword_plugin> variable.
13484C<new_plugin> is a pointer to the C function that is to be added to the
13485keyword plugin chain, and C<old_plugin_p> points to the storage location
13486where a pointer to the next function in the chain will be stored. The
13487value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
13488while the value previously stored there is written to C<*old_plugin_p>.
13489
13490L</PL_keyword_plugin> is global to an entire process, and a module wishing
13491to hook keyword parsing may find itself invoked more than once per
13492process, typically in different threads. To handle that situation, this
13493function is idempotent. The location C<*old_plugin_p> must initially
13494(once per process) contain a null pointer. A C variable of static
13495duration (declared at file scope, typically also marked C<static> to give
13496it internal linkage) will be implicitly initialised appropriately, if it
13497does not have an explicit initialiser. This function will only actually
13498modify the plugin chain if it finds C<*old_plugin_p> to be null. This
13499function is also thread safe on the small scale. It uses appropriate
13500locking to avoid race conditions in accessing L</PL_keyword_plugin>.
13501
13502When this function is called, the function referenced by C<new_plugin>
13503must be ready to be called, except for C<*old_plugin_p> being unfilled.
13504In a threading situation, C<new_plugin> may be called immediately, even
13505before this function has returned. C<*old_plugin_p> will always be
13506appropriately set before C<new_plugin> is called. If C<new_plugin>
13507decides not to do anything special with the identifier that it is given
13508(which is the usual case for most calls to a keyword plugin), it must
13509chain the plugin function referenced by C<*old_plugin_p>.
13510
13511Taken all together, XS code to install a keyword plugin should typically
13512look something like this:
13513
13514 static Perl_keyword_plugin_t next_keyword_plugin;
13515 static OP *my_keyword_plugin(pTHX_
13516 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13517 {
13518 if (memEQs(keyword_ptr, keyword_len,
13519 "my_new_keyword")) {
13520 ...
13521 } else {
13522 return next_keyword_plugin(aTHX_
13523 keyword_ptr, keyword_len, op_ptr);
13524 }
13525 }
13526 BOOT:
13527 wrap_keyword_plugin(my_keyword_plugin,
13528 &next_keyword_plugin);
13529
13530Direct access to L</PL_keyword_plugin> should be avoided.
13531
13532=cut
13533*/
13534
13535void
13536Perl_wrap_keyword_plugin(pTHX_
13537 Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
13538{
13539
13540 PERL_UNUSED_CONTEXT;
13541 PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
13542 if (*old_plugin_p) return;
13543 KEYWORD_PLUGIN_MUTEX_LOCK;
13544 if (!*old_plugin_p) {
13545 *old_plugin_p = PL_keyword_plugin;
13546 PL_keyword_plugin = new_plugin;
13547 }
13548 KEYWORD_PLUGIN_MUTEX_UNLOCK;
13549}
13550
13551/*
13552=for apidoc wrap_infix_plugin
13553
13554B<NOTE:> This API exists entirely for the purpose of making the CPAN module
13555C<XS::Parse::Infix> work. It is not expected that additional modules will make
13556use of it; rather, that they should use C<XS::Parse::Infix> to provide parsing
13557of new infix operators.
13558
13559Puts a C function into the chain of infix plugins. This is the preferred
13560way to manipulate the L</PL_infix_plugin> variable. C<new_plugin> is a
13561pointer to the C function that is to be added to the infix plugin chain, and
13562C<old_plugin_p> points to a storage location where a pointer to the next
13563function in the chain will be stored. The value of C<new_plugin> is written
13564into the L</PL_infix_plugin> variable, while the value previously stored there
13565is written to C<*old_plugin_p>.
13566
13567Direct access to L</PL_infix_plugin> should be avoided.
13568
13569=cut
13570*/
13571
13572void
13573Perl_wrap_infix_plugin(pTHX_
13574 Perl_infix_plugin_t new_plugin, Perl_infix_plugin_t *old_plugin_p)
13575{
13576
13577 PERL_UNUSED_CONTEXT;
13578 PERL_ARGS_ASSERT_WRAP_INFIX_PLUGIN;
13579 if (*old_plugin_p) return;
13580 /* We use the same mutex as for PL_keyword_plugin as it's so rare either
13581 * of them is actually updated; no need for a dedicated one each */
13582 KEYWORD_PLUGIN_MUTEX_LOCK;
13583 if (!*old_plugin_p) {
13584 *old_plugin_p = PL_infix_plugin;
13585 PL_infix_plugin = new_plugin;
13586 }
13587 KEYWORD_PLUGIN_MUTEX_UNLOCK;
13588}
13589
13590#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
13591static void
13592S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
13593{
13594 SAVEI32(PL_lex_brackets);
13595 if (PL_lex_brackets > 100)
13596 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
13597 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
13598 SAVEI32(PL_lex_allbrackets);
13599 PL_lex_allbrackets = 0;
13600 SAVEI8(PL_lex_fakeeof);
13601 PL_lex_fakeeof = (U8)fakeeof;
13602 if(yyparse(gramtype) && !PL_parser->error_count)
13603 qerror(Perl_mess(aTHX_ "Parse error"));
13604}
13605
13606#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
13607static OP *
13608S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
13609{
13610 OP *o;
13611 ENTER;
13612 SAVEVPTR(PL_eval_root);
13613 PL_eval_root = NULL;
13614 parse_recdescent(gramtype, fakeeof);
13615 o = PL_eval_root;
13616 LEAVE;
13617 return o;
13618}
13619
13620#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
13621static OP *
13622S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
13623{
13624 OP *exprop;
13625 if (flags & ~PARSE_OPTIONAL)
13626 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
13627 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
13628 if (!exprop && !(flags & PARSE_OPTIONAL)) {
13629 if (!PL_parser->error_count)
13630 qerror(Perl_mess(aTHX_ "Parse error"));
13631 exprop = newOP(OP_NULL, 0);
13632 }
13633 return exprop;
13634}
13635
13636/*
13637=for apidoc parse_arithexpr
13638
13639Parse a Perl arithmetic expression. This may contain operators of precedence
13640down to the bit shift operators. The expression must be followed (and thus
13641terminated) either by a comparison or lower-precedence operator or by
13642something that would normally terminate an expression such as semicolon.
13643If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13644otherwise it is mandatory. It is up to the caller to ensure that the
13645dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13646the source of the code to be parsed and the lexical context for the
13647expression.
13648
13649The op tree representing the expression is returned. If an optional
13650expression is absent, a null pointer is returned, otherwise the pointer
13651will be non-null.
13652
13653If an error occurs in parsing or compilation, in most cases a valid op
13654tree is returned anyway. The error is reflected in the parser state,
13655normally resulting in a single exception at the top level of parsing
13656which covers all the compilation errors that occurred. Some compilation
13657errors, however, will throw an exception immediately.
13658
13659=for apidoc Amnh||PARSE_OPTIONAL
13660
13661=cut
13662
13663*/
13664
13665OP *
13666Perl_parse_arithexpr(pTHX_ U32 flags)
13667{
13668 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
13669}
13670
13671/*
13672=for apidoc parse_termexpr
13673
13674Parse a Perl term expression. This may contain operators of precedence
13675down to the assignment operators. The expression must be followed (and thus
13676terminated) either by a comma or lower-precedence operator or by
13677something that would normally terminate an expression such as semicolon.
13678If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13679otherwise it is mandatory. It is up to the caller to ensure that the
13680dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13681the source of the code to be parsed and the lexical context for the
13682expression.
13683
13684The op tree representing the expression is returned. If an optional
13685expression is absent, a null pointer is returned, otherwise the pointer
13686will be non-null.
13687
13688If an error occurs in parsing or compilation, in most cases a valid op
13689tree is returned anyway. The error is reflected in the parser state,
13690normally resulting in a single exception at the top level of parsing
13691which covers all the compilation errors that occurred. Some compilation
13692errors, however, will throw an exception immediately.
13693
13694=cut
13695*/
13696
13697OP *
13698Perl_parse_termexpr(pTHX_ U32 flags)
13699{
13700 return parse_expr(LEX_FAKEEOF_COMMA, flags);
13701}
13702
13703/*
13704=for apidoc parse_listexpr
13705
13706Parse a Perl list expression. This may contain operators of precedence
13707down to the comma operator. The expression must be followed (and thus
13708terminated) either by a low-precedence logic operator such as C<or> or by
13709something that would normally terminate an expression such as semicolon.
13710If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13711otherwise it is mandatory. It is up to the caller to ensure that the
13712dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13713the source of the code to be parsed and the lexical context for the
13714expression.
13715
13716The op tree representing the expression is returned. If an optional
13717expression is absent, a null pointer is returned, otherwise the pointer
13718will be non-null.
13719
13720If an error occurs in parsing or compilation, in most cases a valid op
13721tree is returned anyway. The error is reflected in the parser state,
13722normally resulting in a single exception at the top level of parsing
13723which covers all the compilation errors that occurred. Some compilation
13724errors, however, will throw an exception immediately.
13725
13726=cut
13727*/
13728
13729OP *
13730Perl_parse_listexpr(pTHX_ U32 flags)
13731{
13732 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
13733}
13734
13735/*
13736=for apidoc parse_fullexpr
13737
13738Parse a single complete Perl expression. This allows the full
13739expression grammar, including the lowest-precedence operators such
13740as C<or>. The expression must be followed (and thus terminated) by a
13741token that an expression would normally be terminated by: end-of-file,
13742closing bracketing punctuation, semicolon, or one of the keywords that
13743signals a postfix expression-statement modifier. If C<flags> has the
13744C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
13745mandatory. It is up to the caller to ensure that the dynamic parser
13746state (L</PL_parser> et al) is correctly set to reflect the source of
13747the code to be parsed and the lexical context for the expression.
13748
13749The op tree representing the expression is returned. If an optional
13750expression is absent, a null pointer is returned, otherwise the pointer
13751will be non-null.
13752
13753If an error occurs in parsing or compilation, in most cases a valid op
13754tree is returned anyway. The error is reflected in the parser state,
13755normally resulting in a single exception at the top level of parsing
13756which covers all the compilation errors that occurred. Some compilation
13757errors, however, will throw an exception immediately.
13758
13759=cut
13760*/
13761
13762OP *
13763Perl_parse_fullexpr(pTHX_ U32 flags)
13764{
13765 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
13766}
13767
13768/*
13769=for apidoc parse_block
13770
13771Parse a single complete Perl code block. This consists of an opening
13772brace, a sequence of statements, and a closing brace. The block
13773constitutes a lexical scope, so C<my> variables and various compile-time
13774effects can be contained within it. It is up to the caller to ensure
13775that the dynamic parser state (L</PL_parser> et al) is correctly set to
13776reflect the source of the code to be parsed and the lexical context for
13777the statement.
13778
13779The op tree representing the code block is returned. This is always a
13780real op, never a null pointer. It will normally be a C<lineseq> list,
13781including C<nextstate> or equivalent ops. No ops to construct any kind
13782of runtime scope are included by virtue of it being a block.
13783
13784If an error occurs in parsing or compilation, in most cases a valid op
13785tree (most likely null) is returned anyway. The error is reflected in
13786the parser state, normally resulting in a single exception at the top
13787level of parsing which covers all the compilation errors that occurred.
13788Some compilation errors, however, will throw an exception immediately.
13789
13790The C<flags> parameter is reserved for future use, and must always
13791be zero.
13792
13793=cut
13794*/
13795
13796OP *
13797Perl_parse_block(pTHX_ U32 flags)
13798{
13799 if (flags)
13800 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13801 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13802}
13803
13804/*
13805=for apidoc parse_barestmt
13806
13807Parse a single unadorned Perl statement. This may be a normal imperative
13808statement or a declaration that has compile-time effect. It does not
13809include any label or other affixture. It is up to the caller to ensure
13810that the dynamic parser state (L</PL_parser> et al) is correctly set to
13811reflect the source of the code to be parsed and the lexical context for
13812the statement.
13813
13814The op tree representing the statement is returned. This may be a
13815null pointer if the statement is null, for example if it was actually
13816a subroutine definition (which has compile-time side effects). If not
13817null, it will be ops directly implementing the statement, suitable to
13818pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
13819equivalent op (except for those embedded in a scope contained entirely
13820within the statement).
13821
13822If an error occurs in parsing or compilation, in most cases a valid op
13823tree (most likely null) is returned anyway. The error is reflected in
13824the parser state, normally resulting in a single exception at the top
13825level of parsing which covers all the compilation errors that occurred.
13826Some compilation errors, however, will throw an exception immediately.
13827
13828The C<flags> parameter is reserved for future use, and must always
13829be zero.
13830
13831=cut
13832*/
13833
13834OP *
13835Perl_parse_barestmt(pTHX_ U32 flags)
13836{
13837 if (flags)
13838 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13839 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13840}
13841
13842/*
13843=for apidoc parse_label
13844
13845Parse a single label, possibly optional, of the type that may prefix a
13846Perl statement. It is up to the caller to ensure that the dynamic parser
13847state (L</PL_parser> et al) is correctly set to reflect the source of
13848the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13849label is optional, otherwise it is mandatory.
13850
13851The name of the label is returned in the form of a fresh scalar. If an
13852optional label is absent, a null pointer is returned.
13853
13854If an error occurs in parsing, which can only occur if the label is
13855mandatory, a valid label is returned anyway. The error is reflected in
13856the parser state, normally resulting in a single exception at the top
13857level of parsing which covers all the compilation errors that occurred.
13858
13859=cut
13860*/
13861
13862SV *
13863Perl_parse_label(pTHX_ U32 flags)
13864{
13865 if (flags & ~PARSE_OPTIONAL)
13866 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13867 if (PL_nexttoke) {
13868 PL_parser->yychar = yylex();
13869 if (PL_parser->yychar == LABEL) {
13870 SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13871 PL_parser->yychar = YYEMPTY;
13872 cSVOPx(pl_yylval.opval)->op_sv = NULL;
13873 op_free(pl_yylval.opval);
13874 return labelsv;
13875 } else {
13876 yyunlex();
13877 goto no_label;
13878 }
13879 } else {
13880 char *s, *t;
13881 STRLEN wlen, bufptr_pos;
13882 lex_read_space(0);
13883 t = s = PL_bufptr;
13884 if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13885 goto no_label;
13886 t = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen, FALSE);
13887 if (word_takes_any_delimiter(s, wlen))
13888 goto no_label;
13889 bufptr_pos = s - SvPVX(PL_linestr);
13890 PL_bufptr = t;
13891 lex_read_space(LEX_KEEP_PREVIOUS);
13892 t = PL_bufptr;
13893 s = SvPVX(PL_linestr) + bufptr_pos;
13894 if (t[0] == ':' && t[1] != ':') {
13895 PL_oldoldbufptr = PL_oldbufptr;
13896 PL_oldbufptr = s;
13897 PL_bufptr = t+1;
13898 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13899 } else {
13900 PL_bufptr = s;
13901 no_label:
13902 if (flags & PARSE_OPTIONAL) {
13903 return NULL;
13904 } else {
13905 qerror(Perl_mess(aTHX_ "Parse error"));
13906 return newSVpvs("x");
13907 }
13908 }
13909 }
13910}
13911
13912/*
13913=for apidoc parse_fullstmt
13914
13915Parse a single complete Perl statement. This may be a normal imperative
13916statement or a declaration that has compile-time effect, and may include
13917optional labels. It is up to the caller to ensure that the dynamic
13918parser state (L</PL_parser> et al) is correctly set to reflect the source
13919of the code to be parsed and the lexical context for the statement.
13920
13921The op tree representing the statement is returned. This may be a
13922null pointer if the statement is null, for example if it was actually
13923a subroutine definition (which has compile-time side effects). If not
13924null, it will be the result of a L</newSTATEOP> call, normally including
13925a C<nextstate> or equivalent op.
13926
13927If an error occurs in parsing or compilation, in most cases a valid op
13928tree (most likely null) is returned anyway. The error is reflected in
13929the parser state, normally resulting in a single exception at the top
13930level of parsing which covers all the compilation errors that occurred.
13931Some compilation errors, however, will throw an exception immediately.
13932
13933The C<flags> parameter is reserved for future use, and must always
13934be zero.
13935
13936=cut
13937*/
13938
13939OP *
13940Perl_parse_fullstmt(pTHX_ U32 flags)
13941{
13942 if (flags)
13943 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13944 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13945}
13946
13947/*
13948=for apidoc parse_stmtseq
13949
13950Parse a sequence of zero or more Perl statements. These may be normal
13951imperative statements, including optional labels, or declarations
13952that have compile-time effect, or any mixture thereof. The statement
13953sequence ends when a closing brace or end-of-file is encountered in a
13954place where a new statement could have validly started. It is up to
13955the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13956is correctly set to reflect the source of the code to be parsed and the
13957lexical context for the statements.
13958
13959The op tree representing the statement sequence is returned. This may
13960be a null pointer if the statements were all null, for example if there
13961were no statements or if there were only subroutine definitions (which
13962have compile-time side effects). If not null, it will be a C<lineseq>
13963list, normally including C<nextstate> or equivalent ops.
13964
13965If an error occurs in parsing or compilation, in most cases a valid op
13966tree is returned anyway. The error is reflected in the parser state,
13967normally resulting in a single exception at the top level of parsing
13968which covers all the compilation errors that occurred. Some compilation
13969errors, however, will throw an exception immediately.
13970
13971The C<flags> parameter is reserved for future use, and must always
13972be zero.
13973
13974=cut
13975*/
13976
13977OP *
13978Perl_parse_stmtseq(pTHX_ U32 flags)
13979{
13980 OP *stmtseqop;
13981 I32 c;
13982 if (flags)
13983 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13984 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13985 c = lex_peek_unichar(0);
13986 if (c != -1 && c != /*{*/'}')
13987 qerror(Perl_mess(aTHX_ "Parse error"));
13988 return stmtseqop;
13989}
13990
13991/*
13992=for apidoc parse_subsignature
13993
13994Parse a subroutine signature declaration. This is the contents of the
13995parentheses following a named or anonymous subroutine declaration when the
13996C<signatures> feature is enabled. Note that this function neither expects
13997nor consumes the opening and closing parentheses around the signature; it
13998is the caller's job to handle these.
13999
14000This function must only be called during parsing of a subroutine; after
14001L</start_subparse> has been called. It might allocate lexical variables on
14002the pad for the current subroutine.
14003
14004The op tree to unpack the arguments from the stack at runtime is returned.
14005This op tree should appear at the beginning of the compiled function. The
14006caller may wish to use L</op_append_list> to build their function body
14007after it, or splice it together with the body before calling L</newATTRSUB>.
14008
14009The C<flags> parameter is reserved for future use, and must always
14010be zero.
14011
14012=cut
14013*/
14014
14015OP *
14016Perl_parse_subsignature(pTHX_ U32 flags)
14017{
14018 if (flags)
14019 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
14020 return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
14021}
14022
14023/*
14024 * ex: set ts=8 sts=4 sw=4 et:
14025 */