perl5.git.perl.org Git - perl5.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* toke.c
	2	*
	3	* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
	4	* 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
	5	*
	6	* You may distribute under the terms of either the GNU General Public
	7	* License or the Artistic License, as specified in the README file.
	8	*
	9	*/
	10
	11	/*
	12	* 'It all comes from here, the stench and the peril.' --Frodo
	13	*
	14	* [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
	15	*/
	16
	17	/*
	18	* This file is the lexer for Perl. It's closely linked to the
	19	* parser, perly.y.
	20	*
	21	* The main routine is yylex(), which returns the next token.
	22	*/
	23
	24	/*
	25	=head1 Lexer interface
	26	This is the lower layer of the Perl parser, managing characters and tokens.
	27
	28	=for apidoc AmnU\|yy_parser *\|PL_parser
	29
	30	Pointer to a structure encapsulating the state of the parsing operation
	31	currently in progress. The pointer can be locally changed to perform
	32	a nested parse without interfering with the state of an outer parse.
	33	Individual members of C<PL_parser> have their own documentation.
	34
	35	=cut
	36	*/
	37
	38	#include "EXTERN.h"
	39	#define PERL_IN_TOKE_C
	40	#include "perl.h"
	41	#include "invlist_inline.h"
	42
	43	#define new_constant(a,b,c,d,e,f,g, h) \
	44	S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
	45
	46	#define pl_yylval (PL_parser->yylval)
	47
	48	/* XXX temporary backwards compatibility */
	49	#define PL_lex_brackets (PL_parser->lex_brackets)
	50	#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
	51	#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
	52	#define PL_lex_brackstack (PL_parser->lex_brackstack)
	53	#define PL_lex_casemods (PL_parser->lex_casemods)
	54	#define PL_lex_casestack (PL_parser->lex_casestack)
	55	#define PL_lex_dojoin (PL_parser->lex_dojoin)
	56	#define PL_lex_formbrack (PL_parser->lex_formbrack)
	57	#define PL_lex_inpat (PL_parser->lex_inpat)
	58	#define PL_lex_inwhat (PL_parser->lex_inwhat)
	59	#define PL_lex_op (PL_parser->lex_op)
	60	#define PL_lex_repl (PL_parser->lex_repl)
	61	#define PL_lex_starts (PL_parser->lex_starts)
	62	#define PL_lex_stuff (PL_parser->lex_stuff)
	63	#define PL_multi_start (PL_parser->multi_start)
	64	#define PL_multi_open (PL_parser->multi_open)
	65	#define PL_multi_close (PL_parser->multi_close)
	66	#define PL_preambled (PL_parser->preambled)
	67	#define PL_linestr (PL_parser->linestr)
	68	#define PL_expect (PL_parser->expect)
	69	#define PL_copline (PL_parser->copline)
	70	#define PL_bufptr (PL_parser->bufptr)
	71	#define PL_oldbufptr (PL_parser->oldbufptr)
	72	#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
	73	#define PL_linestart (PL_parser->linestart)
	74	#define PL_bufend (PL_parser->bufend)
	75	#define PL_last_uni (PL_parser->last_uni)
	76	#define PL_last_lop (PL_parser->last_lop)
	77	#define PL_last_lop_op (PL_parser->last_lop_op)
	78	#define PL_lex_state (PL_parser->lex_state)
	79	#define PL_rsfp (PL_parser->rsfp)
	80	#define PL_rsfp_filters (PL_parser->rsfp_filters)
	81	#define PL_in_my (PL_parser->in_my)
	82	#define PL_in_my_stash (PL_parser->in_my_stash)
	83	#define PL_tokenbuf (PL_parser->tokenbuf)
	84	#define PL_multi_end (PL_parser->multi_end)
	85	#define PL_error_count (PL_parser->error_count)
	86
	87	# define PL_nexttoke (PL_parser->nexttoke)
	88	# define PL_nexttype (PL_parser->nexttype)
	89	# define PL_nextval (PL_parser->nextval)
	90
	91
	92	#define SvEVALED(sv) \
	93	(SvTYPE(sv) >= SVt_PVNV \
	94	&& ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
	95
	96	static const char ident_too_long[] = "Identifier too long";
	97	static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'";
	98
	99	# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
	100
	101	#define XENUMMASK 0x3f
	102	#define XFAKEEOF 0x40
	103	#define XFAKEBRACK 0x80
	104
	105	#ifdef USE_UTF8_SCRIPTS
	106	# define UTF cBOOL(!IN_BYTES)
	107	#else
	108	# define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) \|\| ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
	109	#endif
	110
	111	/* The maximum number of characters preceding the unrecognized one to display */
	112	#define UNRECOGNIZED_PRECEDE_COUNT 10
	113
	114	/* In variables named $^X, these are the legal values for X.
	115	* 1999-02-27 mjd-perl-patch@plover.com */
	116	#define isCONTROLVAR(x) (isUPPER(x) \|\| memCHRs("[\\]^_?", (x)))
	117
	118	/* Non-identifier plugin infix operators are allowed any printing character
	119	* except spaces, digits, or identifier chars
	120	*/
	121	#define isPLUGINFIX(c) (c && !isSPACE(c) && !isDIGIT(c) && !isALPHA(c))
	122	/* Plugin infix operators may not begin with a quote symbol */
	123	#define isPLUGINFIX_FIRST(c) (isPLUGINFIX(c) && c != '"' && c != '\'')
	124
	125	#define PLUGINFIX_IS_ENABLED UNLIKELY(PL_infix_plugin != &Perl_infix_plugin_standard)
	126
	127	#define SPACE_OR_TAB(c) isBLANK_A(c)
	128
	129	#define HEXFP_PEEK(s) \
	130	(((s[0] == '.') && \
	131	(isXDIGIT(s[1]) \|\| isALPHA_FOLD_EQ(s[1], 'p'))) \|\| \
	132	isALPHA_FOLD_EQ(s[0], 'p'))
	133
	134	/* LEX_* are values for PL_lex_state, the state of the lexer.
	135	* They are arranged oddly so that the guard on the switch statement
	136	* can get by with a single comparison (if the compiler is smart enough).
	137	*
	138	* These values refer to the various states within a sublex parse,
	139	* i.e. within a double quotish string
	140	*/
	141
	142	/* #define LEX_NOTPARSING 11 is done in perl.h. */
	143
	144	#define LEX_NORMAL 10 /* normal code (ie not within "...") */
	145	#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
	146	#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
	147	#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
	148	#define LEX_INTERPSTART 6 /* expecting the start of a $var */
	149
	150	/* at end of code, eg "$x" followed by: */
	151	#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
	152	#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
	153
	154	#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
	155	string or after \E, $foo, etc */
	156	#define LEX_INTERPCONST 2 /* NOT USED */
	157	#define LEX_FORMLINE 1 /* expecting a format line */
	158
	159	/* returned to yyl_try() to request it to retry the parse loop, expected to only
	160	be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
	161	can also return it.
	162
	163	yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
	164	other token values are 258 or higher (see perly.h), so -1 should be
	165	a safe value here.
	166	*/
	167	#define YYL_RETRY (-1)
	168
	169	#ifdef DEBUGGING
	170	static const char* const lex_state_names[] = {
	171	"KNOWNEXT",
	172	"FORMLINE",
	173	"INTERPCONST",
	174	"INTERPCONCAT",
	175	"INTERPENDMAYBE",
	176	"INTERPEND",
	177	"INTERPSTART",
	178	"INTERPPUSH",
	179	"INTERPCASEMOD",
	180	"INTERPNORMAL",
	181	"NORMAL"
	182	};
	183	#endif
	184
	185	#include "keywords.h"
	186
	187	/* CLINE is a macro that ensures PL_copline has a sane value */
	188
	189	#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
	190
	191	/*
	192	* Convenience functions to return different tokens and prime the
	193	* lexer for the next token. They all take an argument.
	194	*
	195	* TOKEN : generic token (used for '(', DOLSHARP, etc)
	196	* OPERATOR : generic operator
	197	* AOPERATOR : assignment operator
	198	* PREBLOCK : beginning the block after an if, while, foreach, ...
	199	* PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
	200	* PREREF : *EXPR where EXPR is not a simple identifier
	201	* TERM : expression term
	202	* POSTDEREF : postfix dereference (->$* ->@[...] etc.)
	203	* LOOPX : loop exiting command (goto, last, dump, etc)
	204	* FTST : file test operator
	205	* FUN0 : zero-argument function
	206	* FUN0OP : zero-argument function, with its op created in this file
	207	* FUN1 : not used, except for not, which isn't a UNIOP
	208	* BOop : bitwise or or xor
	209	* BAop : bitwise and
	210	* BCop : bitwise complement
	211	* SHop : shift operator
	212	* PWop : power operator
	213	* PMop : pattern-matching operator
	214	* Aop : addition-level operator
	215	* AopNOASSIGN : addition-level operator that is never part of .=
	216	* Mop : multiplication-level operator
	217	* ChEop : chaining equality-testing operator
	218	* NCEop : non-chaining comparison operator at equality precedence
	219	* ChRop : chaining relational operator <= != gt
	220	* NCRop : non-chaining relational operator isa
	221	*
	222	* Also see LOP and lop() below.
	223	*/
	224
	225	#ifdef DEBUGGING /* Serve -DT. */
	226	# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
	227	#else
	228	# define REPORT(retval) (retval)
	229	#endif
	230
	231	#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
	232	#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
	233	#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
	234	#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
	235	#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
	236	#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
	237	#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
	238	#define PHASERBLOCK(f) return (pl_yylval.ival=f, PL_expect = XBLOCK, PL_bufptr = s, REPORT((int)PHASER))
	239	#define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
	240	#define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
	241	pl_yylval.ival=f, \
	242	PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
	243	REPORT((int)LOOPEX))
	244	#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
	245	#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
	246	#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
	247	#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
	248	#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
	249	#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
	250	#define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
	251	REPORT(PERLY_TILDE)
	252	#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
	253	#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
	254	#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
	255	#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
	256	#define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
	257	#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
	258	#define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
	259	#define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
	260	#define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
	261	#define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
	262
	263	/* This bit of chicanery makes a unary function followed by
	264	* a parenthesis into a function with one argument, highest precedence.
	265	* The UNIDOR macro is for unary functions that can be followed by the //
	266	* operator (such as C<shift // 0>).
	267	*/
	268	#define UNI3(f,x,have_x) { \
	269	pl_yylval.ival = f; \
	270	if (have_x) PL_expect = x; \
	271	PL_bufptr = s; \
	272	PL_last_uni = PL_oldbufptr; \
	273	PL_last_lop_op = (f) < 0 ? -(f) : (f); \
	274	if (*s == '(') \
	275	return REPORT( (int)FUNC1 ); \
	276	s = skipspace(s); \
	277	return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
	278	}
	279	#define UNI(f) UNI3(f,XTERM,1)
	280	#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
	281	#define UNIPROTO(f,optional) { \
	282	if (optional) PL_last_uni = PL_oldbufptr; \
	283	OPERATOR(f); \
	284	}
	285
	286	#define UNIBRACK(f) UNI3(f,0,0)
	287
	288	/* return has special case parsing.
	289	*
	290	* List operators have low precedence. Functions have high precedence.
	291	* Every built in, except return, if written with () around its arguments, is
	292	* parsed as a function. Hence every other list built in:
	293	*
	294	* $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9
	295	* 429
	296	* $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5
	297	* 639
	298	* $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()'
	299	* Useless use of a constant (2) in void context at -e line 1.
	300	* Useless use of a constant (4) in void context at -e line 1.
	301	*
	302	* $
	303	*
	304	* empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a
	305	* list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string.
	306	*
	307	* Whereas return:
	308	*
	309	* $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()'
	310	* 2
	311	* 4
	312	* 9
	313	* $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()'
	314	* Useless use of a constant (2) in void context at -e line 1.
	315	* Useless use of a constant (4) in void context at -e line 1.
	316	* 9
	317	* $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()'
	318	* Useless use of a constant (2) in void context at -e line 1.
	319	* Useless use of a constant (4) in void context at -e line 1.
	320	* 9
	321	* $
	322	*
	323	* and:
	324	* $ perl -lwe 'sub foo { return(2,4,6) } print for foo()'
	325	* 2
	326	* 4
	327	* 6
	328	*
	329	* This last example is what we expect, but it's clearly inconsistent with how
	330	* C<return(2,4,6) * 1.5> ought to behave, if the rules were consistently
	331	* followed.
	332	*
	333	*
	334	* Perl 3 attempted to be consistent:
	335	*
	336	* The rules are more consistent about where parens are needed and
	337	* where they are not. In particular, unary operators and list operators now
	338	* behave like functions if they're called like functions.
	339	*
	340	* However, the behaviour for return was reverted to the "old" parsing with
	341	* patches 9-12:
	342	*
	343	* The construct
	344	* return (1,2,3);
	345	* did not do what was expected, since return was swallowing the
	346	* parens in order to consider itself a function. The solution,
	347	* since return never wants any trailing expression such as
	348	* return (1,2,3) + 2;
	349	* is to simply make return an exception to the paren-makes-a-function
	350	* rule, and treat it the way it always was, so that it doesn't
	351	* strip the parens.
	352	*
	353	* To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with
	354	* LOP(OP_RETURN, XTERM);
	355	*
	356	* and constructs such as
	357	*
	358	* return (Internals::V())[2]
	359	*
	360	* turn into syntax errors
	361	*/
	362
	363	#define OLDLOP(f) \
	364	do { \
	365	if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
	366	PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
	367	pl_yylval.ival = (f); \
	368	PL_expect = XTERM; \
	369	PL_bufptr = s; \
	370	return (int)LSTOP; \
	371	} while(0)
	372
	373	#define COPLINE_INC_WITH_HERELINES \
	374	STMT_START { \
	375	CopLINE_inc(PL_curcop); \
	376	if (PL_parser->herelines) \
	377	CopLINE(PL_curcop) += PL_parser->herelines, \
	378	PL_parser->herelines = 0; \
	379	} STMT_END
	380	/* Called after scan_str to update CopLINE(PL_curcop), but only when there
	381	* is no sublex_push to follow. */
	382	#define COPLINE_SET_FROM_MULTI_END \
	383	STMT_START { \
	384	CopLINE_set(PL_curcop, PL_multi_end); \
	385	if (PL_multi_end != PL_multi_start) \
	386	PL_parser->herelines = 0; \
	387	} STMT_END
	388
	389
	390	/* A file-local structure for passing around information about subroutines and
	391	* related definable words */
	392	struct code {
	393	SV *sv;
	394	CV *cv;
	395	GV gv, *gvp;
	396	OP *rv2cv_op;
	397	PADOFFSET off;
	398	bool lex;
	399	};
	400
	401	static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
	402
	403	#ifdef DEBUGGING
	404
	405	/* how to interpret the pl_yylval associated with the token */
	406	enum token_type {
	407	TOKENTYPE_NONE,
	408	TOKENTYPE_IVAL,
	409	TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
	410	TOKENTYPE_PVAL,
	411	TOKENTYPE_OPVAL
	412	};
	413
	414	#define DEBUG_TOKEN(Type, Name) \
	415	{ Name, TOKENTYPE_##Type, #Name }
	416
	417	static struct debug_tokens {
	418	const int token;
	419	enum token_type type;
	420	const char *name;
	421	} const debug_tokens[] =
	422	{
	423	DEBUG_TOKEN (OPNUM, ADDOP),
	424	DEBUG_TOKEN (NONE, ANDAND),
	425	DEBUG_TOKEN (NONE, ANDOP),
	426	DEBUG_TOKEN (NONE, ARROW),
	427	DEBUG_TOKEN (OPNUM, ASSIGNOP),
	428	DEBUG_TOKEN (OPNUM, BITANDOP),
	429	DEBUG_TOKEN (OPNUM, BITOROP),
	430	DEBUG_TOKEN (OPNUM, CHEQOP),
	431	DEBUG_TOKEN (OPNUM, CHRELOP),
	432	DEBUG_TOKEN (NONE, COLONATTR),
	433	DEBUG_TOKEN (NONE, DOLSHARP),
	434	DEBUG_TOKEN (NONE, DORDOR),
	435	DEBUG_TOKEN (IVAL, DOTDOT),
	436	DEBUG_TOKEN (NONE, FORMLBRACK),
	437	DEBUG_TOKEN (NONE, FORMRBRACK),
	438	DEBUG_TOKEN (OPNUM, FUNC),
	439	DEBUG_TOKEN (OPNUM, FUNC0),
	440	DEBUG_TOKEN (OPVAL, FUNC0OP),
	441	DEBUG_TOKEN (OPVAL, FUNC0SUB),
	442	DEBUG_TOKEN (OPNUM, FUNC1),
	443	DEBUG_TOKEN (NONE, HASHBRACK),
	444	DEBUG_TOKEN (IVAL, KW_CATCH),
	445	DEBUG_TOKEN (IVAL, KW_CLASS),
	446	DEBUG_TOKEN (IVAL, KW_CONTINUE),
	447	DEBUG_TOKEN (IVAL, KW_DEFAULT),
	448	DEBUG_TOKEN (IVAL, KW_DO),
	449	DEBUG_TOKEN (IVAL, KW_ELSE),
	450	DEBUG_TOKEN (IVAL, KW_ELSIF),
	451	DEBUG_TOKEN (IVAL, KW_FIELD),
	452	DEBUG_TOKEN (IVAL, KW_GIVEN),
	453	DEBUG_TOKEN (IVAL, KW_FOR),
	454	DEBUG_TOKEN (IVAL, KW_FORMAT),
	455	DEBUG_TOKEN (IVAL, KW_IF),
	456	DEBUG_TOKEN (IVAL, KW_LOCAL),
	457	DEBUG_TOKEN (IVAL, KW_METHOD_anon),
	458	DEBUG_TOKEN (IVAL, KW_METHOD_named),
	459	DEBUG_TOKEN (IVAL, KW_MY),
	460	DEBUG_TOKEN (IVAL, KW_PACKAGE),
	461	DEBUG_TOKEN (IVAL, KW_REQUIRE),
	462	DEBUG_TOKEN (IVAL, KW_SUB_anon),
	463	DEBUG_TOKEN (IVAL, KW_SUB_anon_sig),
	464	DEBUG_TOKEN (IVAL, KW_SUB_named),
	465	DEBUG_TOKEN (IVAL, KW_SUB_named_sig),
	466	DEBUG_TOKEN (IVAL, KW_TRY),
	467	DEBUG_TOKEN (IVAL, KW_USE_or_NO),
	468	DEBUG_TOKEN (IVAL, KW_UNLESS),
	469	DEBUG_TOKEN (IVAL, KW_UNTIL),
	470	DEBUG_TOKEN (IVAL, KW_WHEN),
	471	DEBUG_TOKEN (IVAL, KW_WHILE),
	472	DEBUG_TOKEN (OPVAL, LABEL),
	473	DEBUG_TOKEN (OPNUM, LOOPEX),
	474	DEBUG_TOKEN (OPNUM, LSTOP),
	475	DEBUG_TOKEN (OPVAL, LSTOPSUB),
	476	DEBUG_TOKEN (OPNUM, MATCHOP),
	477	DEBUG_TOKEN (OPVAL, METHCALL),
	478	DEBUG_TOKEN (OPVAL, METHCALL0),
	479	DEBUG_TOKEN (OPNUM, MULOP),
	480	DEBUG_TOKEN (OPNUM, NCEQOP),
	481	DEBUG_TOKEN (OPNUM, NCRELOP),
	482	DEBUG_TOKEN (NONE, NOAMP),
	483	DEBUG_TOKEN (NONE, NOTOP),
	484	DEBUG_TOKEN (IVAL, OROP),
	485	DEBUG_TOKEN (NONE, OROR),
	486	DEBUG_TOKEN (IVAL, PERLY_AMPERSAND),
	487	DEBUG_TOKEN (IVAL, PERLY_BRACE_CLOSE),
	488	DEBUG_TOKEN (IVAL, PERLY_BRACE_OPEN),
	489	DEBUG_TOKEN (IVAL, PERLY_BRACKET_CLOSE),
	490	DEBUG_TOKEN (IVAL, PERLY_BRACKET_OPEN),
	491	DEBUG_TOKEN (IVAL, PERLY_COLON),
	492	DEBUG_TOKEN (IVAL, PERLY_COMMA),
	493	DEBUG_TOKEN (IVAL, PERLY_DOT),
	494	DEBUG_TOKEN (IVAL, PERLY_EQUAL_SIGN),
	495	DEBUG_TOKEN (IVAL, PERLY_EXCLAMATION_MARK),
	496	DEBUG_TOKEN (IVAL, PERLY_MINUS),
	497	DEBUG_TOKEN (IVAL, PERLY_PAREN_OPEN),
	498	DEBUG_TOKEN (IVAL, PERLY_PERCENT_SIGN),
	499	DEBUG_TOKEN (IVAL, PERLY_PLUS),
	500	DEBUG_TOKEN (IVAL, PERLY_QUESTION_MARK),
	501	DEBUG_TOKEN (IVAL, PERLY_SEMICOLON),
	502	DEBUG_TOKEN (IVAL, PERLY_SLASH),
	503	DEBUG_TOKEN (IVAL, PERLY_SNAIL),
	504	DEBUG_TOKEN (IVAL, PERLY_STAR),
	505	DEBUG_TOKEN (IVAL, PERLY_TILDE),
	506	DEBUG_TOKEN (OPVAL, PLUGEXPR),
	507	DEBUG_TOKEN (OPVAL, PLUGSTMT),
	508	DEBUG_TOKEN (PVAL, PLUGIN_ADD_OP),
	509	DEBUG_TOKEN (PVAL, PLUGIN_ASSIGN_OP),
	510	DEBUG_TOKEN (PVAL, PLUGIN_HIGH_OP),
	511	DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_AND_OP),
	512	DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_OR_OP),
	513	DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_AND_LOW_OP),
	514	DEBUG_TOKEN (PVAL, PLUGIN_LOGICAL_OR_LOW_OP),
	515	DEBUG_TOKEN (PVAL, PLUGIN_LOW_OP),
	516	DEBUG_TOKEN (PVAL, PLUGIN_MUL_OP),
	517	DEBUG_TOKEN (PVAL, PLUGIN_POW_OP),
	518	DEBUG_TOKEN (PVAL, PLUGIN_REL_OP),
	519	DEBUG_TOKEN (OPVAL, PMFUNC),
	520	DEBUG_TOKEN (NONE, POSTJOIN),
	521	DEBUG_TOKEN (NONE, POSTDEC),
	522	DEBUG_TOKEN (NONE, POSTINC),
	523	DEBUG_TOKEN (OPNUM, POWOP),
	524	DEBUG_TOKEN (NONE, PREDEC),
	525	DEBUG_TOKEN (NONE, PREINC),
	526	DEBUG_TOKEN (OPVAL, PRIVATEREF),
	527	DEBUG_TOKEN (OPVAL, QWLIST),
	528	DEBUG_TOKEN (NONE, REFGEN),
	529	DEBUG_TOKEN (OPNUM, SHIFTOP),
	530	DEBUG_TOKEN (NONE, SUBLEXEND),
	531	DEBUG_TOKEN (NONE, SUBLEXSTART),
	532	DEBUG_TOKEN (OPVAL, THING),
	533	DEBUG_TOKEN (NONE, UMINUS),
	534	DEBUG_TOKEN (OPNUM, UNIOP),
	535	DEBUG_TOKEN (OPVAL, UNIOPSUB),
	536	DEBUG_TOKEN (OPVAL, BAREWORD),
	537	DEBUG_TOKEN (IVAL, YADAYADA),
	538	{ 0, TOKENTYPE_NONE, NULL }
	539	};
	540
	541	#undef DEBUG_TOKEN
	542
	543	/* dump the returned token in rv, plus any optional arg in pl_yylval */
	544
	545	STATIC int
	546	S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
	547	{
	548	PERL_ARGS_ASSERT_TOKEREPORT;
	549
	550	if (DEBUG_T_TEST) {
	551	const char *name = NULL;
	552	enum token_type type = TOKENTYPE_NONE;
	553	const struct debug_tokens *p;
	554	SV* const report = newSVpvs("<== ");
	555
	556	for (p = debug_tokens; p->token; p++) {
	557	if (p->token == (int)rv) {
	558	name = p->name;
	559	type = p->type;
	560	break;
	561	}
	562	}
	563	if (name)
	564	Perl_sv_catpv(aTHX_ report, name);
	565	else if (isGRAPH(rv))
	566	{
	567	Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
	568	if ((char)rv == 'p')
	569	sv_catpvs(report, " (pending identifier)");
	570	}
	571	else if (!rv)
	572	sv_catpvs(report, "EOF");
	573	else
	574	Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
	575	switch (type) {
	576	case TOKENTYPE_NONE:
	577	break;
	578	case TOKENTYPE_IVAL:
	579	Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
	580	break;
	581	case TOKENTYPE_OPNUM:
	582	Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
	583	PL_op_name[lvalp->ival]);
	584	break;
	585	case TOKENTYPE_PVAL:
	586	Perl_sv_catpvf(aTHX_ report, "(pval=%p)", lvalp->pval);
	587	break;
	588	case TOKENTYPE_OPVAL:
	589	if (lvalp->opval) {
	590	Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
	591	PL_op_name[lvalp->opval->op_type]);
	592	if (lvalp->opval->op_type == OP_CONST) {
	593	Perl_sv_catpvf(aTHX_ report, " %s",
	594	SvPEEK(cSVOPx_sv(lvalp->opval)));
	595	}
	596
	597	}
	598	else
	599	sv_catpvs(report, "(opval=null)");
	600	break;
	601	}
	602	PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
	603	};
	604	return (int)rv;
	605	}
	606
	607
	608	/* print the buffer with suitable escapes */
	609
	610	STATIC void
	611	S_printbuf(pTHX_ const char const fmt, const char const s)
	612	{
	613	SV* const tmp = newSVpvs("");
	614
	615	PERL_ARGS_ASSERT_PRINTBUF;
	616
	617	GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
	618	PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
	619	GCC_DIAG_RESTORE_STMT;
	620	SvREFCNT_dec(tmp);
	621	}
	622
	623	#endif
	624
	625	/*
	626	* S_ao
	627	*
	628	* This subroutine looks for an '=' next to the operator that has just been
	629	* parsed and turns it into an ASSIGNOP if it finds one.
	630	*/
	631
	632	STATIC int
	633	S_ao(pTHX_ int toketype)
	634	{
	635	if (*PL_bufptr == '=') {
	636	PL_bufptr++;
	637
	638	switch (toketype) {
	639	case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break;
	640	case OROR: pl_yylval.ival = OP_ORASSIGN; break;
	641	case DORDOR: pl_yylval.ival = OP_DORASSIGN; break;
	642	}
	643
	644	toketype = ASSIGNOP;
	645	}
	646	return REPORT(toketype);
	647	}
	648
	649	/*
	650	* S_no_op
	651	* When Perl expects an operator and finds something else, no_op
	652	* prints the warning. It always prints "<something> found where
	653	* operator expected. It prints "Missing semicolon on previous line?"
	654	* if the surprise occurs at the start of the line. "do you need to
	655	* predeclare ..." is printed out for code like "sub bar; foo bar $x"
	656	* where the compiler doesn't know if foo is a method call or a function.
	657	* It prints "Missing operator before end of line" if there's nothing
	658	* after the missing operator, or "... before <...>" if there is something
	659	* after the missing operator.
	660	*
	661	* PL_bufptr is expected to point to the start of the thing that was found,
	662	* and s after the next token or partial token.
	663	*/
	664
	665	STATIC void
	666	S_no_op(pTHX_ const char const what, char s)
	667	{
	668	char * const oldbp = PL_bufptr;
	669	const bool is_first = (PL_oldbufptr == PL_linestart);
	670	SV *message = sv_2mortal( newSVpvf(
	671	PERL_DIAG_WARN_SYNTAX("%s found where operator expected"),
	672	what
	673	) );
	674
	675	PERL_ARGS_ASSERT_NO_OP;
	676
	677	if (!s)
	678	s = oldbp;
	679	else
	680	PL_bufptr = s;
	681
	682	if (ckWARN_d(WARN_SYNTAX)) {
	683	bool has_more = FALSE;
	684	if (is_first) {
	685	has_more = TRUE;
	686	sv_catpvs(message,
	687	" (Missing semicolon on previous line?)");
	688	}
	689	else if (PL_oldoldbufptr) {
	690	/* yyerror (via yywarn) would do this itself, so we should too */
	691	const char *t;
	692	for (t = PL_oldoldbufptr;
	693	t < PL_bufptr && isSPACE(*t);
	694	t += UTF ? UTF8SKIP(t) : 1)
	695	{
	696	NOOP;
	697	}
	698	/* see if we can identify the cause of the warning */
	699	if (isIDFIRST_lazy_if_safe(t,PL_bufend,UTF))
	700	{
	701	const char *t_start= t;
	702	for ( ;
	703	(isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) \|\| *t == ':');
	704	t += UTF ? UTF8SKIP(t) : 1)
	705	{
	706	NOOP;
	707	}
	708	if (t < PL_bufptr && isSPACE(*t)) {
	709	has_more = TRUE;
	710	sv_catpvf( message,
	711	" (Do you need to predeclare \"%" UTF8f "\"?)",
	712	UTF8fARG(UTF, t - t_start, t_start));
	713	}
	714	}
	715	}
	716	if (!has_more) {
	717	const char *t= oldbp;
	718	assert(s >= oldbp);
	719	while (t < s && isSPACE(*t)) {
	720	t += UTF ? UTF8SKIP(t) : 1;
	721	}
	722
	723	sv_catpvf(message,
	724	" (Missing operator before \"%" UTF8f "\"?)",
	725	UTF8fARG(UTF, s - t, t));
	726	}
	727	}
	728	yywarn(SvPV_nolen(message), UTF ? SVf_UTF8 : 0);
	729	PL_bufptr = oldbp;
	730	}
	731
	732	/*
	733	* S_missingterm
	734	* Complain about missing quote/regexp/heredoc terminator.
	735	* If it's called with NULL then it cauterizes the line buffer.
	736	* If we're in a delimited string and the delimiter is a control
	737	* character, it's reformatted into a two-char sequence like ^C.
	738	* This is fatal.
	739	*/
	740
	741	STATIC void
	742	S_missingterm(pTHX_ char *s, STRLEN len)
	743	{
	744	char tmpbuf[UTF8_MAXBYTES + 1];
	745	char q;
	746	bool uni = FALSE;
	747	if (s) {
	748	char * const nl = (char *) my_memrchr(s, '\n', len);
	749	if (nl) {
	750	*nl = '\0';
	751	len = nl - s;
	752	}
	753	uni = UTF;
	754	}
	755	else if (PL_multi_close < 32) {
	756	*tmpbuf = '^';
	757	tmpbuf[1] = (char)toCTRL(PL_multi_close);
	758	tmpbuf[2] = '\0';
	759	s = tmpbuf;
	760	len = 2;
	761	}
	762	else {
	763	if (! UTF && LIKELY(PL_multi_close < 256)) {
	764	*tmpbuf = (char)PL_multi_close;
	765	tmpbuf[1] = '\0';
	766	len = 1;
	767	}
	768	else {
	769	char end = (char )uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
	770	*end = '\0';
	771	len = end - tmpbuf;
	772	uni = TRUE;
	773	}
	774	s = tmpbuf;
	775	}
	776	q = memchr(s, '"', len) ? '\'' : '"';
	777	Perl_croak(aTHX_ "Can't find string terminator %c%" UTF8f "%c"
	778	" anywhere before EOF", q, UTF8fARG(uni, len, s), q);
	779	}
	780
	781	#include "feature.h"
	782
	783	/*
	784	* experimental text filters for win32 carriage-returns, utf16-to-utf8 and
	785	* utf16-to-utf8-reversed.
	786	*/
	787
	788	#ifdef PERL_CR_FILTER
	789	static void
	790	strip_return(SV *sv)
	791	{
	792	const char *s = SvPVX_const(sv);
	793	const char * const e = s + SvCUR(sv);
	794
	795	PERL_ARGS_ASSERT_STRIP_RETURN;
	796
	797	/* outer loop optimized to do nothing if there are no CR-LFs */
	798	while (s < e) {
	799	if (s++ == '\r' && s == '\n') {
	800	/* hit a CR-LF, need to copy the rest */
	801	char *d = s - 1;
	802	d++ = s++;
	803	while (s < e) {
	804	if (*s == '\r' && s[1] == '\n')
	805	s++;
	806	d++ = s++;
	807	}
	808	SvCUR(sv) -= s - d;
	809	return;
	810	}
	811	}
	812	}
	813
	814	STATIC I32
	815	S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
	816	{
	817	const I32 count = FILTER_READ(idx+1, sv, maxlen);
	818	if (count > 0 && !maxlen)
	819	strip_return(sv);
	820	return count;
	821	}
	822	#endif
	823
	824	/*
	825	=for apidoc lex_start
	826
	827	Creates and initialises a new lexer/parser state object, supplying
	828	a context in which to lex and parse from a new source of Perl code.
	829	A pointer to the new state object is placed in L</PL_parser>. An entry
	830	is made on the save stack so that upon unwinding, the new state object
	831	will be destroyed and the former value of L</PL_parser> will be restored.
	832	Nothing else need be done to clean up the parsing context.
	833
	834	The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
	835	non-null, provides a string (in SV form) containing code to be parsed.
	836	A copy of the string is made, so subsequent modification of C<line>
	837	does not affect parsing. C<rsfp>, if non-null, provides an input stream
	838	from which code will be read to be parsed. If both are non-null, the
	839	code in C<line> comes first and must consist of complete lines of input,
	840	and C<rsfp> supplies the remainder of the source.
	841
	842	The C<flags> parameter is reserved for future use. Currently it is only
	843	used by perl internally, so extensions should always pass zero.
	844
	845	=cut
	846	*/
	847
	848	/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
	849	can share filters with the current parser.
	850	LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
	851	caller, hence isn't owned by the parser, so shouldn't be closed on parser
	852	destruction. This is used to handle the case of defaulting to reading the
	853	script from the standard input because no filename was given on the command
	854	line (without getting confused by situation where STDIN has been closed, so
	855	the script handle is opened on fd 0) */
	856
	857	void
	858	Perl_lex_start(pTHX_ SV line, PerlIO rsfp, U32 flags)
	859	{
	860	const char *s = NULL;
	861	yy_parser parser, oparser;
	862
	863	if (flags && flags & ~LEX_START_FLAGS)
	864	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
	865
	866	/* create and initialise a parser */
	867
	868	Newxz(parser, 1, yy_parser);
	869	parser->old_parser = oparser = PL_parser;
	870	PL_parser = parser;
	871
	872	parser->stack = NULL;
	873	parser->stack_max1 = NULL;
	874	parser->ps = NULL;
	875
	876	/* on scope exit, free this parser and restore any outer one */
	877	SAVEPARSER(parser);
	878	parser->saved_curcop = PL_curcop;
	879
	880	/* initialise lexer state */
	881
	882	parser->nexttoke = 0;
	883	parser->error_count = oparser ? oparser->error_count : 0;
	884	parser->copline = parser->preambling = NOLINE;
	885	parser->lex_state = LEX_NORMAL;
	886	parser->expect = XSTATE;
	887	parser->rsfp = rsfp;
	888	parser->recheck_utf8_validity = TRUE;
	889	parser->rsfp_filters =
	890	!(flags & LEX_START_SAME_FILTER) \|\| !oparser
	891	? NULL
	892	: MUTABLE_AV(SvREFCNT_inc(
	893	oparser->rsfp_filters
	894	? oparser->rsfp_filters
	895	: (oparser->rsfp_filters = newAV())
	896	));
	897
	898	Newx(parser->lex_brackstack, 120, char);
	899	Newx(parser->lex_casestack, 12, char);
	900	*parser->lex_casestack = '\0';
	901	Newxz(parser->lex_shared, 1, LEXSHARED);
	902
	903	if (line) {
	904	Size_t len;
	905	const U8* first_bad_char_loc;
	906
	907	s = SvPV_const(line, len);
	908
	909	if ( SvUTF8(line)
	910	&& UNLIKELY(! is_utf8_string_loc((U8 *) s,
	911	SvCUR(line),
	912	&first_bad_char_loc)))
	913	{
	914	_force_out_malformed_utf8_message(first_bad_char_loc,
	915	(U8 *) s + SvCUR(line),
	916	0,
	917	1 /* 1 means die */ );
	918	NOT_REACHED; /* NOTREACHED */
	919	}
	920
	921	parser->linestr = flags & LEX_START_COPIED
	922	? SvREFCNT_inc_simple_NN(line)
	923	: newSVpvn_flags(s, len, SvUTF8(line));
	924	if (!rsfp)
	925	sv_catpvs(parser->linestr, "\n;");
	926	} else {
	927	parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
	928	}
	929
	930	parser->oldoldbufptr =
	931	parser->oldbufptr =
	932	parser->bufptr =
	933	parser->linestart = SvPVX(parser->linestr);
	934	parser->bufend = parser->bufptr + SvCUR(parser->linestr);
	935	parser->last_lop = parser->last_uni = NULL;
	936
	937	STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS\|LEX_EVALBYTES
	938	\|LEX_DONT_CLOSE_RSFP));
	939	parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS\|LEX_EVALBYTES
	940	\|LEX_DONT_CLOSE_RSFP));
	941
	942	parser->in_pod = parser->filtered = 0;
	943	}
	944
	945
	946	/* delete a parser object */
	947
	948	void
	949	Perl_parser_free(pTHX_ const yy_parser *parser)
	950	{
	951	PERL_ARGS_ASSERT_PARSER_FREE;
	952
	953	PL_curcop = parser->saved_curcop;
	954	SvREFCNT_dec(parser->linestr);
	955
	956	if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
	957	PerlIO_clearerr(parser->rsfp);
	958	else if (parser->rsfp && (!parser->old_parser
	959	\|\| (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
	960	PerlIO_close(parser->rsfp);
	961	SvREFCNT_dec(parser->rsfp_filters);
	962	SvREFCNT_dec(parser->lex_stuff);
	963	SvREFCNT_dec(parser->lex_sub_repl);
	964
	965	Safefree(parser->lex_brackstack);
	966	Safefree(parser->lex_casestack);
	967	Safefree(parser->lex_shared);
	968	PL_parser = parser->old_parser;
	969	Safefree(parser);
	970	}
	971
	972	void
	973	Perl_parser_free_nexttoke_ops(pTHX_ yy_parser parser, OPSLAB slab)
	974	{
	975	I32 nexttoke = parser->nexttoke;
	976	PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
	977	while (nexttoke--) {
	978	if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
	979	&& parser->nextval[nexttoke].opval
	980	&& parser->nextval[nexttoke].opval->op_slabbed
	981	&& OpSLAB(parser->nextval[nexttoke].opval) == slab) {
	982	op_free(parser->nextval[nexttoke].opval);
	983	parser->nextval[nexttoke].opval = NULL;
	984	}
	985	}
	986	}
	987
	988
	989	/*
	990	=for apidoc AmnxUN\|SV *\|PL_parser-E<gt>linestr
	991
	992	Buffer scalar containing the chunk currently under consideration of the
	993	text currently being lexed. This is always a plain string scalar (for
	994	which C<SvPOK> is true). It is not intended to be used as a scalar by
	995	normal scalar means; instead refer to the buffer directly by the pointer
	996	variables described below.
	997
	998	The lexer maintains various C<char*> pointers to things in the
	999	C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
	1000	reallocated, all of these pointers must be updated. Don't attempt to
	1001	do this manually, but rather use L</lex_grow_linestr> if you need to
	1002	reallocate the buffer.
	1003
	1004	The content of the text chunk in the buffer is commonly exactly one
	1005	complete line of input, up to and including a newline terminator,
	1006	but there are situations where it is otherwise. The octets of the
	1007	buffer may be intended to be interpreted as either UTF-8 or Latin-1.
	1008	The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
	1009	flag on this scalar, which may disagree with it.
	1010
	1011	For direct examination of the buffer, the variable
	1012	L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
	1013	lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
	1014	of these pointers is usually preferable to examination of the scalar
	1015	through normal scalar means.
	1016
	1017	=for apidoc AmnxUN\|char *\|PL_parser-E<gt>bufend
	1018
	1019	Direct pointer to the end of the chunk of text currently being lexed, the
	1020	end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
	1021	+ SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
	1022	always located at the end of the buffer, and does not count as part of
	1023	the buffer's contents.
	1024
	1025	=for apidoc AmnxUN\|char *\|PL_parser-E<gt>bufptr
	1026
	1027	Points to the current position of lexing inside the lexer buffer.
	1028	Characters around this point may be freely examined, within
	1029	the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
	1030	L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
	1031	interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
	1032
	1033	Lexing code (whether in the Perl core or not) moves this pointer past
	1034	the characters that it consumes. It is also expected to perform some
	1035	bookkeeping whenever a newline character is consumed. This movement
	1036	can be more conveniently performed by the function L</lex_read_to>,
	1037	which handles newlines appropriately.
	1038
	1039	Interpretation of the buffer's octets can be abstracted out by
	1040	using the slightly higher-level functions L</lex_peek_unichar> and
	1041	L</lex_read_unichar>.
	1042
	1043	=for apidoc AmnxUN\|char *\|PL_parser-E<gt>linestart
	1044
	1045	Points to the start of the current line inside the lexer buffer.
	1046	This is useful for indicating at which column an error occurred, and
	1047	not much else. This must be updated by any lexing code that consumes
	1048	a newline; the function L</lex_read_to> handles this detail.
	1049
	1050	=cut
	1051	*/
	1052
	1053	/*
	1054	=for apidoc lex_bufutf8
	1055
	1056	Indicates whether the octets in the lexer buffer
	1057	(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
	1058	of Unicode characters. If not, they should be interpreted as Latin-1
	1059	characters. This is analogous to the C<SvUTF8> flag for scalars.
	1060
	1061	In UTF-8 mode, it is not guaranteed that the lexer buffer actually
	1062	contains valid UTF-8. Lexing code must be robust in the face of invalid
	1063	encoding.
	1064
	1065	The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
	1066	is significant, but not the whole story regarding the input character
	1067	encoding. Normally, when a file is being read, the scalar contains octets
	1068	and its C<SvUTF8> flag is off, but the octets should be interpreted as
	1069	UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
	1070	however, the scalar may have the C<SvUTF8> flag on, and in this case its
	1071	octets should be interpreted as UTF-8 unless the C<use bytes> pragma
	1072	is in effect. This logic may change in the future; use this function
	1073	instead of implementing the logic yourself.
	1074
	1075	=cut
	1076	*/
	1077
	1078	bool
	1079	Perl_lex_bufutf8(pTHX)
	1080	{
	1081	return UTF;
	1082	}
	1083
	1084	/*
	1085	=for apidoc lex_grow_linestr
	1086
	1087	Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
	1088	at least C<len> octets (including terminating C<NUL>). Returns a
	1089	pointer to the reallocated buffer. This is necessary before making
	1090	any direct modification of the buffer that would increase its length.
	1091	L</lex_stuff_pvn> provides a more convenient way to insert text into
	1092	the buffer.
	1093
	1094	Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
	1095	this function updates all of the lexer's variables that point directly
	1096	into the buffer.
	1097
	1098	=cut
	1099	*/
	1100
	1101	char *
	1102	Perl_lex_grow_linestr(pTHX_ STRLEN len)
	1103	{
	1104	SV *linestr;
	1105	char *buf;
	1106	STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
	1107	STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
	1108	bool current;
	1109
	1110	linestr = PL_parser->linestr;
	1111	buf = SvPVX(linestr);
	1112	if (len <= SvLEN(linestr))
	1113	return buf;
	1114
	1115	/* Is the lex_shared linestr SV the same as the current linestr SV?
	1116	* Only in this case does re_eval_start need adjusting, since it
	1117	* points within lex_shared->ls_linestr's buffer */
	1118	current = ( !PL_parser->lex_shared->ls_linestr
	1119	\|\| linestr == PL_parser->lex_shared->ls_linestr);
	1120
	1121	bufend_pos = PL_parser->bufend - buf;
	1122	bufptr_pos = PL_parser->bufptr - buf;
	1123	oldbufptr_pos = PL_parser->oldbufptr - buf;
	1124	oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
	1125	linestart_pos = PL_parser->linestart - buf;
	1126	last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
	1127	last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
	1128	re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
	1129	PL_parser->lex_shared->re_eval_start - buf : 0;
	1130
	1131	buf = sv_grow(linestr, len);
	1132
	1133	PL_parser->bufend = buf + bufend_pos;
	1134	PL_parser->bufptr = buf + bufptr_pos;
	1135	PL_parser->oldbufptr = buf + oldbufptr_pos;
	1136	PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
	1137	PL_parser->linestart = buf + linestart_pos;
	1138	if (PL_parser->last_uni)
	1139	PL_parser->last_uni = buf + last_uni_pos;
	1140	if (PL_parser->last_lop)
	1141	PL_parser->last_lop = buf + last_lop_pos;
	1142	if (current && PL_parser->lex_shared->re_eval_start)
	1143	PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
	1144	return buf;
	1145	}
	1146
	1147	/*
	1148	=for apidoc lex_stuff_pvn
	1149
	1150	Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
	1151	immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
	1152	reallocating the buffer if necessary. This means that lexing code that
	1153	runs later will see the characters as if they had appeared in the input.
	1154	It is not recommended to do this as part of normal parsing, and most
	1155	uses of this facility run the risk of the inserted characters being
	1156	interpreted in an unintended manner.
	1157
	1158	The string to be inserted is represented by C<len> octets starting
	1159	at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
	1160	according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
	1161	The characters are recoded for the lexer buffer, according to how the
	1162	buffer is currently being interpreted (L</lex_bufutf8>). If a string
	1163	to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
	1164	function is more convenient.
	1165
	1166	=for apidoc Amnh\|\|LEX_STUFF_UTF8
	1167
	1168	=cut
	1169	*/
	1170
	1171	void
	1172	Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
	1173	{
	1174	char *bufptr;
	1175	PERL_ARGS_ASSERT_LEX_STUFF_PVN;
	1176	if (flags & ~(LEX_STUFF_UTF8))
	1177	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
	1178	if (UTF) {
	1179	if (flags & LEX_STUFF_UTF8) {
	1180	goto plain_copy;
	1181	} else {
	1182	STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
	1183	(U8 *) pv + len);
	1184	const char p, e = pv+len;;
	1185	if (!highhalf)
	1186	goto plain_copy;
	1187	lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
	1188	bufptr = PL_parser->bufptr;
	1189	Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
	1190	SvCUR_set(PL_parser->linestr,
	1191	SvCUR(PL_parser->linestr) + len+highhalf);
	1192	PL_parser->bufend += len+highhalf;
	1193	for (p = pv; p != e; p++) {
	1194	append_utf8_from_native_byte(p, (U8 *) &bufptr);
	1195	}
	1196	}
	1197	} else {
	1198	if (flags & LEX_STUFF_UTF8) {
	1199	STRLEN highhalf = 0;
	1200	const char p, e = pv+len;
	1201	for (p = pv; p != e; p++) {
	1202	U8 c = (U8)*p;
	1203	if (UTF8_IS_ABOVE_LATIN1(c)) {
	1204	Perl_croak(aTHX_ "Lexing code attempted to stuff "
	1205	"non-Latin-1 character into Latin-1 input");
	1206	} else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
	1207	p++;
	1208	highhalf++;
	1209	} else assert(UTF8_IS_INVARIANT(c));
	1210	}
	1211	if (!highhalf)
	1212	goto plain_copy;
	1213	lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
	1214	bufptr = PL_parser->bufptr;
	1215	Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
	1216	SvCUR_set(PL_parser->linestr,
	1217	SvCUR(PL_parser->linestr) + len-highhalf);
	1218	PL_parser->bufend += len-highhalf;
	1219	p = pv;
	1220	while (p < e) {
	1221	if (UTF8_IS_INVARIANT(*p)) {
	1222	bufptr++ = p;
	1223	p++;
	1224	}
	1225	else {
	1226	assert(p < e -1 );
	1227	bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(p, *(p+1));
	1228	p += 2;
	1229	}
	1230	}
	1231	} else {
	1232	plain_copy:
	1233	lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
	1234	bufptr = PL_parser->bufptr;
	1235	Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
	1236	SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
	1237	PL_parser->bufend += len;
	1238	Copy(pv, bufptr, len, char);
	1239	}
	1240	}
	1241	}
	1242
	1243	/*
	1244	=for apidoc lex_stuff_pv
	1245
	1246	Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
	1247	immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
	1248	reallocating the buffer if necessary. This means that lexing code that
	1249	runs later will see the characters as if they had appeared in the input.
	1250	It is not recommended to do this as part of normal parsing, and most
	1251	uses of this facility run the risk of the inserted characters being
	1252	interpreted in an unintended manner.
	1253
	1254	The string to be inserted is represented by octets starting at C<pv>
	1255	and continuing to the first nul. These octets are interpreted as either
	1256	UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
	1257	in C<flags>. The characters are recoded for the lexer buffer, according
	1258	to how the buffer is currently being interpreted (L</lex_bufutf8>).
	1259	If it is not convenient to nul-terminate a string to be inserted, the
	1260	L</lex_stuff_pvn> function is more appropriate.
	1261
	1262	=cut
	1263	*/
	1264
	1265	void
	1266	Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
	1267	{
	1268	PERL_ARGS_ASSERT_LEX_STUFF_PV;
	1269	lex_stuff_pvn(pv, strlen(pv), flags);
	1270	}
	1271
	1272	/*
	1273	=for apidoc lex_stuff_sv
	1274
	1275	Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
	1276	immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
	1277	reallocating the buffer if necessary. This means that lexing code that
	1278	runs later will see the characters as if they had appeared in the input.
	1279	It is not recommended to do this as part of normal parsing, and most
	1280	uses of this facility run the risk of the inserted characters being
	1281	interpreted in an unintended manner.
	1282
	1283	The string to be inserted is the string value of C<sv>. The characters
	1284	are recoded for the lexer buffer, according to how the buffer is currently
	1285	being interpreted (L</lex_bufutf8>). If a string to be inserted is
	1286	not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
	1287	need to construct a scalar.
	1288
	1289	=cut
	1290	*/
	1291
	1292	void
	1293	Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
	1294	{
	1295	char *pv;
	1296	STRLEN len;
	1297	PERL_ARGS_ASSERT_LEX_STUFF_SV;
	1298	if (flags)
	1299	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
	1300	pv = SvPV(sv, len);
	1301	lex_stuff_pvn(pv, len, flags \| (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
	1302	}
	1303
	1304	/*
	1305	=for apidoc lex_unstuff
	1306
	1307	Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
	1308	C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
	1309	This hides the discarded text from any lexing code that runs later,
	1310	as if the text had never appeared.
	1311
	1312	This is not the normal way to consume lexed text. For that, use
	1313	L</lex_read_to>.
	1314
	1315	=cut
	1316	*/
	1317
	1318	void
	1319	Perl_lex_unstuff(pTHX_ char *ptr)
	1320	{
	1321	char buf, bufend;
	1322	STRLEN unstuff_len;
	1323	PERL_ARGS_ASSERT_LEX_UNSTUFF;
	1324	buf = PL_parser->bufptr;
	1325	if (ptr < buf)
	1326	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
	1327	if (ptr == buf)
	1328	return;
	1329	bufend = PL_parser->bufend;
	1330	if (ptr > bufend)
	1331	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
	1332	unstuff_len = ptr - buf;
	1333	Move(ptr, buf, bufend+1-ptr, char);
	1334	SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
	1335	PL_parser->bufend = bufend - unstuff_len;
	1336	}
	1337
	1338	/*
	1339	=for apidoc lex_read_to
	1340
	1341	Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
	1342	to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
	1343	performing the correct bookkeeping whenever a newline character is passed.
	1344	This is the normal way to consume lexed text.
	1345
	1346	Interpretation of the buffer's octets can be abstracted out by
	1347	using the slightly higher-level functions L</lex_peek_unichar> and
	1348	L</lex_read_unichar>.
	1349
	1350	=cut
	1351	*/
	1352
	1353	void
	1354	Perl_lex_read_to(pTHX_ char *ptr)
	1355	{
	1356	char *s;
	1357	PERL_ARGS_ASSERT_LEX_READ_TO;
	1358	s = PL_parser->bufptr;
	1359	if (ptr < s \|\| ptr > PL_parser->bufend)
	1360	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
	1361	for (; s != ptr; s++)
	1362	if (*s == '\n') {
	1363	COPLINE_INC_WITH_HERELINES;
	1364	PL_parser->linestart = s+1;
	1365	}
	1366	PL_parser->bufptr = ptr;
	1367	}
	1368
	1369	/*
	1370	=for apidoc lex_discard_to
	1371
	1372	Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
	1373	up to C<ptr>. The remaining content of the buffer will be moved, and
	1374	all pointers into the buffer updated appropriately. C<ptr> must not
	1375	be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
	1376	it is not permitted to discard text that has yet to be lexed.
	1377
	1378	Normally it is not necessarily to do this directly, because it suffices to
	1379	use the implicit discarding behaviour of L</lex_next_chunk> and things
	1380	based on it. However, if a token stretches across multiple lines,
	1381	and the lexing code has kept multiple lines of text in the buffer for
	1382	that purpose, then after completion of the token it would be wise to
	1383	explicitly discard the now-unneeded earlier lines, to avoid future
	1384	multi-line tokens growing the buffer without bound.
	1385
	1386	=cut
	1387	*/
	1388
	1389	void
	1390	Perl_lex_discard_to(pTHX_ char *ptr)
	1391	{
	1392	char *buf;
	1393	STRLEN discard_len;
	1394	PERL_ARGS_ASSERT_LEX_DISCARD_TO;
	1395	buf = SvPVX(PL_parser->linestr);
	1396	if (ptr < buf)
	1397	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
	1398	if (ptr == buf)
	1399	return;
	1400	if (ptr > PL_parser->bufptr)
	1401	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
	1402	discard_len = ptr - buf;
	1403	if (PL_parser->oldbufptr < ptr)
	1404	PL_parser->oldbufptr = ptr;
	1405	if (PL_parser->oldoldbufptr < ptr)
	1406	PL_parser->oldoldbufptr = ptr;
	1407	if (PL_parser->last_uni && PL_parser->last_uni < ptr)
	1408	PL_parser->last_uni = NULL;
	1409	if (PL_parser->last_lop && PL_parser->last_lop < ptr)
	1410	PL_parser->last_lop = NULL;
	1411	Move(ptr, buf, PL_parser->bufend+1-ptr, char);
	1412	SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
	1413	PL_parser->bufend -= discard_len;
	1414	PL_parser->bufptr -= discard_len;
	1415	PL_parser->oldbufptr -= discard_len;
	1416	PL_parser->oldoldbufptr -= discard_len;
	1417	if (PL_parser->last_uni)
	1418	PL_parser->last_uni -= discard_len;
	1419	if (PL_parser->last_lop)
	1420	PL_parser->last_lop -= discard_len;
	1421	}
	1422
	1423	void
	1424	Perl_notify_parser_that_changed_to_utf8(pTHX)
	1425	{
	1426	/* Called when $^H is changed to indicate that HINT_UTF8 has changed from
	1427	* off to on. At compile time, this has the effect of entering a 'use
	1428	* utf8' section. This means that any input was not previously checked for
	1429	* UTF-8 (because it was off), but now we do need to check it, or our
	1430	* assumptions about the input being sane could be wrong, and we could
	1431	* segfault. This routine just sets a flag so that the next time we look
	1432	* at the input we do the well-formed UTF-8 check. If we aren't in the
	1433	* proper phase, there may not be a parser object, but if there is, setting
	1434	* the flag is harmless */
	1435
	1436	if (PL_parser) {
	1437	PL_parser->recheck_utf8_validity = TRUE;
	1438	}
	1439	}
	1440
	1441	/*
	1442	=for apidoc lex_next_chunk
	1443
	1444	Reads in the next chunk of text to be lexed, appending it to
	1445	L</PL_parser-E<gt>linestr>. This should be called when lexing code has
	1446	looked to the end of the current chunk and wants to know more. It is
	1447	usual, but not necessary, for lexing to have consumed the entirety of
	1448	the current chunk at this time.
	1449
	1450	If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
	1451	chunk (i.e., the current chunk has been entirely consumed), normally the
	1452	current chunk will be discarded at the same time that the new chunk is
	1453	read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
	1454	will not be discarded. If the current chunk has not been entirely
	1455	consumed, then it will not be discarded regardless of the flag.
	1456
	1457	Returns true if some new text was added to the buffer, or false if the
	1458	buffer has reached the end of the input text.
	1459
	1460	=for apidoc Amnh\|\|LEX_KEEP_PREVIOUS
	1461
	1462	=cut
	1463	*/
	1464
	1465	#define LEX_FAKE_EOF 0x80000000
	1466	#define LEX_NO_TERM 0x40000000 /* here-doc */
	1467
	1468	bool
	1469	Perl_lex_next_chunk(pTHX_ U32 flags)
	1470	{
	1471	SV *linestr;
	1472	char *buf;
	1473	STRLEN old_bufend_pos, new_bufend_pos;
	1474	STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
	1475	STRLEN linestart_pos, last_uni_pos, last_lop_pos;
	1476	bool got_some_for_debugger = 0;
	1477	bool got_some;
	1478
	1479	if (flags & ~(LEX_KEEP_PREVIOUS\|LEX_FAKE_EOF\|LEX_NO_TERM))
	1480	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
	1481	if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
	1482	return FALSE;
	1483	linestr = PL_parser->linestr;
	1484	buf = SvPVX(linestr);
	1485	if (!(flags & LEX_KEEP_PREVIOUS)
	1486	&& PL_parser->bufptr == PL_parser->bufend)
	1487	{
	1488	old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
	1489	linestart_pos = 0;
	1490	if (PL_parser->last_uni != PL_parser->bufend)
	1491	PL_parser->last_uni = NULL;
	1492	if (PL_parser->last_lop != PL_parser->bufend)
	1493	PL_parser->last_lop = NULL;
	1494	last_uni_pos = last_lop_pos = 0;
	1495	*buf = 0;
	1496	SvCUR_set(linestr, 0);
	1497	} else {
	1498	old_bufend_pos = PL_parser->bufend - buf;
	1499	bufptr_pos = PL_parser->bufptr - buf;
	1500	oldbufptr_pos = PL_parser->oldbufptr - buf;
	1501	oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
	1502	linestart_pos = PL_parser->linestart - buf;
	1503	last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
	1504	last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
	1505	}
	1506	if (flags & LEX_FAKE_EOF) {
	1507	goto eof;
	1508	} else if (!PL_parser->rsfp && !PL_parser->filtered) {
	1509	got_some = 0;
	1510	} else if (filter_gets(linestr, old_bufend_pos)) {
	1511	got_some = 1;
	1512	got_some_for_debugger = 1;
	1513	} else if (flags & LEX_NO_TERM) {
	1514	got_some = 0;
	1515	} else {
	1516	if (!SvPOK(linestr)) /* can get undefined by filter_gets */
	1517	SvPVCLEAR(linestr);
	1518	eof:
	1519	/* End of real input. Close filehandle (unless it was STDIN),
	1520	* then add implicit termination.
	1521	*/
	1522	if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
	1523	PerlIO_clearerr(PL_parser->rsfp);
	1524	else if (PL_parser->rsfp)
	1525	(void)PerlIO_close(PL_parser->rsfp);
	1526	PL_parser->rsfp = NULL;
	1527	PL_parser->in_pod = PL_parser->filtered = 0;
	1528	if (!PL_in_eval && PL_minus_p) {
	1529	sv_catpvs(linestr,
	1530	/{/";}continue{print or die qq(-p destination: $!\\n);}");
	1531	PL_minus_n = PL_minus_p = 0;
	1532	} else if (!PL_in_eval && PL_minus_n) {
	1533	sv_catpvs(linestr, /{/";}");
	1534	PL_minus_n = 0;
	1535	} else
	1536	sv_catpvs(linestr, ";");
	1537	got_some = 1;
	1538	}
	1539	buf = SvPVX(linestr);
	1540	new_bufend_pos = SvCUR(linestr);
	1541	PL_parser->bufend = buf + new_bufend_pos;
	1542	PL_parser->bufptr = buf + bufptr_pos;
	1543
	1544	if (UTF) {
	1545	const U8* first_bad_char_loc;
	1546	if (UNLIKELY(! is_utf8_string_loc(
	1547	(U8 *) PL_parser->bufptr,
	1548	PL_parser->bufend - PL_parser->bufptr,
	1549	&first_bad_char_loc)))
	1550	{
	1551	_force_out_malformed_utf8_message(first_bad_char_loc,
	1552	(U8 *) PL_parser->bufend,
	1553	0,
	1554	1 /* 1 means die */ );
	1555	NOT_REACHED; /* NOTREACHED */
	1556	}
	1557	}
	1558
	1559	PL_parser->oldbufptr = buf + oldbufptr_pos;
	1560	PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
	1561	PL_parser->linestart = buf + linestart_pos;
	1562	if (PL_parser->last_uni)
	1563	PL_parser->last_uni = buf + last_uni_pos;
	1564	if (PL_parser->last_lop)
	1565	PL_parser->last_lop = buf + last_lop_pos;
	1566	if (PL_parser->preambling != NOLINE) {
	1567	CopLINE_set(PL_curcop, PL_parser->preambling + 1);
	1568	PL_parser->preambling = NOLINE;
	1569	}
	1570	if ( got_some_for_debugger
	1571	&& PERLDB_LINE_OR_SAVESRC
	1572	&& PL_curstash != PL_debstash)
	1573	{
	1574	/* debugger active and we're not compiling the debugger code,
	1575	* so store the line into the debugger's array of lines
	1576	*/
	1577	update_debugger_info(NULL, buf+old_bufend_pos,
	1578	new_bufend_pos-old_bufend_pos);
	1579	}
	1580	return got_some;
	1581	}
	1582
	1583	/*
	1584	=for apidoc lex_peek_unichar
	1585
	1586	Looks ahead one (Unicode) character in the text currently being lexed.
	1587	Returns the codepoint (unsigned integer value) of the next character,
	1588	or -1 if lexing has reached the end of the input text. To consume the
	1589	peeked character, use L</lex_read_unichar>.
	1590
	1591	If the next character is in (or extends into) the next chunk of input
	1592	text, the next chunk will be read in. Normally the current chunk will be
	1593	discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
	1594	bit set, then the current chunk will not be discarded.
	1595
	1596	If the input is being interpreted as UTF-8 and a UTF-8 encoding error
	1597	is encountered, an exception is generated.
	1598
	1599	=cut
	1600	*/
	1601
	1602	I32
	1603	Perl_lex_peek_unichar(pTHX_ U32 flags)
	1604	{
	1605	char s, bufend;
	1606	if (flags & ~(LEX_KEEP_PREVIOUS))
	1607	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
	1608	s = PL_parser->bufptr;
	1609	bufend = PL_parser->bufend;
	1610	if (UTF) {
	1611	U8 head;
	1612	I32 unichar;
	1613	STRLEN len, retlen;
	1614	if (s == bufend) {
	1615	if (!lex_next_chunk(flags))
	1616	return -1;
	1617	s = PL_parser->bufptr;
	1618	bufend = PL_parser->bufend;
	1619	}
	1620	head = (U8)*s;
	1621	if (UTF8_IS_INVARIANT(head))
	1622	return head;
	1623	if (UTF8_IS_START(head)) {
	1624	len = UTF8SKIP(&head);
	1625	while ((STRLEN)(bufend-s) < len) {
	1626	if (!lex_next_chunk(flags \| LEX_KEEP_PREVIOUS))
	1627	break;
	1628	s = PL_parser->bufptr;
	1629	bufend = PL_parser->bufend;
	1630	}
	1631	}
	1632	unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
	1633	if (retlen == (STRLEN)-1) {
	1634	_force_out_malformed_utf8_message((U8 *) s,
	1635	(U8 *) bufend,
	1636	0,
	1637	1 /* 1 means die */ );
	1638	NOT_REACHED; /* NOTREACHED */
	1639	}
	1640	return unichar;
	1641	} else {
	1642	if (s == bufend) {
	1643	if (!lex_next_chunk(flags))
	1644	return -1;
	1645	s = PL_parser->bufptr;
	1646	}
	1647	return (U8)*s;
	1648	}
	1649	}
	1650
	1651	/*
	1652	=for apidoc lex_read_unichar
	1653
	1654	Reads the next (Unicode) character in the text currently being lexed.
	1655	Returns the codepoint (unsigned integer value) of the character read,
	1656	and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
	1657	if lexing has reached the end of the input text. To non-destructively
	1658	examine the next character, use L</lex_peek_unichar> instead.
	1659
	1660	If the next character is in (or extends into) the next chunk of input
	1661	text, the next chunk will be read in. Normally the current chunk will be
	1662	discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
	1663	bit set, then the current chunk will not be discarded.
	1664
	1665	If the input is being interpreted as UTF-8 and a UTF-8 encoding error
	1666	is encountered, an exception is generated.
	1667
	1668	=cut
	1669	*/
	1670
	1671	I32
	1672	Perl_lex_read_unichar(pTHX_ U32 flags)
	1673	{
	1674	I32 c;
	1675	if (flags & ~(LEX_KEEP_PREVIOUS))
	1676	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
	1677	c = lex_peek_unichar(flags);
	1678	if (c != -1) {
	1679	if (c == '\n')
	1680	COPLINE_INC_WITH_HERELINES;
	1681	if (UTF)
	1682	PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
	1683	else
	1684	++(PL_parser->bufptr);
	1685	}
	1686	return c;
	1687	}
	1688
	1689	/*
	1690	=for apidoc lex_read_space
	1691
	1692	Reads optional spaces, in Perl style, in the text currently being
	1693	lexed. The spaces may include ordinary whitespace characters and
	1694	Perl-style comments. C<#line> directives are processed if encountered.
	1695	L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
	1696	at a non-space character (or the end of the input text).
	1697
	1698	If spaces extend into the next chunk of input text, the next chunk will
	1699	be read in. Normally the current chunk will be discarded at the same
	1700	time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
	1701	chunk will not be discarded.
	1702
	1703	=cut
	1704	*/
	1705
	1706	#define LEX_NO_INCLINE 0x40000000
	1707	#define LEX_NO_NEXT_CHUNK 0x80000000
	1708
	1709	void
	1710	Perl_lex_read_space(pTHX_ U32 flags)
	1711	{
	1712	char s, bufend;
	1713	const bool can_incline = !(flags & LEX_NO_INCLINE);
	1714	bool need_incline = 0;
	1715	if (flags & ~(LEX_KEEP_PREVIOUS\|LEX_NO_NEXT_CHUNK\|LEX_NO_INCLINE))
	1716	Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
	1717	s = PL_parser->bufptr;
	1718	bufend = PL_parser->bufend;
	1719	while (1) {
	1720	char c = *s;
	1721	if (c == '#') {
	1722	do {
	1723	c = *++s;
	1724	} while (!(c == '\n' \|\| (c == 0 && s == bufend)));
	1725	} else if (c == '\n') {
	1726	s++;
	1727	if (can_incline) {
	1728	PL_parser->linestart = s;
	1729	if (s == bufend)
	1730	need_incline = 1;
	1731	else
	1732	incline(s, bufend);
	1733	}
	1734	} else if (isSPACE(c)) {
	1735	s++;
	1736	} else if (c == 0 && s == bufend) {
	1737	bool got_more;
	1738	line_t l;
	1739	if (flags & LEX_NO_NEXT_CHUNK)
	1740	break;
	1741	PL_parser->bufptr = s;
	1742	l = CopLINE(PL_curcop);
	1743	CopLINE(PL_curcop) += PL_parser->herelines + 1;
	1744	got_more = lex_next_chunk(flags);
	1745	CopLINE_set(PL_curcop, l);
	1746	s = PL_parser->bufptr;
	1747	bufend = PL_parser->bufend;
	1748	if (!got_more)
	1749	break;
	1750	if (can_incline && need_incline && PL_parser->rsfp) {
	1751	incline(s, bufend);
	1752	need_incline = 0;
	1753	}
	1754	} else if (!c) {
	1755	s++;
	1756	} else {
	1757	break;
	1758	}
	1759	}
	1760	PL_parser->bufptr = s;
	1761	}
	1762
	1763	/*
	1764
	1765	=for apidoc validate_proto
	1766
	1767	This function performs syntax checking on a prototype, C<proto>.
	1768	If C<warn> is true, any illegal characters or mismatched brackets
	1769	will trigger illegalproto warnings, declaring that they were
	1770	detected in the prototype for C<name>.
	1771
	1772	The return value is C<true> if this is a valid prototype, and
	1773	C<false> if it is not, regardless of whether C<warn> was C<true> or
	1774	C<false>.
	1775
	1776	Note that C<NULL> is a valid C<proto> and will always return C<true>.
	1777
	1778	=cut
	1779
	1780	*/
	1781
	1782	bool
	1783	Perl_validate_proto(pTHX_ SV name, SV proto, bool warn, bool curstash)
	1784	{
	1785	STRLEN len, origlen;
	1786	char *p;
	1787	bool bad_proto = FALSE;
	1788	bool in_brackets = FALSE;
	1789	bool after_slash = FALSE;
	1790	char greedy_proto = ' ';
	1791	bool proto_after_greedy_proto = FALSE;
	1792	bool must_be_last = FALSE;
	1793	bool underscore = FALSE;
	1794	bool bad_proto_after_underscore = FALSE;
	1795
	1796	PERL_ARGS_ASSERT_VALIDATE_PROTO;
	1797
	1798	if (!proto)
	1799	return TRUE;
	1800
	1801	p = SvPV(proto, len);
	1802	origlen = len;
	1803	for (; len--; p++) {
	1804	if (!isSPACE(*p)) {
	1805	if (must_be_last)
	1806	proto_after_greedy_proto = TRUE;
	1807	if (underscore) {
	1808	if (!memCHRs(";@%", *p))
	1809	bad_proto_after_underscore = TRUE;
	1810	underscore = FALSE;
	1811	}
	1812	if (!memCHRs("$@%;[]&\\_+", p) \|\| *p == '\0') {
	1813	bad_proto = TRUE;
	1814	}
	1815	else {
	1816	if (*p == '[')
	1817	in_brackets = TRUE;
	1818	else if (*p == ']')
	1819	in_brackets = FALSE;
	1820	else if ((p == '@' \|\| p == '%')
	1821	&& !after_slash
	1822	&& !in_brackets )
	1823	{
	1824	must_be_last = TRUE;
	1825	greedy_proto = *p;
	1826	}
	1827	else if (*p == '_')
	1828	underscore = TRUE;
	1829	}
	1830	if (*p == '\\')
	1831	after_slash = TRUE;
	1832	else
	1833	after_slash = FALSE;
	1834	}
	1835	}
	1836
	1837	if (warn) {
	1838	SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
	1839	p -= origlen;
	1840	p = SvUTF8(proto)
	1841	? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP \| SVf_UTF8),
	1842	origlen, UNI_DISPLAY_ISPRINT)
	1843	: pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
	1844
	1845	if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
	1846	SV *name2 = sv_2mortal(newSVsv(PL_curstname));
	1847	sv_catpvs(name2, "::");
	1848	sv_catsv(name2, (SV *)name);
	1849	name = name2;
	1850	}
	1851
	1852	if (proto_after_greedy_proto)
	1853	Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
	1854	"Prototype after '%c' for %" SVf " : %s",
	1855	greedy_proto, SVfARG(name), p);
	1856	if (in_brackets)
	1857	Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
	1858	"Missing ']' in prototype for %" SVf " : %s",
	1859	SVfARG(name), p);
	1860	if (bad_proto)
	1861	Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
	1862	"Illegal character in prototype for %" SVf " : %s",
	1863	SVfARG(name), p);
	1864	if (bad_proto_after_underscore)
	1865	Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
	1866	"Illegal character after '_' in prototype for %" SVf " : %s",
	1867	SVfARG(name), p);
	1868	}
	1869
	1870	return (! (proto_after_greedy_proto \|\| bad_proto) );
	1871	}
	1872
	1873	/*
	1874	* S_incline
	1875	* This subroutine has nothing to do with tilting, whether at windmills
	1876	* or pinball tables. Its name is short for "increment line". It
	1877	* increments the current line number in CopLINE(PL_curcop) and checks
	1878	* to see whether the line starts with a comment of the form
	1879	* # line 500 "foo.pm"
	1880	* If so, it sets the current line number and file to the values in the comment.
	1881	*/
	1882
	1883	STATIC void
	1884	S_incline(pTHX_ const char s, const char end)
	1885	{
	1886	const char *t;
	1887	const char *n;
	1888	const char *e;
	1889	line_t line_num;
	1890	UV uv;
	1891
	1892	PERL_ARGS_ASSERT_INCLINE;
	1893
	1894	assert(end >= s);
	1895
	1896	COPLINE_INC_WITH_HERELINES;
	1897	if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
	1898	&& s+1 == PL_bufend && *s == ';') {
	1899	/* fake newline in string eval */
	1900	CopLINE_dec(PL_curcop);
	1901	return;
	1902	}
	1903	if (*s++ != '#')
	1904	return;
	1905	while (SPACE_OR_TAB(*s))
	1906	s++;
	1907	if (memBEGINs(s, (STRLEN) (end - s), "line"))
	1908	s += sizeof("line") - 1;
	1909	else
	1910	return;
	1911	if (SPACE_OR_TAB(*s))
	1912	s++;
	1913	else
	1914	return;
	1915	while (SPACE_OR_TAB(*s))
	1916	s++;
	1917	if (!isDIGIT(*s))
	1918	return;
	1919
	1920	n = s;
	1921	while (isDIGIT(*s))
	1922	s++;
	1923	if (!SPACE_OR_TAB(s) && s != '\r' && s != '\n' && s != '\0')
	1924	return;
	1925	while (SPACE_OR_TAB(*s))
	1926	s++;
	1927	if (s == '"' && (t = (char ) memchr(s+1, '"', end - s))) {
	1928	s++;
	1929	e = t + 1;
	1930	}
	1931	else {
	1932	t = s;
	1933	while (t && !isSPACE(t))
	1934	t++;
	1935	e = t;
	1936	}
	1937	while (SPACE_OR_TAB(e) \|\| e == '\r' \|\| *e == '\f')
	1938	e++;
	1939	if (e != '\n' && e != '\0')
	1940	return; /* false alarm */
	1941
	1942	if (!grok_atoUV(n, &uv, &e))
	1943	return;
	1944	line_num = ((line_t)uv) - 1;
	1945
	1946	if (t - s > 0) {
	1947	const STRLEN len = t - s;
	1948
	1949	if (!PL_rsfp && !PL_parser->filtered) {
	1950	/* must copy *{"::_<(eval N)[oldfilename:L]"}
	1951	* to {"::_<newfilename"} /
	1952	/* However, the long form of evals is only turned on by the
	1953	debugger - usually they're "(eval %lu)" */
	1954	GV * const cfgv = CopFILEGV(PL_curcop);
	1955	if (cfgv) {
	1956	char smallbuf[128];
	1957	STRLEN tmplen2 = len;
	1958	char *tmpbuf2;
	1959	GV *gv2;
	1960
	1961	if (tmplen2 + 2 <= sizeof smallbuf)
	1962	tmpbuf2 = smallbuf;
	1963	else
	1964	Newx(tmpbuf2, tmplen2 + 2, char);
	1965
	1966	tmpbuf2[0] = '_';
	1967	tmpbuf2[1] = '<';
	1968
	1969	memcpy(tmpbuf2 + 2, s, tmplen2);
	1970	tmplen2 += 2;
	1971
	1972	gv2 = (GV*)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
	1973	if (!isGV(gv2)) {
	1974	gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
	1975	/* adjust ${"::_<newfilename"} to store the new file name */
	1976	GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
	1977	/* The line number may differ. If that is the case,
	1978	alias the saved lines that are in the array.
	1979	Otherwise alias the whole array. */
	1980	if (CopLINE(PL_curcop) == line_num) {
	1981	GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
	1982	GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
	1983	}
	1984	else if (GvAV(cfgv)) {
	1985	AV * const av = GvAV(cfgv);
	1986	const line_t start = CopLINE(PL_curcop)+1;
	1987	SSize_t items = AvFILLp(av) - start;
	1988	if (items > 0) {
	1989	AV * const av2 = GvAVn(gv2);
	1990	SV **svp = AvARRAY(av) + start;
	1991	Size_t l = line_num+1;
	1992	while (items-- && l < SSize_t_MAX && l == (line_t)l)
	1993	av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
	1994	}
	1995	}
	1996	}
	1997
	1998	if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
	1999	}
	2000	}
	2001	CopFILE_free(PL_curcop);
	2002	CopFILE_setn(PL_curcop, s, len);
	2003	}
	2004	CopLINE_set(PL_curcop, line_num);
	2005	}
	2006
	2007	STATIC void
	2008	S_update_debugger_info(pTHX_ SV orig_sv, const char const buf, STRLEN len)
	2009	{
	2010	AV *av = CopFILEAVx(PL_curcop);
	2011	if (av) {
	2012	SV * sv;
	2013	if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
	2014	else {
	2015	sv = *av_fetch(av, 0, 1);
	2016	SvUPGRADE(sv, SVt_PVMG);
	2017	}
	2018	if (!SvPOK(sv)) SvPVCLEAR(sv);
	2019	if (orig_sv)
	2020	sv_catsv(sv, orig_sv);
	2021	else
	2022	sv_catpvn(sv, buf, len);
	2023	if (!SvIOK(sv)) {
	2024	(void)SvIOK_on(sv);
	2025	SvIV_set(sv, 0);
	2026	}
	2027	if (PL_parser->preambling == NOLINE)
	2028	av_store(av, CopLINE(PL_curcop), sv);
	2029	}
	2030	}
	2031
	2032	/*
	2033	* skipspace
	2034	* Called to gobble the appropriate amount and type of whitespace.
	2035	* Skips comments as well.
	2036	* Returns the next character after the whitespace that is skipped.
	2037	*
	2038	* peekspace
	2039	* Same thing, but look ahead without incrementing line numbers or
	2040	* adjusting PL_linestart.
	2041	*/
	2042
	2043	#define skipspace(s) skipspace_flags(s, 0)
	2044	#define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
	2045
	2046	char *
	2047	Perl_skipspace_flags(pTHX_ char *s, U32 flags)
	2048	{
	2049	PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
	2050	if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
	2051	while (s < PL_bufend && (SPACE_OR_TAB(s) \|\| !s))
	2052	s++;
	2053	} else {
	2054	STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
	2055	PL_bufptr = s;
	2056	lex_read_space(flags \| LEX_KEEP_PREVIOUS \|
	2057	(PL_lex_inwhat \|\| PL_lex_state == LEX_FORMLINE ?
	2058	LEX_NO_NEXT_CHUNK : 0));
	2059	s = PL_bufptr;
	2060	PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
	2061	if (PL_linestart > PL_bufptr)
	2062	PL_bufptr = PL_linestart;
	2063	return s;
	2064	}
	2065	return s;
	2066	}
	2067
	2068	/*
	2069	* S_check_uni
	2070	* Check the unary operators to ensure there's no ambiguity in how they're
	2071	* used. An ambiguous piece of code would be:
	2072	* rand + 5
	2073	* This doesn't mean rand() + 5. Because rand() is a unary operator,
	2074	* the +5 is its argument.
	2075	*/
	2076
	2077	STATIC void
	2078	S_check_uni(pTHX)
	2079	{
	2080	const char *s;
	2081
	2082	if (PL_oldoldbufptr != PL_last_uni)
	2083	return;
	2084	while (isSPACE(*PL_last_uni))
	2085	PL_last_uni++;
	2086	s = PL_last_uni;
	2087	while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) \|\| *s == '-')
	2088	s += UTF ? UTF8SKIP(s) : 1;
	2089	if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
	2090	return;
	2091
	2092	Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
	2093	"Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
	2094	UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
	2095	}
	2096
	2097	/*
	2098	* LOP : macro to build a list operator. Its behaviour has been replaced
	2099	* with a subroutine, S_lop() for which LOP is just another name.
	2100	*/
	2101
	2102	#define LOP(f,x) return lop(f,x,s)
	2103
	2104	/*
	2105	* S_lop
	2106	* Build a list operator (or something that might be one). The rules:
	2107	* - if we have a next token, then it's a list operator (no parens) for
	2108	* which the next token has already been parsed; e.g.,
	2109	* sort foo @args
	2110	* sort foo (@args)
	2111	* - if the next thing is an opening paren, then it's a function
	2112	* - else it's a list operator
	2113	*/
	2114
	2115	STATIC I32
	2116	S_lop(pTHX_ I32 f, U8 x, char *s)
	2117	{
	2118	PERL_ARGS_ASSERT_LOP;
	2119
	2120	pl_yylval.ival = f;
	2121	CLINE;
	2122	PL_bufptr = s;
	2123	PL_last_lop = PL_oldbufptr;
	2124	PL_last_lop_op = (OPCODE)f;
	2125	if (PL_nexttoke)
	2126	goto lstop;
	2127	PL_expect = x;
	2128	if (*s == '(')
	2129	return REPORT(FUNC);
	2130	s = skipspace(s);
	2131	if (*s == '(')
	2132	return REPORT(FUNC);
	2133	else {
	2134	lstop:
	2135	if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
	2136	PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
	2137	return REPORT(LSTOP);
	2138	}
	2139	}
	2140
	2141	/*
	2142	* S_force_next
	2143	* When the lexer realizes it knows the next token (for instance,
	2144	* it is reordering tokens for the parser) then it can call S_force_next
	2145	* to know what token to return the next time the lexer is called. Caller
	2146	* will need to set PL_nextval[] and possibly PL_expect to ensure
	2147	* the lexer handles the token correctly.
	2148	*/
	2149
	2150	STATIC void
	2151	S_force_next(pTHX_ I32 type)
	2152	{
	2153	#ifdef DEBUGGING
	2154	if (DEBUG_T_TEST) {
	2155	PerlIO_printf(Perl_debug_log, "### forced token:\n");
	2156	tokereport(type, &NEXTVAL_NEXTTOKE);
	2157	}
	2158	#endif
	2159	assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
	2160	PL_nexttype[PL_nexttoke] = type;
	2161	PL_nexttoke++;
	2162	}
	2163
	2164	/*
	2165	* S_postderef
	2166	*
	2167	* This subroutine handles postfix deref syntax after the arrow has already
	2168	* been emitted. @* $* etc. are emitted as two separate tokens right here.
	2169	* @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
	2170	* only the first, leaving yylex to find the next.
	2171	*/
	2172
	2173	static int
	2174	S_postderef(pTHX_ int const funny, char const next)
	2175	{
	2176	assert(funny == DOLSHARP
	2177	\|\| funny == PERLY_DOLLAR
	2178	\|\| funny == PERLY_SNAIL
	2179	\|\| funny == PERLY_PERCENT_SIGN
	2180	\|\| funny == PERLY_AMPERSAND
	2181	\|\| funny == PERLY_STAR
	2182	);
	2183	if (next == '*') {
	2184	PL_expect = XOPERATOR;
	2185	if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
	2186	assert(PERLY_SNAIL == funny \|\| PERLY_DOLLAR == funny \|\| DOLSHARP == funny);
	2187	PL_lex_state = LEX_INTERPEND;
	2188	if (PERLY_SNAIL == funny)
	2189	force_next(POSTJOIN);
	2190	}
	2191	force_next(PERLY_STAR);
	2192	PL_bufptr+=2;
	2193	}
	2194	else {
	2195	if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
	2196	&& !PL_lex_brackets)
	2197	PL_lex_dojoin = 2;
	2198	PL_expect = XOPERATOR;
	2199	PL_bufptr++;
	2200	}
	2201	return funny;
	2202	}
	2203
	2204	void
	2205	Perl_yyunlex(pTHX)
	2206	{
	2207	int yyc = PL_parser->yychar;
	2208	if (yyc != YYEMPTY) {
	2209	if (yyc) {
	2210	NEXTVAL_NEXTTOKE = PL_parser->yylval;
	2211	if (yyc == PERLY_BRACE_OPEN \|\| yyc == HASHBRACK \|\| yyc == PERLY_BRACKET_OPEN) {
	2212	PL_lex_allbrackets--;
	2213	PL_lex_brackets--;
	2214	yyc \|= (3<<24) \| (PL_lex_brackstack[PL_lex_brackets] << 16);
	2215	} else if (yyc == PERLY_PAREN_OPEN) {
	2216	PL_lex_allbrackets--;
	2217	yyc \|= (2<<24);
	2218	}
	2219	force_next(yyc);
	2220	}
	2221	PL_parser->yychar = YYEMPTY;
	2222	}
	2223	}
	2224
	2225	STATIC SV *
	2226	S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
	2227	{
	2228	SV * const sv = newSVpvn_utf8(start, len,
	2229	! IN_BYTES
	2230	&& UTF
	2231	&& len != 0
	2232	&& is_utf8_non_invariant_string((const U8*)start, len));
	2233	return sv;
	2234	}
	2235
	2236	/*
	2237	* S_force_word
	2238	* When the lexer knows the next thing is a word (for instance, it has
	2239	* just seen -> and it knows that the next char is a word char, then
	2240	* it calls S_force_word to stick the next word into the PL_nexttoke/val
	2241	* lookahead.
	2242	*
	2243	* Arguments:
	2244	* char *start : buffer position (must be within PL_linestr)
	2245	* int token : PL_next* will be this type of bare word
	2246	* (e.g., METHCALL0,BAREWORD)
	2247	* int check_keyword : if true, Perl checks to make sure the word isn't
	2248	* a keyword (do this if the word is a label, e.g. goto FOO)
	2249	* int allow_pack : if true, : characters will also be allowed (require,
	2250	* use, etc. do this)
	2251	*/
	2252
	2253	STATIC char *
	2254	S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
	2255	{
	2256	char *s;
	2257	STRLEN len;
	2258
	2259	PERL_ARGS_ASSERT_FORCE_WORD;
	2260
	2261	start = skipspace(start);
	2262	s = start;
	2263	if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
	2264	\|\| (allow_pack && *s == ':' && s[1] == ':') )
	2265	{
	2266	s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len, allow_pack);
	2267	if (check_keyword) {
	2268	char *s2 = PL_tokenbuf;
	2269	STRLEN len2 = len;
	2270	if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
	2271	s2 += sizeof("CORE::") - 1;
	2272	len2 -= sizeof("CORE::") - 1;
	2273	}
	2274	if (keyword(s2, len2, 0))
	2275	return start;
	2276	}
	2277	if (token == METHCALL0) {
	2278	s = skipspace(s);
	2279	if (*s == '(')
	2280	PL_expect = XTERM;
	2281	else {
	2282	PL_expect = XOPERATOR;
	2283	}
	2284	}
	2285	NEXTVAL_NEXTTOKE.opval
	2286	= newSVOP(OP_CONST,0,
	2287	S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
	2288	NEXTVAL_NEXTTOKE.opval->op_private \|= OPpCONST_BARE;
	2289	force_next(token);
	2290	}
	2291	return s;
	2292	}
	2293
	2294	/*
	2295	* S_force_ident
	2296	* Called when the lexer wants $foo *foo &foo etc, but the program
	2297	* text only contains the "foo" portion. The first argument is a pointer
	2298	* to the "foo", and the second argument is the type symbol to prefix.
	2299	* Forces the next token to be a "BAREWORD".
	2300	* Creates the symbol if it didn't already exist (via gv_fetchpv()).
	2301	*/
	2302
	2303	STATIC void
	2304	S_force_ident(pTHX_ const char *s, int kind)
	2305	{
	2306	PERL_ARGS_ASSERT_FORCE_IDENT;
	2307
	2308	if (s[0]) {
	2309	const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
	2310	OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
	2311	UTF ? SVf_UTF8 : 0));
	2312	NEXTVAL_NEXTTOKE.opval = o;
	2313	force_next(BAREWORD);
	2314	if (kind) {
	2315	o->op_private = OPpCONST_ENTERED;
	2316	/* XXX see note in pp_entereval() for why we forgo typo
	2317	warnings if the symbol must be introduced in an eval.
	2318	GSAR 96-10-12 */
	2319	gv_fetchpvn_flags(s, len,
	2320	(PL_in_eval ? GV_ADDMULTI
	2321	: GV_ADD) \| ( UTF ? SVf_UTF8 : 0 ),
	2322	kind == PERLY_DOLLAR ? SVt_PV :
	2323	kind == PERLY_SNAIL ? SVt_PVAV :
	2324	kind == PERLY_PERCENT_SIGN ? SVt_PVHV :
	2325	SVt_PVGV
	2326	);
	2327	}
	2328	}
	2329	}
	2330
	2331	static void
	2332	S_force_ident_maybe_lex(pTHX_ char pit)
	2333	{
	2334	NEXTVAL_NEXTTOKE.ival = pit;
	2335	force_next('p');
	2336	}
	2337
	2338	NV
	2339	Perl_str_to_version(pTHX_ SV *sv)
	2340	{
	2341	NV retval = 0.0;
	2342	NV nshift = 1.0;
	2343	STRLEN len;
	2344	const char *start = SvPV_const(sv,len);
	2345	const char * const end = start + len;
	2346	const bool utf = cBOOL(SvUTF8(sv));
	2347
	2348	PERL_ARGS_ASSERT_STR_TO_VERSION;
	2349
	2350	while (start < end) {
	2351	STRLEN skip;
	2352	UV n;
	2353	if (utf)
	2354	n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
	2355	else {
	2356	n = (U8)start;
	2357	skip = 1;
	2358	}
	2359	retval += ((NV)n)/nshift;
	2360	start += skip;
	2361	nshift *= 1000;
	2362	}
	2363	return retval;
	2364	}
	2365
	2366	/*
	2367	* S_force_version
	2368	* Forces the next token to be a version number.
	2369	* If the next token appears to be an invalid version number, (e.g. "v2b"),
	2370	* and if "guessing" is TRUE, then no new token is created (and the caller
	2371	* must use an alternative parsing method).
	2372	*/
	2373
	2374	STATIC char *
	2375	S_force_version(pTHX_ char *s, int guessing)
	2376	{
	2377	OP *version = NULL;
	2378	char *d;
	2379
	2380	PERL_ARGS_ASSERT_FORCE_VERSION;
	2381
	2382	s = skipspace(s);
	2383
	2384	d = s;
	2385	if (*d == 'v')
	2386	d++;
	2387	if (isDIGIT(*d)) {
	2388	while (isDIGIT(d) \|\| d == '_' \|\| *d == '.')
	2389	d++;
	2390	if (d == ';' \|\| isSPACE(d) \|\| d == '{' \|\| d == '}' \|\| !*d) {
	2391	SV *ver;
	2392	s = scan_num(s, &pl_yylval);
	2393	version = pl_yylval.opval;
	2394	ver = cSVOPx(version)->op_sv;
	2395	if (SvPOK(ver) && !SvNIOK(ver)) {
	2396	SvUPGRADE(ver, SVt_PVNV);
	2397	SvNV_set(ver, str_to_version(ver));
	2398	SvNOK_on(ver); /* hint that it is a version */
	2399	}
	2400	}
	2401	else if (guessing) {
	2402	return s;
	2403	}
	2404	}
	2405
	2406	/* NOTE: The parser sees the package name and the VERSION swapped */
	2407	NEXTVAL_NEXTTOKE.opval = version;
	2408	force_next(BAREWORD);
	2409
	2410	return s;
	2411	}
	2412
	2413	/*
	2414	* S_force_strict_version
	2415	* Forces the next token to be a version number using strict syntax rules.
	2416	*/
	2417
	2418	STATIC char *
	2419	S_force_strict_version(pTHX_ char *s)
	2420	{
	2421	OP *version = NULL;
	2422	const char *errstr = NULL;
	2423
	2424	PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
	2425
	2426	while (isSPACE(s)) / leading whitespace */
	2427	s++;
	2428
	2429	if (is_STRICT_VERSION(s,&errstr)) {
	2430	SV *ver = newSV_type(SVt_NULL);
	2431	s = (char *)scan_version(s, ver, 0);
	2432	version = newSVOP(OP_CONST, 0, ver);
	2433	}
	2434	else if ((s != ';' && s != ':' && s != '{' && s != '}' )
	2435	&& (s = skipspace(s), (s != ';' && s != ':' && s != '{' && s != '}' )))
	2436	{
	2437	PL_bufptr = s;
	2438	if (errstr)
	2439	yyerror(errstr); /* version required */
	2440	return s;
	2441	}
	2442
	2443	/* NOTE: The parser sees the package name and the VERSION swapped */
	2444	NEXTVAL_NEXTTOKE.opval = version;
	2445	force_next(BAREWORD);
	2446
	2447	return s;
	2448	}
	2449
	2450	/*
	2451	* S_tokeq
	2452	* Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
	2453	* modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is
	2454	* unchanged, and a new SV containing the modified input is returned.
	2455	*/
	2456
	2457	STATIC SV *
	2458	S_tokeq(pTHX_ SV *sv)
	2459	{
	2460	char *s;
	2461	char *send;
	2462	char *d;
	2463	SV *pv = sv;
	2464
	2465	PERL_ARGS_ASSERT_TOKEQ;
	2466
	2467	assert (SvPOK(sv));
	2468	assert (SvLEN(sv));
	2469	assert (!SvIsCOW(sv));
	2470	if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
	2471	goto finish;
	2472	s = SvPVX(sv);
	2473	send = SvEND(sv);
	2474	/* This is relying on the SV being "well formed" with a trailing '\0' */
	2475	while (s < send && !(*s == '\\' && s[1] == '\\'))
	2476	s++;
	2477	if (s == send)
	2478	goto finish;
	2479	d = s;
	2480	if ( PL_hints & HINT_NEW_STRING ) {
	2481	pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
	2482	SVs_TEMP \| SvUTF8(sv));
	2483	}
	2484	while (s < send) {
	2485	if (*s == '\\') {
	2486	if (s + 1 < send && (s[1] == '\\'))
	2487	s++; /* all that, just for this */
	2488	}
	2489	d++ = s++;
	2490	}
	2491	*d = '\0';
	2492	SvCUR_set(sv, d - SvPVX_const(sv));
	2493	finish:
	2494	if ( PL_hints & HINT_NEW_STRING )
	2495	return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
	2496	return sv;
	2497	}
	2498
	2499	/*
	2500	* Now come three functions related to double-quote context,
	2501	* S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
	2502	* converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
	2503	* interact with PL_lex_state, and create fake ( ... ) argument lists
	2504	* to handle functions and concatenation.
	2505	* For example,
	2506	* "foo\lbar"
	2507	* is tokenised as
	2508	* stringify ( const[foo] concat lcfirst ( const[bar] ) )
	2509	*/
	2510
	2511	/*
	2512	* S_sublex_start
	2513	* Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
	2514	*
	2515	* Pattern matching will set PL_lex_op to the pattern-matching op to
	2516	* make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
	2517	*
	2518	* OP_CONST is easy--just make the new op and return.
	2519	*
	2520	* Everything else becomes a FUNC.
	2521	*
	2522	* Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
	2523	* had an OP_CONST. This just sets us up for a
	2524	* call to S_sublex_push().
	2525	*/
	2526
	2527	STATIC I32
	2528	S_sublex_start(pTHX)
	2529	{
	2530	const I32 op_type = pl_yylval.ival;
	2531
	2532	if (op_type == OP_NULL) {
	2533	pl_yylval.opval = PL_lex_op;
	2534	PL_lex_op = NULL;
	2535	return THING;
	2536	}
	2537	if (op_type == OP_CONST) {
	2538	SV *sv = PL_lex_stuff;
	2539	PL_lex_stuff = NULL;
	2540	sv = tokeq(sv);
	2541
	2542	if (SvTYPE(sv) == SVt_PVIV) {
	2543	/* Overloaded constants, nothing fancy: Convert to SVt_PV: */
	2544	STRLEN len;
	2545	const char * const p = SvPV_const(sv, len);
	2546	SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
	2547	SvREFCNT_dec(sv);
	2548	sv = nsv;
	2549	}
	2550	pl_yylval.opval = newSVOP(op_type, 0, sv);
	2551	return THING;
	2552	}
	2553
	2554	PL_parser->lex_super_state = PL_lex_state;
	2555	PL_parser->lex_sub_inwhat = (U16)op_type;
	2556	PL_parser->lex_sub_op = PL_lex_op;
	2557	PL_parser->sub_no_recover = FALSE;
	2558	PL_parser->sub_error_count = PL_error_count;
	2559	PL_lex_state = LEX_INTERPPUSH;
	2560
	2561	PL_expect = XTERM;
	2562	if (PL_lex_op) {
	2563	pl_yylval.opval = PL_lex_op;
	2564	PL_lex_op = NULL;
	2565	return PMFUNC;
	2566	}
	2567	else
	2568	return FUNC;
	2569	}
	2570
	2571	/*
	2572	* S_sublex_push
	2573	* Create a new scope to save the lexing state. The scope will be
	2574	* ended in S_sublex_done. Returns a '(', starting the function arguments
	2575	* to the uc, lc, etc. found before.
	2576	* Sets PL_lex_state to LEX_INTERPCONCAT.
	2577	*/
	2578
	2579	STATIC I32
	2580	S_sublex_push(pTHX)
	2581	{
	2582	LEXSHARED *shared;
	2583	const bool is_heredoc = PL_multi_close == '<';
	2584	ENTER;
	2585
	2586	PL_lex_state = PL_parser->lex_super_state;
	2587	SAVEI8(PL_lex_dojoin);
	2588	SAVEI32(PL_lex_brackets);
	2589	SAVEI32(PL_lex_allbrackets);
	2590	SAVEI32(PL_lex_formbrack);
	2591	SAVEI8(PL_lex_fakeeof);
	2592	SAVEI32(PL_lex_casemods);
	2593	SAVEI32(PL_lex_starts);
	2594	SAVEI8(PL_lex_state);
	2595	SAVESPTR(PL_lex_repl);
	2596	SAVEVPTR(PL_lex_inpat);
	2597	SAVEI16(PL_lex_inwhat);
	2598	if (is_heredoc)
	2599	{
	2600	SAVECOPLINE(PL_curcop);
	2601	SAVEI32(PL_multi_end);
	2602	SAVEI32(PL_parser->herelines);
	2603	PL_parser->herelines = 0;
	2604	}
	2605	SAVEIV(PL_multi_close);
	2606	SAVEPPTR(PL_bufptr);
	2607	SAVEPPTR(PL_bufend);
	2608	SAVEPPTR(PL_oldbufptr);
	2609	SAVEPPTR(PL_oldoldbufptr);
	2610	SAVEPPTR(PL_last_lop);
	2611	SAVEPPTR(PL_last_uni);
	2612	SAVEPPTR(PL_linestart);
	2613	SAVESPTR(PL_linestr);
	2614	SAVEGENERICPV(PL_lex_brackstack);
	2615	SAVEGENERICPV(PL_lex_casestack);
	2616	SAVEGENERICPV(PL_parser->lex_shared);
	2617	SAVEBOOL(PL_parser->lex_re_reparsing);
	2618	SAVEI32(PL_copline);
	2619
	2620	/* The here-doc parser needs to be able to peek into outer lexing
	2621	scopes to find the body of the here-doc. So we put PL_linestr and
	2622	PL_bufptr into lex_shared, to 'share' those values.
	2623	*/
	2624	PL_parser->lex_shared->ls_linestr = PL_linestr;
	2625	PL_parser->lex_shared->ls_bufptr = PL_bufptr;
	2626
	2627	PL_linestr = PL_lex_stuff;
	2628	PL_lex_repl = PL_parser->lex_sub_repl;
	2629	PL_lex_stuff = NULL;
	2630	PL_parser->lex_sub_repl = NULL;
	2631
	2632	/* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
	2633	set for an inner quote-like operator and then an error causes scope-
	2634	popping. We must not have a PL_lex_stuff value left dangling, as
	2635	that breaks assumptions elsewhere. See bug #123617. */
	2636	SAVEGENERICSV(PL_lex_stuff);
	2637	SAVEGENERICSV(PL_parser->lex_sub_repl);
	2638
	2639	PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
	2640	= SvPVX(PL_linestr);
	2641	PL_bufend += SvCUR(PL_linestr);
	2642	PL_last_lop = PL_last_uni = NULL;
	2643	SAVEFREESV(PL_linestr);
	2644	if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
	2645
	2646	PL_lex_dojoin = FALSE;
	2647	PL_lex_brackets = PL_lex_formbrack = 0;
	2648	PL_lex_allbrackets = 0;
	2649	PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
	2650	Newx(PL_lex_brackstack, 120, char);
	2651	Newx(PL_lex_casestack, 12, char);
	2652	PL_lex_casemods = 0;
	2653	*PL_lex_casestack = '\0';
	2654	PL_lex_starts = 0;
	2655	PL_lex_state = LEX_INTERPCONCAT;
	2656	if (is_heredoc)
	2657	CopLINE_set(PL_curcop, (line_t)PL_multi_start);
	2658	PL_copline = NOLINE;
	2659
	2660	Newxz(shared, 1, LEXSHARED);
	2661	shared->ls_prev = PL_parser->lex_shared;
	2662	PL_parser->lex_shared = shared;
	2663
	2664	PL_lex_inwhat = PL_parser->lex_sub_inwhat;
	2665	if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
	2666	if (PL_lex_inwhat == OP_MATCH \|\| PL_lex_inwhat == OP_QR \|\| PL_lex_inwhat == OP_SUBST)
	2667	PL_lex_inpat = PL_parser->lex_sub_op;
	2668	else
	2669	PL_lex_inpat = NULL;
	2670
	2671	PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
	2672	PL_in_eval &= ~EVAL_RE_REPARSING;
	2673
	2674	return SUBLEXSTART;
	2675	}
	2676
	2677	/*
	2678	* S_sublex_done
	2679	* Restores lexer state after a S_sublex_push.
	2680	*/
	2681
	2682	STATIC I32
	2683	S_sublex_done(pTHX)
	2684	{
	2685	if (!PL_lex_starts++) {
	2686	SV * const sv = newSVpvs("");
	2687	if (SvUTF8(PL_linestr))
	2688	SvUTF8_on(sv);
	2689	PL_expect = XOPERATOR;
	2690	pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
	2691	return THING;
	2692	}
	2693
	2694	if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
	2695	PL_lex_state = LEX_INTERPCASEMOD;
	2696	return yylex();
	2697	}
	2698
	2699	/* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
	2700	assert(PL_lex_inwhat != OP_TRANSR);
	2701	if (PL_lex_repl) {
	2702	assert (PL_lex_inwhat == OP_SUBST \|\| PL_lex_inwhat == OP_TRANS);
	2703	PL_linestr = PL_lex_repl;
	2704	PL_lex_inpat = 0;
	2705	PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
	2706	PL_bufend += SvCUR(PL_linestr);
	2707	PL_last_lop = PL_last_uni = NULL;
	2708	PL_lex_dojoin = FALSE;
	2709	PL_lex_brackets = 0;
	2710	PL_lex_allbrackets = 0;
	2711	PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
	2712	PL_lex_casemods = 0;
	2713	*PL_lex_casestack = '\0';
	2714	PL_lex_starts = 0;
	2715	if (SvEVALED(PL_lex_repl)) {
	2716	PL_lex_state = LEX_INTERPNORMAL;
	2717	PL_lex_starts++;
	2718	/* we don't clear PL_lex_repl here, so that we can check later
	2719	whether this is an evalled subst; that means we rely on the
	2720	logic to ensure sublex_done() is called again only via the
	2721	branch (in yylex()) that clears PL_lex_repl, else we'll loop */
	2722	}
	2723	else {
	2724	PL_lex_state = LEX_INTERPCONCAT;
	2725	PL_lex_repl = NULL;
	2726	}
	2727	if (SvTYPE(PL_linestr) >= SVt_PVNV) {
	2728	CopLINE(PL_curcop) +=
	2729	((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
	2730	+ PL_parser->herelines;
	2731	PL_parser->herelines = 0;
	2732	}
	2733	return PERLY_SLASH;
	2734	}
	2735	else {
	2736	const line_t l = CopLINE(PL_curcop);
	2737	LEAVE;
	2738	if (PL_parser->sub_error_count != PL_error_count) {
	2739	if (PL_parser->sub_no_recover) {
	2740	yyquit();
	2741	NOT_REACHED;
	2742	}
	2743	}
	2744	if (PL_multi_close == '<')
	2745	PL_parser->herelines += l - PL_multi_end;
	2746	PL_bufend = SvPVX(PL_linestr);
	2747	PL_bufend += SvCUR(PL_linestr);
	2748	PL_expect = XOPERATOR;
	2749	return SUBLEXEND;
	2750	}
	2751	}
	2752
	2753	HV *
	2754	Perl_load_charnames(pTHX_ SV * char_name, const char * context,
	2755	const STRLEN context_len, const char ** error_msg)
	2756	{
	2757	/* Load the official _charnames module if not already there. The
	2758	* parameters are just to give info for any error messages generated:
	2759	* char_name a name to look up which is the reason for loading this
	2760	* context 'char_name' in the context in the input in which it appears
	2761	* context_len how many bytes 'context' occupies
	2762	* error_msg *error_msg will be set to any error
	2763	*
	2764	* Returns the ^H table if success; otherwise NULL */
	2765
	2766	unsigned int i;
	2767	HV * table;
	2768	SV **cvp;
	2769	SV * res;
	2770
	2771	PERL_ARGS_ASSERT_LOAD_CHARNAMES;
	2772
	2773	/* This loop is executed 1 1/2 times. On the first time through, if it
	2774	* isn't already loaded, try loading it, and iterate just once to see if it
	2775	* worked. */
	2776	for (i = 0; i < 2; i++) {
	2777	table = GvHV(PL_hintgv); /* ^H */
	2778
	2779	if ( table
	2780	&& (PL_hints & HINT_LOCALIZE_HH)
	2781	&& (cvp = hv_fetchs(table, "charnames", FALSE))
	2782	&& SvOK(*cvp))
	2783	{
	2784	return table; /* Quit if already loaded */
	2785	}
	2786
	2787	if (i == 0) {
	2788	Perl_load_module(aTHX_
	2789	0,
	2790	newSVpvs("_charnames"),
	2791
	2792	/* version parameter; no need to specify it, as if we get too early
	2793	* a version, will fail anyway, not being able to find 'charnames'
	2794	* */
	2795	NULL,
	2796	newSVpvs(":full"),
	2797	newSVpvs(":short"),
	2798	NULL);
	2799	}
	2800	}
	2801
	2802	/* Here, it failed; new_constant will give appropriate error messages */
	2803	*error_msg = NULL;
	2804	res = new_constant( NULL, 0, "charnames", char_name, NULL,
	2805	context, context_len, error_msg);
	2806	SvREFCNT_dec(res);
	2807
	2808	return NULL;
	2809	}
	2810
	2811	STATIC SV*
	2812	S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
	2813	{
	2814	/* This justs wraps get_and_check_backslash_N_name() to output any error
	2815	* message it returns. */
	2816
	2817	const char * error_msg = NULL;
	2818	SV * result;
	2819
	2820	PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
	2821
	2822	/* charnames doesn't work well if there have been errors found */
	2823	if (PL_error_count > 0) {
	2824	return NULL;
	2825	}
	2826
	2827	result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
	2828
	2829	if (error_msg) {
	2830	yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
	2831	}
	2832
	2833	return result;
	2834	}
	2835
	2836	SV*
	2837	Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
	2838	const char* e,
	2839	const bool is_utf8,
	2840	const char ** error_msg)
	2841	{
	2842	/* <s> points to first character of interior of \N{}, <e> to one beyond the
	2843	* interior, hence to the "}". Finds what the name resolves to, returning
	2844	* an SV* containing it; NULL if no valid one found.
	2845	*
	2846	* 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
	2847	* doesn't have to be. */
	2848
	2849	SV* char_name;
	2850	SV* res;
	2851	HV * table;
	2852	SV **cvp;
	2853	SV *cv;
	2854	SV *rv;
	2855	HV *stash;
	2856
	2857	/* Points to the beginning of the \N{... so that any messages include the
	2858	* context of what's failing*/
	2859	const char* context = s - 3;
	2860	STRLEN context_len = e - context + 1; /* include all of \N{...} */
	2861
	2862
	2863	PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
	2864
	2865	assert(e >= s);
	2866	assert(s > (char *) 3);
	2867
	2868	while (s < e && isBLANK(*s)) {
	2869	s++;
	2870	}
	2871
	2872	while (s < e && isBLANK(*(e - 1))) {
	2873	e--;
	2874	}
	2875
	2876	char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
	2877
	2878	if (!SvCUR(char_name)) {
	2879	SvREFCNT_dec_NN(char_name);
	2880	/* diag_listed_as: Unknown charname '%s' */
	2881	*error_msg = Perl_form(aTHX_ "Unknown charname ''");
	2882	return NULL;
	2883	}
	2884
	2885	/* Autoload the charnames module */
	2886
	2887	table = load_charnames(char_name, context, context_len, error_msg);
	2888	if (table == NULL) {
	2889	return NULL;
	2890	}
	2891
	2892	*error_msg = NULL;
	2893	res = new_constant( NULL, 0, "charnames", char_name, NULL,
	2894	context, context_len, error_msg);
	2895	if (*error_msg) {
	2896	*error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
	2897
	2898	SvREFCNT_dec(res);
	2899	return NULL;
	2900	}
	2901
	2902	/* See if the charnames handler is the Perl core's, and if so, we can skip
	2903	* the validation needed for a user-supplied one, as Perl's does its own
	2904	* validation. */
	2905	cvp = hv_fetchs(table, "charnames", FALSE);
	2906	if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
	2907	SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
	2908	{
	2909	const char * const name = HvNAME(stash);
	2910	if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
	2911	return res;
	2912	}
	2913	}
	2914
	2915	/* Here, it isn't Perl's charname handler. We can't rely on a
	2916	* user-supplied handler to validate the input name. For non-ut8 input,
	2917	* look to see that the first character is legal. Then loop through the
	2918	* rest checking that each is a continuation */
	2919
	2920	/* This code makes the reasonable assumption that the only Latin1-range
	2921	* characters that begin a character name alias are alphabetic, otherwise
	2922	* would have to create a isCHARNAME_BEGIN macro */
	2923
	2924	if (! is_utf8) {
	2925	if (! isALPHAU(*s)) {
	2926	goto bad_charname;
	2927	}
	2928	s++;
	2929	while (s < e) {
	2930	if (! isCHARNAME_CONT(*s)) {
	2931	goto bad_charname;
	2932	}
	2933	if (s == ' ' && (s-1) == ' ') {
	2934	goto multi_spaces;
	2935	}
	2936	s++;
	2937	}
	2938	}
	2939	else {
	2940	/* Similarly for utf8. For invariants can check directly; for other
	2941	* Latin1, can calculate their code point and check; otherwise use an
	2942	* inversion list */
	2943	if (UTF8_IS_INVARIANT(*s)) {
	2944	if (! isALPHAU(*s)) {
	2945	goto bad_charname;
	2946	}
	2947	s++;
	2948	} else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
	2949	if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(s, (s+1)))) {
	2950	goto bad_charname;
	2951	}
	2952	s += 2;
	2953	}
	2954	else {
	2955	if (! _invlist_contains_cp(PL_utf8_charname_begin,
	2956	utf8_to_uvchr_buf((U8 *) s,
	2957	(U8 *) e,
	2958	NULL)))
	2959	{
	2960	goto bad_charname;
	2961	}
	2962	s += UTF8SKIP(s);
	2963	}
	2964
	2965	while (s < e) {
	2966	if (UTF8_IS_INVARIANT(*s)) {
	2967	if (! isCHARNAME_CONT(*s)) {
	2968	goto bad_charname;
	2969	}
	2970	if (s == ' ' && (s-1) == ' ') {
	2971	goto multi_spaces;
	2972	}
	2973	s++;
	2974	}
	2975	else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
	2976	if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(s, (s+1))))
	2977	{
	2978	goto bad_charname;
	2979	}
	2980	s += 2;
	2981	}
	2982	else {
	2983	if (! _invlist_contains_cp(PL_utf8_charname_continue,
	2984	utf8_to_uvchr_buf((U8 *) s,
	2985	(U8 *) e,
	2986	NULL)))
	2987	{
	2988	goto bad_charname;
	2989	}
	2990	s += UTF8SKIP(s);
	2991	}
	2992	}
	2993	}
	2994	if (*(s-1) == ' ') {
	2995	/* diag_listed_as: charnames alias definitions may not contain
	2996	trailing white-space; marked by <-- HERE in %s
	2997	*/
	2998	*error_msg = Perl_form(aTHX_
	2999	"charnames alias definitions may not contain trailing "
	3000	"white-space; marked by <-- HERE in %.s<-- HERE %.s",
	3001	(int)(s - context + 1), context,
	3002	(int)(e - s + 1), s + 1);
	3003	return NULL;
	3004	}
	3005
	3006	if (SvUTF8(res)) { /* Don't accept malformed charname value */
	3007	const U8* first_bad_char_loc;
	3008	STRLEN len;
	3009	const char* const str = SvPV_const(res, len);
	3010	if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
	3011	&first_bad_char_loc)))
	3012	{
	3013	_force_out_malformed_utf8_message(first_bad_char_loc,
	3014	(U8 *) PL_parser->bufend,
	3015	0,
	3016	0 /* 0 means don't die */ );
	3017	/* diag_listed_as: Malformed UTF-8 returned by \N{%s}
	3018	immediately after '%s' */
	3019	*error_msg = Perl_form(aTHX_
	3020	"Malformed UTF-8 returned by %.s immediately after '%.s'",
	3021	(int) context_len, context,
	3022	(int) ((char *) first_bad_char_loc - str), str);
	3023	return NULL;
	3024	}
	3025	}
	3026
	3027	return res;
	3028
	3029	bad_charname: {
	3030
	3031	/* The final %.*s makes sure that should the trailing NUL be missing
	3032	* that this print won't run off the end of the string */
	3033	/* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
	3034	in \N{%s} */
	3035	*error_msg = Perl_form(aTHX_
	3036	"Invalid character in \\N{...}; marked by <-- HERE in %.s<-- HERE %.s",
	3037	(int)(s - context + 1), context,
	3038	(int)(e - s + 1), s + 1);
	3039	return NULL;
	3040	}
	3041
	3042	multi_spaces:
	3043	/* diag_listed_as: charnames alias definitions may not contain a
	3044	sequence of multiple spaces; marked by <-- HERE
	3045	in %s */
	3046	*error_msg = Perl_form(aTHX_
	3047	"charnames alias definitions may not contain a sequence of "
	3048	"multiple spaces; marked by <-- HERE in %.s<-- HERE %.s",
	3049	(int)(s - context + 1), context,
	3050	(int)(e - s + 1), s + 1);
	3051	return NULL;
	3052	}
	3053
	3054	/*
	3055	scan_const
	3056
	3057	Extracts the next constant part of a pattern, double-quoted string,
	3058	or transliteration. This is terrifying code.
	3059
	3060	For example, in parsing the double-quoted string "ab\x63$d", it would
	3061	stop at the '$' and return an OP_CONST containing 'abc'.
	3062
	3063	It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
	3064	processing a pattern (PL_lex_inpat is true), a transliteration
	3065	(PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
	3066
	3067	Returns a pointer to the character scanned up to. If this is
	3068	advanced from the start pointer supplied (i.e. if anything was
	3069	successfully parsed), will leave an OP_CONST for the substring scanned
	3070	in pl_yylval. Caller must intuit reason for not parsing further
	3071	by looking at the next characters herself.
	3072
	3073	In patterns:
	3074	expand:
	3075	\N{FOO} => \N{U+hex_for_character_FOO}
	3076	(if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
	3077
	3078	pass through:
	3079	all other \-char, including \N and \N{ apart from \N{ABC}
	3080
	3081	stops on:
	3082	@ and $ where it appears to be a var, but not for $ as tail anchor
	3083	\l \L \u \U \Q \E
	3084	(?{ or (??{ or (*{
	3085
	3086	In transliterations:
	3087	characters are VERY literal, except for - not at the start or end
	3088	of the string, which indicates a range. However some backslash sequences
	3089	are recognized: \r, \n, and the like
	3090	\007 \o{}, \x{}, \N{}
	3091	If all elements in the transliteration are below 256,
	3092	scan_const expands the range to the full set of intermediate
	3093	characters. If the range is in utf8, the hyphen is replaced with
	3094	a certain range mark which will be handled by pmtrans() in op.c.
	3095
	3096	In double-quoted strings:
	3097	backslashes:
	3098	all those recognized in transliterations
	3099	deprecated backrefs: \1 (in substitution replacements)
	3100	case and quoting: \U \Q \E
	3101	stops on @ and $
	3102
	3103	scan_const does not construct ops to handle interpolated strings.
	3104	It stops processing as soon as it finds an embedded $ or @ variable
	3105	and leaves it to the caller to work out what's going on.
	3106
	3107	embedded arrays (whether in pattern or not) could be:
	3108	@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
	3109
	3110	$ in double-quoted strings must be the symbol of an embedded scalar.
	3111
	3112	$ in pattern could be $foo or could be tail anchor. Assumption:
	3113	it's a tail anchor if $ is the last thing in the string, or if it's
	3114	followed by one of "()\| \r\n\t"
	3115
	3116	\1 (backreferences) are turned into $1 in substitutions
	3117
	3118	The structure of the code is
	3119	while (there's a character to process) {
	3120	handle transliteration ranges
	3121	skip regexp comments /(?#comment)/ and codes /(?{code})/ ((*{code})/
	3122	skip #-initiated comments in //x patterns
	3123	check for embedded arrays
	3124	check for embedded scalars
	3125	if (backslash) {
	3126	deprecate \1 in substitution replacements
	3127	handle string-changing backslashes \l \U \Q \E, etc.
	3128	switch (what was escaped) {
	3129	handle \- in a transliteration (becomes a literal -)
	3130	if a pattern and not \N{, go treat as regular character
	3131	handle \132 (octal characters)
	3132	handle \x15 and \x{1234} (hex characters)
	3133	handle \N{name} (named characters, also \N{3,5} in a pattern)
	3134	handle \cV (control characters)
	3135	handle printf-style backslashes (\f, \r, \n, etc)
	3136	} (end switch)
	3137	continue
	3138	} (end if backslash)
	3139	handle regular character
	3140	} (end while character to read)
	3141
	3142	*/
	3143
	3144	STATIC char *
	3145	S_scan_const(pTHX_ char *start)
	3146	{
	3147	const char * const send = PL_bufend;/* end of the constant */
	3148	SV sv = newSV(send - start); / sv for the constant. See note below
	3149	on sizing. */
	3150	char s = start; / start of the constant */
	3151	char d = SvPVX(sv); / destination for copies */
	3152	bool dorange = FALSE; /* are we in a translit range? */
	3153	bool didrange = FALSE; /* did we just finish a range? */
	3154	bool in_charclass = FALSE; /* within /[...]/ */
	3155	const bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
	3156	UTF8? But, this can show as true
	3157	when the source isn't utf8, as for
	3158	example when it is entirely composed
	3159	of hex constants */
	3160	bool d_is_utf8 = FALSE; /* Output constant is UTF8 */
	3161	STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the
	3162	number of characters found so far
	3163	that will expand (into 2 bytes)
	3164	should we have to convert to
	3165	UTF-8) */
	3166	SV res; / result from charnames */
	3167	STRLEN offset_to_max = 0; /* The offset in the output to where the range
	3168	high-end character is temporarily placed */
	3169
	3170	/* Does something require special handling in tr/// ? This avoids extra
	3171	* work in a less likely case. As such, khw didn't feel it was worth
	3172	* adding any branches to the more mainline code to handle this, which
	3173	* means that this doesn't get set in some circumstances when things like
	3174	* \x{100} get expanded out. As a result there needs to be extra testing
	3175	* done in the tr code */
	3176	bool has_above_latin1 = FALSE;
	3177
	3178	/* Note on sizing: The scanned constant is placed into sv, which is
	3179	* initialized by newSV() assuming one byte of output for every byte of
	3180	* input. This routine expects newSV() to allocate an extra byte for a
	3181	* trailing NUL, which this routine will append if it gets to the end of
	3182	* the input. There may be more bytes of input than output (eg., \N{LATIN
	3183	* CAPITAL LETTER A}), or more output than input if the constant ends up
	3184	* recoded to utf8, but each time a construct is found that might increase
	3185	* the needed size, SvGROW() is called. Its size parameter each time is
	3186	* based on the best guess estimate at the time, namely the length used so
	3187	* far, plus the length the current construct will occupy, plus room for
	3188	* the trailing NUL, plus one byte for every input byte still unscanned */
	3189
	3190	UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
	3191	before set */
	3192	#ifdef EBCDIC
	3193	int backslash_N = 0; /* ? was the character from \N{} */
	3194	int non_portable_endpoint = 0; /* ? In a range is an endpoint
	3195	platform-specific like \x65 */
	3196	#endif
	3197
	3198	PERL_ARGS_ASSERT_SCAN_CONST;
	3199
	3200	assert(PL_lex_inwhat != OP_TRANSR);
	3201
	3202	/* Protect sv from errors and fatal warnings. */
	3203	ENTER_with_name("scan_const");
	3204	SAVEFREESV(sv);
	3205
	3206	/* A bunch of code in the loop below assumes that if s[n] exists and is not
	3207	* NUL, then s[n+1] exists. This assertion makes sure that assumption is
	3208	* valid */
	3209	assert(*send == '\0');
	3210
	3211	while (s < send
	3212	\|\| dorange /* Handle tr/// range at right edge of input */
	3213	) {
	3214
	3215	/* get transliterations out of the way (they're most literal) */
	3216	if (PL_lex_inwhat == OP_TRANS) {
	3217
	3218	/* But there isn't any special handling necessary unless there is a
	3219	* range, so for most cases we just drop down and handle the value
	3220	* as any other. There are two exceptions.
	3221	*
	3222	* 1. A hyphen indicates that we are actually going to have a
	3223	* range. In this case, skip the '-', set a flag, then drop
	3224	* down to handle what should be the end range value.
	3225	* 2. After we've handled that value, the next time through, that
	3226	* flag is set and we fix up the range.
	3227	*
	3228	* Ranges entirely within Latin1 are expanded out entirely, in
	3229	* order to make the transliteration a simple table look-up.
	3230	* Ranges that extend above Latin1 have to be done differently, so
	3231	* there is no advantage to expanding them here, so they are
	3232	* stored here as Min, RANGE_INDICATOR, Max. 'RANGE_INDICATOR' is
	3233	* a byte that can't occur in legal UTF-8, and hence can signify a
	3234	* hyphen without any possible ambiguity. On EBCDIC machines, if
	3235	* the range is expressed as Unicode, the Latin1 portion is
	3236	* expanded out even if the range extends above Latin1. This is
	3237	* because each code point in it has to be processed here
	3238	* individually to get its native translation */
	3239
	3240	if (! dorange) {
	3241
	3242	/* Here, we don't think we're in a range. If the new character
	3243	* is not a hyphen; or if it is a hyphen, but it's too close to
	3244	* either edge to indicate a range, or if we haven't output any
	3245	* characters yet then it's a regular character. */
	3246	if (*s != '-' \|\| s >= send - 1 \|\| s == start \|\| d == SvPVX(sv))
	3247	{
	3248
	3249	/* A regular character. Process like any other, but first
	3250	* clear any flags */
	3251	didrange = FALSE;
	3252	dorange = FALSE;
	3253	#ifdef EBCDIC
	3254	non_portable_endpoint = 0;
	3255	backslash_N = 0;
	3256	#endif
	3257	/* The tests here for being above Latin1 and similar ones
	3258	* in the following 'else' suffice to find all such
	3259	* occurences in the constant, except those added by a
	3260	* backslash escape sequence, like \x{100}. Mostly, those
	3261	* set 'has_above_latin1' as appropriate */
	3262	if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
	3263	has_above_latin1 = TRUE;
	3264	}
	3265
	3266	/* Drops down to generic code to process current byte */
	3267	}
	3268	else { /* Is a '-' in the context where it means a range */
	3269	if (didrange) { /* Something like y/A-C-Z// */
	3270	Perl_croak(aTHX_ "Ambiguous range in transliteration"
	3271	" operator");
	3272	}
	3273
	3274	dorange = TRUE;
	3275
	3276	s++; /* Skip past the hyphen */
	3277
	3278	/* d now points to where the end-range character will be
	3279	* placed. Drop down to get that character. We'll finish
	3280	* processing the range the next time through the loop */
	3281
	3282	if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
	3283	has_above_latin1 = TRUE;
	3284	}
	3285
	3286	/* Drops down to generic code to process current byte */
	3287	}
	3288	} /* End of not a range */
	3289	else {
	3290	/* Here we have parsed a range. Now must handle it. At this
	3291	* point:
	3292	* 'sv' is a SV* that contains the output string we are
	3293	* constructing. The final two characters in that string
	3294	* are the range start and range end, in order.
	3295	* 'd' points to just beyond the range end in the 'sv' string,
	3296	* where we would next place something
	3297	*/
	3298	char * max_ptr;
	3299	char * min_ptr;
	3300	IV range_min;
	3301	IV range_max; /* last character in range */
	3302	STRLEN grow;
	3303	Size_t offset_to_min = 0;
	3304	Size_t extras = 0;
	3305	#ifdef EBCDIC
	3306	bool convert_unicode;
	3307	IV real_range_max = 0;
	3308	#endif
	3309	/* Get the code point values of the range ends. */
	3310	max_ptr = (d_is_utf8) ? (char ) utf8_hop( (U8) d, -1) : d - 1;
	3311	offset_to_max = max_ptr - SvPVX_const(sv);
	3312	if (d_is_utf8) {
	3313	/* We know the utf8 is valid, because we just constructed
	3314	* it ourselves in previous loop iterations */
	3315	min_ptr = (char) utf8_hop( (U8) max_ptr, -1);
	3316	range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
	3317	range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
	3318
	3319	/* This compensates for not all code setting
	3320	* 'has_above_latin1', so that we don't skip stuff that
	3321	* should be executed */
	3322	if (range_max > 255) {
	3323	has_above_latin1 = TRUE;
	3324	}
	3325	}
	3326	else {
	3327	min_ptr = max_ptr - 1;
	3328	range_min = * (U8*) min_ptr;
	3329	range_max = * (U8*) max_ptr;
	3330	}
	3331
	3332	/* If the range is just a single code point, like tr/a-a/.../,
	3333	* that code point is already in the output, twice. We can
	3334	* just back up over the second instance and avoid all the rest
	3335	* of the work. But if it is a variant character, it's been
	3336	* counted twice, so decrement. (This unlikely scenario is
	3337	* special cased, like the one for a range of 2 code points
	3338	* below, only because the main-line code below needs a range
	3339	* of 3 or more to work without special casing. Might as well
	3340	* get it out of the way now.) */
	3341	if (UNLIKELY(range_max == range_min)) {
	3342	d = max_ptr;
	3343	if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
	3344	utf8_variant_count--;
	3345	}
	3346	goto range_done;
	3347	}
	3348
	3349	#ifdef EBCDIC
	3350	/* On EBCDIC platforms, we may have to deal with portable
	3351	* ranges. These happen if at least one range endpoint is a
	3352	* Unicode value (\N{...}), or if the range is a subset of
	3353	* [A-Z] or [a-z], and both ends are literal characters,
	3354	* like 'A', and not like \x{C1} */
	3355	convert_unicode =
	3356	cBOOL(backslash_N) /* \N{} forces Unicode,
	3357	hence portable range */
	3358	\|\| ( ! non_portable_endpoint
	3359	&& (( isLOWER_A(range_min) && isLOWER_A(range_max))
	3360	\|\| (isUPPER_A(range_min) && isUPPER_A(range_max))));
	3361	if (convert_unicode) {
	3362
	3363	/* Special handling is needed for these portable ranges.
	3364	* They are defined to be in Unicode terms, which includes
	3365	* all the Unicode code points between the end points.
	3366	* Convert to Unicode to get the Unicode range. Later we
	3367	* will convert each code point in the range back to
	3368	* native. */
	3369	range_min = NATIVE_TO_UNI(range_min);
	3370	range_max = NATIVE_TO_UNI(range_max);
	3371	}
	3372	#endif
	3373
	3374	if (range_min > range_max) {
	3375	#ifdef EBCDIC
	3376	if (convert_unicode) {
	3377	/* Need to convert back to native for meaningful
	3378	* messages for this platform */
	3379	range_min = UNI_TO_NATIVE(range_min);
	3380	range_max = UNI_TO_NATIVE(range_max);
	3381	}
	3382	#endif
	3383	/* Use the characters themselves for the error message if
	3384	* ASCII printables; otherwise some visible representation
	3385	* of them */
	3386	if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
	3387	Perl_croak(aTHX_
	3388	"Invalid range \"%c-%c\" in transliteration operator",
	3389	(char)range_min, (char)range_max);
	3390	}
	3391	#ifdef EBCDIC
	3392	else if (convert_unicode) {
	3393	/* diag_listed_as: Invalid range "%s" in transliteration operator */
	3394	Perl_croak(aTHX_
	3395	"Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
	3396	UVXf "}\" in transliteration operator",
	3397	range_min, range_max);
	3398	}
	3399	#endif
	3400	else {
	3401	/* diag_listed_as: Invalid range "%s" in transliteration operator */
	3402	Perl_croak(aTHX_
	3403	"Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
	3404	" in transliteration operator",
	3405	range_min, range_max);
	3406	}
	3407	}
	3408
	3409	/* If the range is exactly two code points long, they are
	3410	* already both in the output */
	3411	if (UNLIKELY(range_min + 1 == range_max)) {
	3412	goto range_done;
	3413	}
	3414
	3415	/* Here the range contains at least 3 code points */
	3416
	3417	if (d_is_utf8) {
	3418
	3419	/* If everything in the transliteration is below 256, we
	3420	* can avoid special handling later. A translation table
	3421	* for each of those bytes is created by op.c. So we
	3422	* expand out all ranges to their constituent code points.
	3423	* But if we've encountered something above 255, the
	3424	* expanding won't help, so skip doing that. But if it's
	3425	* EBCDIC, we may have to look at each character below 256
	3426	* if we have to convert to/from Unicode values */
	3427	if ( has_above_latin1
	3428	#ifdef EBCDIC
	3429	&& (range_min > 255 \|\| ! convert_unicode)
	3430	#endif
	3431	) {
	3432	const STRLEN off = d - SvPVX(sv);
	3433	const STRLEN extra = 1 + (send - s) + 1;
	3434	char *e;
	3435
	3436	/* Move the high character one byte to the right; then
	3437	* insert between it and the range begin, an illegal
	3438	* byte which serves to indicate this is a range (using
	3439	* a '-' would be ambiguous). */
	3440
	3441	if (off + extra > SvLEN(sv)) {
	3442	d = off + SvGROW(sv, off + extra);
	3443	max_ptr = d - off + offset_to_max;
	3444	}
	3445
	3446	e = d++;
	3447	while (e-- > max_ptr) {
	3448	(e + 1) = e;
	3449	}
	3450	*(e + 1) = (char) RANGE_INDICATOR;
	3451	goto range_done;
	3452	}
	3453
	3454	/* Here, we're going to expand out the range. For EBCDIC
	3455	* the range can extend above 255 (not so in ASCII), so
	3456	* for EBCDIC, split it into the parts above and below
	3457	* 255/256 */
	3458	#ifdef EBCDIC
	3459	if (range_max > 255) {
	3460	real_range_max = range_max;
	3461	range_max = 255;
	3462	}
	3463	#endif
	3464	}
	3465
	3466	/* Here we need to expand out the string to contain each
	3467	* character in the range. Grow the output to handle this.
	3468	* For non-UTF8, we need a byte for each code point in the
	3469	* range, minus the three that we've already allocated for: the
	3470	* hyphen, the min, and the max. For UTF-8, we need this
	3471	* plus an extra byte for each code point that occupies two
	3472	* bytes (is variant) when in UTF-8 (except we've already
	3473	* allocated for the end points, including if they are
	3474	* variants). For ASCII platforms and Unicode ranges on EBCDIC
	3475	* platforms, it's easy to calculate a precise number. To
	3476	* start, we count the variants in the range, which we need
	3477	* elsewhere in this function anyway. (For the case where it
	3478	* isn't easy to calculate, 'extras' has been initialized to 0,
	3479	* and the calculation is done in a loop further down.) */
	3480	#ifdef EBCDIC
	3481	if (convert_unicode)
	3482	#endif
	3483	{
	3484	/* This is executed unconditionally on ASCII, and for
	3485	* Unicode ranges on EBCDIC. Under these conditions, all
	3486	* code points above a certain value are variant; and none
	3487	* under that value are. We just need to find out how much
	3488	* of the range is above that value. We don't count the
	3489	* end points here, as they will already have been counted
	3490	* as they were parsed. */
	3491	if (range_min >= UTF_CONTINUATION_MARK) {
	3492
	3493	/* The whole range is made up of variants */
	3494	extras = (range_max - 1) - (range_min + 1) + 1;
	3495	}
	3496	else if (range_max >= UTF_CONTINUATION_MARK) {
	3497
	3498	/* Only the higher portion of the range is variants */
	3499	extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
	3500	}
	3501
	3502	utf8_variant_count += extras;
	3503	}
	3504
	3505	/* The base growth is the number of code points in the range,
	3506	* not including the endpoints, which have already been sized
	3507	* for (and output). We don't subtract for the hyphen, as it
	3508	* has been parsed but not output, and the SvGROW below is
	3509	* based only on what's been output plus what's left to parse.
	3510	* */
	3511	grow = (range_max - 1) - (range_min + 1) + 1;
	3512
	3513	if (d_is_utf8) {
	3514	#ifdef EBCDIC
	3515	/* In some cases in EBCDIC, we haven't yet calculated a
	3516	* precise amount needed for the UTF-8 variants. Just
	3517	* assume the worst case, that everything will expand by a
	3518	* byte */
	3519	if (! convert_unicode) {
	3520	grow *= 2;
	3521	}
	3522	else
	3523	#endif
	3524	{
	3525	/* Otherwise we know exactly how many variants there
	3526	* are in the range. */
	3527	grow += extras;
	3528	}
	3529	}
	3530
	3531	/* Grow, but position the output to overwrite the range min end
	3532	* point, because in some cases we overwrite that */
	3533	SvCUR_set(sv, d - SvPVX_const(sv));
	3534	offset_to_min = min_ptr - SvPVX_const(sv);
	3535
	3536	/* See Note on sizing above. */
	3537	d = offset_to_min + SvGROW(sv, SvCUR(sv)
	3538	+ (send - s)
	3539	+ grow
	3540	+ 1 /* Trailing NUL */ );
	3541
	3542	/* Now, we can expand out the range. */
	3543	#ifdef EBCDIC
	3544	if (convert_unicode) {
	3545	SSize_t i;
	3546
	3547	/* Recall that the min and max are now in Unicode terms, so
	3548	* we have to convert each character to its native
	3549	* equivalent */
	3550	if (d_is_utf8) {
	3551	for (i = range_min; i <= range_max; i++) {
	3552	append_utf8_from_native_byte(
	3553	LATIN1_TO_NATIVE((U8) i),
	3554	(U8 **) &d);
	3555	}
	3556	}
	3557	else {
	3558	for (i = range_min; i <= range_max; i++) {
	3559	*d++ = (char)LATIN1_TO_NATIVE((U8) i);
	3560	}
	3561	}
	3562	}
	3563	else
	3564	#endif
	3565	/* Always gets run for ASCII, and sometimes for EBCDIC. */
	3566	{
	3567	/* Here, no conversions are necessary, which means that the
	3568	* first character in the range is already in 'd' and
	3569	* valid, so we can skip overwriting it */
	3570	if (d_is_utf8) {
	3571	SSize_t i;
	3572	d += UTF8SKIP(d);
	3573	for (i = range_min + 1; i <= range_max; i++) {
	3574	append_utf8_from_native_byte((U8) i, (U8 **) &d);
	3575	}
	3576	}
	3577	else {
	3578	SSize_t i;
	3579	d++;
	3580	assert(range_min + 1 <= range_max);
	3581	for (i = range_min + 1; i < range_max; i++) {
	3582	#ifdef EBCDIC
	3583	/* In this case on EBCDIC, we haven't calculated
	3584	* the variants. Do it here, as we go along */
	3585	if (! UVCHR_IS_INVARIANT(i)) {
	3586	utf8_variant_count++;
	3587	}
	3588	#endif
	3589	*d++ = (char)i;
	3590	}
	3591
	3592	/* The range_max is done outside the loop so as to
	3593	* avoid having to special case not incrementing
	3594	* 'utf8_variant_count' on EBCDIC (it's already been
	3595	* counted when originally parsed) */
	3596	*d++ = (char) range_max;
	3597	}
	3598	}
	3599
	3600	#ifdef EBCDIC
	3601	/* If the original range extended above 255, add in that
	3602	* portion. */
	3603	if (real_range_max) {
	3604	*d++ = (char) UTF8_TWO_BYTE_HI(0x100);
	3605	*d++ = (char) UTF8_TWO_BYTE_LO(0x100);
	3606	if (real_range_max > 0x100) {
	3607	if (real_range_max > 0x101) {
	3608	*d++ = (char) RANGE_INDICATOR;
	3609	}
	3610	d = (char)uvchr_to_utf8((U8)d, real_range_max);
	3611	}
	3612	}
	3613	#endif
	3614
	3615	range_done:
	3616	/* mark the range as done, and continue */
	3617	didrange = TRUE;
	3618	dorange = FALSE;
	3619	#ifdef EBCDIC
	3620	non_portable_endpoint = 0;
	3621	backslash_N = 0;
	3622	#endif
	3623	continue;
	3624	} /* End of is a range */
	3625	} /* End of transliteration. Joins main code after these else's */
	3626	else if (*s == '[' && PL_lex_inpat && !in_charclass) {
	3627	char *s1 = s-1;
	3628	int esc = 0;
	3629	while (s1 >= start && *s1-- == '\\')
	3630	esc = !esc;
	3631	if (!esc)
	3632	in_charclass = TRUE;
	3633	}
	3634	else if (*s == ']' && PL_lex_inpat && in_charclass) {
	3635	char *s1 = s-1;
	3636	int esc = 0;
	3637	while (s1 >= start && *s1-- == '\\')
	3638	esc = !esc;
	3639	if (!esc)
	3640	in_charclass = FALSE;
	3641	}
	3642	/* skip for regexp comments /(?#comment)/, except for the last
	3643	* char, which will be done separately. Stop on (?{..}) and
	3644	* friends (??{ ... }) or ({ ... }) /
	3645	else if (s == '(' && PL_lex_inpat && (s[1] == '?' \|\| s[1] == '') && !in_charclass) {
	3646	if (s[1] == '?' && s[2] == '#') {
	3647	if (s_is_utf8) {
	3648	PERL_UINT_FAST8_T len = UTF8SKIP(s);
	3649
	3650	while (s + len < send && *s != ')') {
	3651	Copy(s, d, len, U8);
	3652	d += len;
	3653	s += len;
	3654	len = UTF8_SAFE_SKIP(s, send);
	3655	}
	3656	}
	3657	else while (s+1 < send && *s != ')') {
	3658	d++ = s++;
	3659	}
	3660	}
	3661	else
	3662	if (!PL_lex_casemods &&
	3663	/* The following should match regcomp.c */
	3664	((s[1] == '?' && (s[2] == '{' /* (?{ ... }) */
	3665	\|\| (s[2] == '?' && s[3] == '{'))) \|\| /* (??{ ... }) */
	3666	(s[1] == '' && (s[2] == '{' ))) / ({ ... }) /
	3667	){
	3668	break;
	3669	}
	3670	}
	3671	/* likewise skip #-initiated comments in //x patterns */
	3672	else if (*s == '#'
	3673	&& PL_lex_inpat
	3674	&& !in_charclass
	3675	&& ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
	3676	{
	3677	while (s < send && *s != '\n')
	3678	d++ = s++;
	3679	}
	3680	/* no further processing of single-quoted regex */
	3681	else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
	3682	goto default_action;
	3683
	3684	/* check for embedded arrays
	3685	* (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
	3686	*/
	3687	else if (*s == '@' && s[1]) {
	3688	if (UTF
	3689	? isIDFIRST_utf8_safe(s+1, send)
	3690	: isWORDCHAR_A(s[1]))
	3691	{
	3692	break;
	3693	}
	3694	if (memCHRs(":'{$", s[1]))
	3695	break;
	3696	if (!PL_lex_inpat && (s[1] == '+' \|\| s[1] == '-'))
	3697	break; /* in regexp, neither @+ nor @- are interpolated */
	3698	}
	3699	/* check for embedded scalars. only stop if we're sure it's a
	3700	* variable. */
	3701	else if (*s == '$') {
	3702	if (!PL_lex_inpat) /* not a regexp, so $ must be var */
	3703	break;
	3704	if (s + 1 < send && !memCHRs("()\| \r\n\t", s[1])) {
	3705	if (s[1] == '\\') {
	3706	Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
	3707	"Possible unintended interpolation of $\\ in regex");
	3708	}
	3709	break; /* in regexp, $ might be tail anchor */
	3710	}
	3711	}
	3712
	3713	/* End of else if chain - OP_TRANS rejoin rest */
	3714
	3715	if (UNLIKELY(s >= send)) {
	3716	assert(s == send);
	3717	break;
	3718	}
	3719
	3720	/* backslashes */
	3721	if (*s == '\\' && s+1 < send) {
	3722	char* bslash = s; /* point to beginning \ */
	3723	char* rbrace; /* point to ending '}' */
	3724	char* e; /* 1 past the meat (non-blanks) before the
	3725	brace */
	3726	s++;
	3727
	3728	/* warn on \1 - \9 in substitution replacements, but note that \11
	3729	* is an octal; and \19 is \1 followed by '9' */
	3730	if (PL_lex_inwhat == OP_SUBST
	3731	&& !PL_lex_inpat
	3732	&& isDIGIT(*s)
	3733	&& *s != '0'
	3734	&& !isDIGIT(s[1]))
	3735	{
	3736	/* diag_listed_as: \%d better written as $%d */
	3737	Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", s, s);
	3738	s = bslash;
	3739	*s = '$';
	3740	break;
	3741	}
	3742
	3743	/* string-change backslash escapes */
	3744	if (PL_lex_inwhat != OP_TRANS && s && memCHRs("lLuUEQF", s)) {
	3745	s = bslash;
	3746	break;
	3747	}
	3748	/* In a pattern, process \N, but skip any other backslash escapes.
	3749	* This is because we don't want to translate an escape sequence
	3750	* into a meta symbol and have the regex compiler use the meta
	3751	* symbol meaning, e.g. \x{2E} would be confused with a dot. But
	3752	* in spite of this, we do have to process \N here while the proper
	3753	* charnames handler is in scope. See bugs #56444 and #62056.
	3754	*
	3755	* There is a complication because \N in a pattern may also stand
	3756	* for 'match a non-nl', and not mean a charname, in which case its
	3757	* processing should be deferred to the regex compiler. To be a
	3758	* charname it must be followed immediately by a '{', and not look
	3759	* like \N followed by a curly quantifier, i.e., not something like
	3760	* \N{3,}. regcurly returns a boolean indicating if it is a legal
	3761	* quantifier */
	3762	else if (PL_lex_inpat
	3763	&& (*s != 'N'
	3764	\|\| s[1] != '{'
	3765	\|\| regcurly(s + 1, send, NULL)))
	3766	{
	3767	*d++ = '\\';
	3768	goto default_action;
	3769	}
	3770
	3771	switch (*s) {
	3772	default:
	3773	{
	3774	if ((isALPHANUMERIC(*s)))
	3775	Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
	3776	"Unrecognized escape \\%c passed through",
	3777	*s);
	3778	/* default action is to copy the quoted character */
	3779	goto default_action;
	3780	}
	3781
	3782	/* eg. \132 indicates the octal constant 0132 */
	3783	case '0': case '1': case '2': case '3':
	3784	case '4': case '5': case '6': case '7':
	3785	{
	3786	I32 flags = PERL_SCAN_SILENT_ILLDIGIT
	3787	\| PERL_SCAN_NOTIFY_ILLDIGIT;
	3788	STRLEN len = 3;
	3789	uv = grok_oct(s, &len, &flags, NULL);
	3790	s += len;
	3791	if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
	3792	&& s < send
	3793	&& isDIGIT(s) / like \08, \178 */
	3794	&& ckWARN(WARN_MISC))
	3795	{
	3796	Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
	3797	form_alien_digit_msg(8, len, s, send, UTF, FALSE));
	3798	}
	3799	}
	3800	goto NUM_ESCAPE_INSERT;
	3801
	3802	/* eg. \o{24} indicates the octal constant \024 */
	3803	case 'o':
	3804	{
	3805	const char* error;
	3806
	3807	if (! grok_bslash_o(&s, send,
	3808	&uv, &error,
	3809	NULL,
	3810	FALSE, /* Not strict */
	3811	FALSE, /* No illegal cp's */
	3812	UTF))
	3813	{
	3814	yyerror(error);
	3815	uv = 0; /* drop through to ensure range ends are set */
	3816	}
	3817	goto NUM_ESCAPE_INSERT;
	3818	}
	3819
	3820	/* eg. \x24 indicates the hex constant 0x24 */
	3821	case 'x':
	3822	{
	3823	const char* error;
	3824
	3825	if (! grok_bslash_x(&s, send,
	3826	&uv, &error,
	3827	NULL,
	3828	FALSE, /* Not strict */
	3829	FALSE, /* No illegal cp's */
	3830	UTF))
	3831	{
	3832	yyerror(error);
	3833	uv = 0; /* drop through to ensure range ends are set */
	3834	}
	3835	}
	3836
	3837	NUM_ESCAPE_INSERT:
	3838	/* Insert oct or hex escaped character. */
	3839
	3840	/* Here uv is the ordinal of the next character being added */
	3841	if (UVCHR_IS_INVARIANT(uv)) {
	3842	*d++ = (char) uv;
	3843	}
	3844	else {
	3845	if (!d_is_utf8 && uv > 255) {
	3846
	3847	/* Here, 'uv' won't fit unless we convert to UTF-8.
	3848	* If we've only seen invariants so far, all we have to
	3849	* do is turn on the flag */
	3850	if (utf8_variant_count == 0) {
	3851	SvUTF8_on(sv);
	3852	}
	3853	else {
	3854	SvCUR_set(sv, d - SvPVX_const(sv));
	3855	SvPOK_on(sv);
	3856	*d = '\0';
	3857
	3858	sv_utf8_upgrade_flags_grow(
	3859	sv,
	3860	SV_GMAGIC\|SV_FORCE_UTF8_UPGRADE,
	3861
	3862	/* Since we're having to grow here,
	3863	* make sure we have enough room for
	3864	* this escape and a NUL, so the
	3865	* code immediately below won't have
	3866	* to actually grow again */
	3867	UVCHR_SKIP(uv)
	3868	+ (STRLEN)(send - s) + 1);
	3869	d = SvPVX(sv) + SvCUR(sv);
	3870	}
	3871
	3872	has_above_latin1 = TRUE;
	3873	d_is_utf8 = TRUE;
	3874	}
	3875
	3876	if (! d_is_utf8) {
	3877	*d++ = (char)uv;
	3878	utf8_variant_count++;
	3879	}
	3880	else {
	3881	/* Usually, there will already be enough room in 'sv'
	3882	* since such escapes are likely longer than any UTF-8
	3883	* sequence they can end up as. This isn't the case on
	3884	* EBCDIC where \x{40000000} contains 12 bytes, and the
	3885	* UTF-8 for it contains 14. And, we have to allow for
	3886	* a trailing NUL. It probably can't happen on ASCII
	3887	* platforms, but be safe. See Note on sizing above. */
	3888	const STRLEN needed = d - SvPVX(sv)
	3889	+ UVCHR_SKIP(uv)
	3890	+ (send - s)
	3891	+ 1;
	3892	if (UNLIKELY(needed > SvLEN(sv))) {
	3893	SvCUR_set(sv, d - SvPVX_const(sv));
	3894	d = SvCUR(sv) + SvGROW(sv, needed);
	3895	}
	3896
	3897	d = (char) uvchr_to_utf8_flags((U8)d, uv,
	3898	(ckWARN(WARN_PORTABLE))
	3899	? UNICODE_WARN_PERL_EXTENDED
	3900	: 0);
	3901	}
	3902	}
	3903	#ifdef EBCDIC
	3904	non_portable_endpoint++;
	3905	#endif
	3906	continue;
	3907
	3908	case 'N':
	3909	/* In a non-pattern \N must be like \N{U+0041}, or it can be a
	3910	* named character, like \N{LATIN SMALL LETTER A}, or a named
	3911	* sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
	3912	* GRAVE} (except y/// can't handle the latter, croaking). For
	3913	* convenience all three forms are referred to as "named
	3914	* characters" below.
	3915	*
	3916	* For patterns, \N also can mean to match a non-newline. Code
	3917	* before this 'switch' statement should already have handled
	3918	* this situation, and hence this code only has to deal with
	3919	* the named character cases.
	3920	*
	3921	* For non-patterns, the named characters are converted to
	3922	* their string equivalents. In patterns, named characters are
	3923	* not converted to their ultimate forms for the same reasons
	3924	* that other escapes aren't (mainly that the ultimate
	3925	* character could be considered a meta-symbol by the regex
	3926	* compiler). Instead, they are converted to the \N{U+...}
	3927	* form to get the value from the charnames that is in effect
	3928	* right now, while preserving the fact that it was a named
	3929	* character, so that the regex compiler knows this.
	3930	*
	3931	* The structure of this section of code (besides checking for
	3932	* errors and upgrading to utf8) is:
	3933	* If the named character is of the form \N{U+...}, pass it
	3934	* through if a pattern; otherwise convert the code point
	3935	* to utf8
	3936	* Otherwise must be some \N{NAME}: convert to
	3937	* \N{U+c1.c2...} if a pattern; otherwise convert to utf8
	3938	*
	3939	* Transliteration is an exception. The conversion to utf8 is
	3940	* only done if the code point requires it to be representable.
	3941	*
	3942	* Here, 's' points to the 'N'; the test below is guaranteed to
	3943	* succeed if we are being called on a pattern, as we already
	3944	* know from a test above that the next character is a '{'. A
	3945	* non-pattern \N must mean 'named character', which requires
	3946	* braces */
	3947	s++;
	3948	if (*s != '{') {
	3949	yyerror("Missing braces on \\N{}");
	3950	*d++ = '\0';
	3951	continue;
	3952	}
	3953	s++;
	3954
	3955	/* If there is no matching '}', it is an error. */
	3956	if (! (rbrace = (char *) memchr(s, '}', send - s))) {
	3957	if (! PL_lex_inpat) {
	3958	yyerror("Missing right brace on \\N{}");
	3959	} else {
	3960	yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
	3961	}
	3962	yyquit(); /* Have exhausted the input. */
	3963	}
	3964
	3965	/* Here it looks like a named character */
	3966	while (s < rbrace && isBLANK(*s)) {
	3967	s++;
	3968	}
	3969
	3970	e = rbrace;
	3971	while (s < e && isBLANK(*(e - 1))) {
	3972	e--;
	3973	}
	3974
	3975	if (s == 'U' && s[1] == '+') { / \N{U+...} */
	3976	s += 2; /* Skip to next char after the 'U+' */
	3977	if (PL_lex_inpat) {
	3978
	3979	/* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
	3980	/* Check the syntax. */
	3981	if (!isXDIGIT(*s)) {
	3982	bad_NU:
	3983	yyerror(
	3984	"Invalid hexadecimal number in \\N{U+...}"
	3985	);
	3986	s = rbrace + 1;
	3987	*d++ = '\0';
	3988	continue;
	3989	}
	3990	while (++s < e) {
	3991	if (isXDIGIT(*s))
	3992	continue;
	3993	else if ((s == '.' \|\| s == '_')
	3994	&& isXDIGIT(s[1]))
	3995	continue;
	3996	goto bad_NU;
	3997	}
	3998
	3999	/* Pass everything through unchanged.
	4000	* +1 is to include the '}' */
	4001	Copy(bslash, d, rbrace - bslash + 1, char);
	4002	d += rbrace - bslash + 1;
	4003	}
	4004	else { /* Not a pattern: convert the hex to string */
	4005	I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
	4006	\| PERL_SCAN_SILENT_ILLDIGIT
	4007	\| PERL_SCAN_SILENT_OVERFLOW
	4008	\| PERL_SCAN_DISALLOW_PREFIX;
	4009	STRLEN len = e - s;
	4010
	4011	uv = grok_hex(s, &len, &flags, NULL);
	4012	if (len == 0 \|\| (len != (STRLEN)(e - s)))
	4013	goto bad_NU;
	4014
	4015	if ( uv > MAX_LEGAL_CP
	4016	\|\| (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
	4017	{
	4018	yyerror(form_cp_too_large_msg(16, s, len, 0));
	4019	uv = 0; /* drop through to ensure range ends are
	4020	set */
	4021	}
	4022
	4023	/* For non-tr///, if the destination is not in utf8,
	4024	* unconditionally recode it to be so. This is
	4025	* because \N{} implies Unicode semantics, and scalars
	4026	* have to be in utf8 to guarantee those semantics.
	4027	* tr/// doesn't care about Unicode rules, so no need
	4028	* there to upgrade to UTF-8 for small enough code
	4029	* points */
	4030	if (! d_is_utf8 && ( uv > 0xFF
	4031	\|\| PL_lex_inwhat != OP_TRANS))
	4032	{
	4033	/* See Note on sizing above. */
	4034	const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1;
	4035
	4036	SvCUR_set(sv, d - SvPVX_const(sv));
	4037	SvPOK_on(sv);
	4038	*d = '\0';
	4039
	4040	if (utf8_variant_count == 0) {
	4041	SvUTF8_on(sv);
	4042	d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
	4043	}
	4044	else {
	4045	sv_utf8_upgrade_flags_grow(
	4046	sv,
	4047	SV_GMAGIC\|SV_FORCE_UTF8_UPGRADE,
	4048	extra);
	4049	d = SvPVX(sv) + SvCUR(sv);
	4050	}
	4051
	4052	d_is_utf8 = TRUE;
	4053	has_above_latin1 = TRUE;
	4054	}
	4055
	4056	/* Add the (Unicode) code point to the output. */
	4057	if (OFFUNI_IS_INVARIANT(uv)) {
	4058	*d++ = (char) LATIN1_TO_NATIVE(uv);
	4059	}
	4060	else if (! d_is_utf8) {
	4061	*d++ = (char) LATIN1_TO_NATIVE(uv);
	4062	utf8_variant_count++;
	4063	}
	4064	else {
	4065	d = (char) uvoffuni_to_utf8_flags((U8)d, uv,
	4066	(ckWARN(WARN_PORTABLE))
	4067	? UNICODE_WARN_PERL_EXTENDED
	4068	: 0);
	4069	}
	4070	}
	4071	}
	4072	else /* Here is \N{NAME} but not \N{U+...}. */
	4073	if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
	4074	{ /* Failed. We should die eventually, but for now use a NUL
	4075	to keep parsing */
	4076	*d++ = '\0';
	4077	}
	4078	else { /* Successfully evaluated the name */
	4079	STRLEN len;
	4080	const char *str = SvPV_const(res, len);
	4081	if (PL_lex_inpat) {
	4082
	4083	if (! len) { /* The name resolved to an empty string */
	4084	const char empty_N[] = "\\N{_}";
	4085	Copy(empty_N, d, sizeof(empty_N) - 1, char);
	4086	d += sizeof(empty_N) - 1;
	4087	}
	4088	else {
	4089	/* In order to not lose information for the regex
	4090	* compiler, pass the result in the specially made
	4091	* syntax: \N{U+c1.c2.c3...}, where c1 etc. are
	4092	* the code points in hex of each character
	4093	* returned by charnames */
	4094
	4095	const char *str_end = str + len;
	4096	const STRLEN off = d - SvPVX_const(sv);
	4097
	4098	if (! SvUTF8(res)) {
	4099	/* For the non-UTF-8 case, we can determine the
	4100	* exact length needed without having to parse
	4101	* through the string. Each character takes up
	4102	* 2 hex digits plus either a trailing dot or
	4103	* the "}" */
	4104	const char initial_text[] = "\\N{U+";
	4105	const STRLEN initial_len = sizeof(initial_text)
	4106	- 1;
	4107	d = off + SvGROW(sv, off
	4108	+ 3 * len
	4109
	4110	/* +1 for trailing NUL */
	4111	+ initial_len + 1
	4112
	4113	+ (STRLEN)(send - rbrace));
	4114	Copy(initial_text, d, initial_len, char);
	4115	d += initial_len;
	4116	while (str < str_end) {
	4117	char hex_string[4];
	4118	int len =
	4119	my_snprintf(hex_string,
	4120	sizeof(hex_string),
	4121	"%02X.",
	4122
	4123	/* The regex compiler is
	4124	* expecting Unicode, not
	4125	* native */
	4126	NATIVE_TO_LATIN1(*str));
	4127	PERL_MY_SNPRINTF_POST_GUARD(len,
	4128	sizeof(hex_string));
	4129	Copy(hex_string, d, 3, char);
	4130	d += 3;
	4131	str++;
	4132	}
	4133	d--; /* Below, we will overwrite the final
	4134	dot with a right brace */
	4135	}
	4136	else {
	4137	STRLEN char_length; /* cur char's byte length */
	4138
	4139	/* and the number of bytes after this is
	4140	* translated into hex digits */
	4141	STRLEN output_length;
	4142
	4143	/* 2 hex per byte; 2 chars for '\N'; 2 chars
	4144	* for max('U+', '.'); and 1 for NUL */
	4145	char hex_string[2 * UTF8_MAXBYTES + 5];
	4146
	4147	/* Get the first character of the result. */
	4148	U32 uv = utf8n_to_uvchr((U8 *) str,
	4149	len,
	4150	&char_length,
	4151	UTF8_ALLOW_ANYUV);
	4152	/* Convert first code point to Unicode hex,
	4153	* including the boiler plate before it. */
	4154	output_length =
	4155	my_snprintf(hex_string, sizeof(hex_string),
	4156	"\\N{U+%X",
	4157	(unsigned int) NATIVE_TO_UNI(uv));
	4158
	4159	/* Make sure there is enough space to hold it */
	4160	d = off + SvGROW(sv, off
	4161	+ output_length
	4162	+ (STRLEN)(send - rbrace)
	4163	+ 2); /* '}' + NUL */
	4164	/* And output it */
	4165	Copy(hex_string, d, output_length, char);
	4166	d += output_length;
	4167
	4168	/* For each subsequent character, append dot and
	4169	* its Unicode code point in hex */
	4170	while ((str += char_length) < str_end) {
	4171	const STRLEN off = d - SvPVX_const(sv);
	4172	U32 uv = utf8n_to_uvchr((U8 *) str,
	4173	str_end - str,
	4174	&char_length,
	4175	UTF8_ALLOW_ANYUV);
	4176	output_length =
	4177	my_snprintf(hex_string,
	4178	sizeof(hex_string),
	4179	".%X",
	4180	(unsigned int) NATIVE_TO_UNI(uv));
	4181
	4182	d = off + SvGROW(sv, off
	4183	+ output_length
	4184	+ (STRLEN)(send - rbrace)
	4185	+ 2); /* '}' + NUL */
	4186	Copy(hex_string, d, output_length, char);
	4187	d += output_length;
	4188	}
	4189	}
	4190
	4191	d++ = '}'; / Done. Add the trailing brace */
	4192	}
	4193	}
	4194	else { /* Here, not in a pattern. Convert the name to a
	4195	* string. */
	4196
	4197	if (PL_lex_inwhat == OP_TRANS) {
	4198	str = SvPV_const(res, len);
	4199	if (len > ((SvUTF8(res))
	4200	? UTF8SKIP(str)
	4201	: 1U))
	4202	{
	4203	yyerror(Perl_form(aTHX_
	4204	"%.*s must not be a named sequence"
	4205	" in transliteration operator",
	4206	/* +1 to include the "}" */
	4207	(int) (rbrace + 1 - start), start));
	4208	*d++ = '\0';
	4209	goto end_backslash_N;
	4210	}
	4211
	4212	if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
	4213	has_above_latin1 = TRUE;
	4214	}
	4215
	4216	}
	4217	else if (! SvUTF8(res)) {
	4218	/* Make sure \N{} return is UTF-8. This is because
	4219	* \N{} implies Unicode semantics, and scalars have
	4220	* to be in utf8 to guarantee those semantics; but
	4221	* not needed in tr/// */
	4222	sv_utf8_upgrade_flags(res, 0);
	4223	str = SvPV_const(res, len);
	4224	}
	4225
	4226	/* Upgrade destination to be utf8 if this new
	4227	* component is */
	4228	if (! d_is_utf8 && SvUTF8(res)) {
	4229	/* See Note on sizing above. */
	4230	const STRLEN extra = len + (send - s) + 1;
	4231
	4232	SvCUR_set(sv, d - SvPVX_const(sv));
	4233	SvPOK_on(sv);
	4234	*d = '\0';
	4235
	4236	if (utf8_variant_count == 0) {
	4237	SvUTF8_on(sv);
	4238	d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
	4239	}
	4240	else {
	4241	sv_utf8_upgrade_flags_grow(sv,
	4242	SV_GMAGIC\|SV_FORCE_UTF8_UPGRADE,
	4243	extra);
	4244	d = SvPVX(sv) + SvCUR(sv);
	4245	}
	4246	d_is_utf8 = TRUE;
	4247	} else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */
	4248
	4249	/* See Note on sizing above. (NOTE: SvCUR() is not
	4250	* set correctly here). */
	4251	const STRLEN extra = len + (send - rbrace) + 1;
	4252	const STRLEN off = d - SvPVX_const(sv);
	4253	d = off + SvGROW(sv, off + extra);
	4254	}
	4255	Copy(str, d, len, char);
	4256	d += len;
	4257	}
	4258
	4259	SvREFCNT_dec(res);
	4260
	4261	} /* End \N{NAME} */
	4262
	4263	end_backslash_N:
	4264	#ifdef EBCDIC
	4265	backslash_N++; /* \N{} is defined to be Unicode */
	4266	#endif
	4267	s = rbrace + 1; /* Point to just after the '}' */
	4268	continue;
	4269
	4270	/* \c is a control character */
	4271	case 'c':
	4272	s++;
	4273	if (s < send) {
	4274	const char * message;
	4275
	4276	if (! grok_bslash_c(s, (U8 ) d, &message, NULL)) {
	4277	yyerror(message);
	4278	yyquit(); /* Have always immediately croaked on
	4279	errors in this */
	4280	}
	4281	d++;
	4282	}
	4283	else {
	4284	yyerror("Missing control char name in \\c");
	4285	yyquit(); /* Are at end of input, no sense continuing */
	4286	}
	4287	#ifdef EBCDIC
	4288	non_portable_endpoint++;
	4289	#endif
	4290	break;
	4291
	4292	/* printf-style backslashes, formfeeds, newlines, etc */
	4293	case 'b':
	4294	*d++ = '\b';
	4295	break;
	4296	case 'n':
	4297	*d++ = '\n';
	4298	break;
	4299	case 'r':
	4300	*d++ = '\r';
	4301	break;
	4302	case 'f':
	4303	*d++ = '\f';
	4304	break;
	4305	case 't':
	4306	*d++ = '\t';
	4307	break;
	4308	case 'e':
	4309	*d++ = ESC_NATIVE;
	4310	break;
	4311	case 'a':
	4312	*d++ = '\a';
	4313	break;
	4314	} /* end switch */
	4315
	4316	s++;
	4317	continue;
	4318	} /* end if (backslash) */
	4319
	4320	default_action:
	4321	/* Just copy the input to the output, though we may have to convert
	4322	* to/from UTF-8.
	4323	*
	4324	* If the input has the same representation in UTF-8 as not, it will be
	4325	* a single byte, and we don't care about UTF8ness; just copy the byte */
	4326	if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
	4327	d++ = s++;
	4328	}
	4329	else if (! s_is_utf8 && ! d_is_utf8) {
	4330	/* If neither source nor output is UTF-8, is also a single byte,
	4331	* just copy it; but this byte counts should we later have to
	4332	* convert to UTF-8 */
	4333	d++ = s++;
	4334	utf8_variant_count++;
	4335	}
	4336	else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */
	4337	const STRLEN len = UTF8SKIP(s);
	4338
	4339	/* We expect the source to have already been checked for
	4340	* malformedness */
	4341	assert(isUTF8_CHAR((U8 ) s, (U8 ) send));
	4342
	4343	Copy(s, d, len, U8);
	4344	d += len;
	4345	s += len;
	4346	}
	4347	else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
	4348	STRLEN need = send - s + 1; /* See Note on sizing above. */
	4349
	4350	SvCUR_set(sv, d - SvPVX_const(sv));
	4351	SvPOK_on(sv);
	4352	*d = '\0';
	4353
	4354	if (utf8_variant_count == 0) {
	4355	SvUTF8_on(sv);
	4356	d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
	4357	}
	4358	else {
	4359	sv_utf8_upgrade_flags_grow(sv,
	4360	SV_GMAGIC\|SV_FORCE_UTF8_UPGRADE,
	4361	need);
	4362	d = SvPVX(sv) + SvCUR(sv);
	4363	}
	4364	d_is_utf8 = TRUE;
	4365	goto default_action; /* Redo, having upgraded so both are UTF-8 */
	4366	}
	4367	else { /* UTF8ness matters: convert this non-UTF8 source char to
	4368	UTF-8 for output. It will occupy 2 bytes, but don't include
	4369	the input byte since we haven't incremented 's' yet. See
	4370	Note on sizing above. */
	4371	const STRLEN off = d - SvPVX(sv);
	4372	const STRLEN extra = 2 + (send - s - 1) + 1;
	4373	if (off + extra > SvLEN(sv)) {
	4374	d = off + SvGROW(sv, off + extra);
	4375	}
	4376	d++ = UTF8_EIGHT_BIT_HI(s);
	4377	d++ = UTF8_EIGHT_BIT_LO(s);
	4378	s++;
	4379	}
	4380	} /* while loop to process each character */
	4381
	4382	{
	4383	const STRLEN off = d - SvPVX(sv);
	4384
	4385	/* See if room for the terminating NUL */
	4386	if (UNLIKELY(off >= SvLEN(sv))) {
	4387
	4388	#ifndef DEBUGGING
	4389
	4390	if (off > SvLEN(sv))
	4391	#endif
	4392	Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
	4393	" %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
	4394
	4395	/* Whew! Here we don't have room for the terminating NUL, but
	4396	* everything else so far has fit. It's not too late to grow
	4397	* to fit the NUL and continue on. But it is a bug, as the code
	4398	* above was supposed to have made room for this, so under
	4399	* DEBUGGING builds, we panic anyway. */
	4400	d = off + SvGROW(sv, off + 1);
	4401	}
	4402	}
	4403
	4404	/* terminate the string and set up the sv */
	4405	*d = '\0';
	4406	SvCUR_set(sv, d - SvPVX_const(sv));
	4407
	4408	SvPOK_on(sv);
	4409	if (d_is_utf8) {
	4410	SvUTF8_on(sv);
	4411	}
	4412
	4413	/* shrink the sv if we allocated more than we used */
	4414	if (SvCUR(sv) + 5 < SvLEN(sv)) {
	4415	SvPV_shrink_to_cur(sv);
	4416	}
	4417
	4418	/* return the substring (via pl_yylval) only if we parsed anything */
	4419	if (s > start) {
	4420	char *s2 = start;
	4421	for (; s2 < s; s2++) {
	4422	if (*s2 == '\n')
	4423	COPLINE_INC_WITH_HERELINES;
	4424	}
	4425	SvREFCNT_inc_simple_void_NN(sv);
	4426	if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
	4427	&& ! PL_parser->lex_re_reparsing)
	4428	{
	4429	const char *const key = PL_lex_inpat ? "qr" : "q";
	4430	const STRLEN keylen = PL_lex_inpat ? 2 : 1;
	4431	const char *type;
	4432	STRLEN typelen;
	4433
	4434	if (PL_lex_inwhat == OP_TRANS) {
	4435	type = "tr";
	4436	typelen = 2;
	4437	} else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
	4438	type = "s";
	4439	typelen = 1;
	4440	} else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
	4441	type = "q";
	4442	typelen = 1;
	4443	} else {
	4444	type = "qq";
	4445	typelen = 2;
	4446	}
	4447
	4448	sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
	4449	type, typelen, NULL);
	4450	}
	4451	pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
	4452	}
	4453	LEAVE_with_name("scan_const");
	4454	return s;
	4455	}
	4456
	4457	/* S_intuit_more
	4458	* Returns TRUE if there's more to the expression (e.g., a subscript),
	4459	* FALSE otherwise.
	4460	*
	4461	* It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
	4462	*
	4463	* ->[ and ->{ return TRUE
	4464	* ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
	4465	* { and [ outside a pattern are always subscripts, so return TRUE
	4466	* if we're outside a pattern and it's not { or [, then return FALSE
	4467	* if we're in a pattern and the first char is a {
	4468	* {4,5} (any digits around the comma) returns FALSE
	4469	* if we're in a pattern and the first char is a [
	4470	* [] returns FALSE
	4471	* [SOMETHING] has a funky heuristic to decide whether it's a
	4472	* character class or not. It has to deal with things like
	4473	* /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
	4474	* anything else returns TRUE
	4475	*/
	4476
	4477	/* This is the one truly awful dwimmer necessary to conflate C and sed. */
	4478
	4479	STATIC int
	4480	S_intuit_more(pTHX_ char s, char e)
	4481	{
	4482	PERL_ARGS_ASSERT_INTUIT_MORE;
	4483
	4484	/* This function has been mostly untouched for a long time, due to its,
	4485	* 'scariness', and lack of comments. khw has gone through and done some
	4486	* cleanup, while finding various instances of problematic behavior.
	4487	* Rather than change this base-level function immediately, khw has added
	4488	* commentary to those areas. */
	4489
	4490	/* If recursed within brackets, there is more to the expression */
	4491	if (PL_lex_brackets)
	4492	return TRUE;
	4493
	4494	/* If begins with '->' ... */
	4495	if (s[0] == '-' && s[1] == '>') {
	4496
	4497	/* '->[' and '->{' imply more to the expression */
	4498	if (s[2] == '[' \|\| s[2] == '{') {
	4499	return TRUE;
	4500	}
	4501
	4502	/* Any post deref construct implies more to the expression */
	4503	if ( FEATURE_POSTDEREF_QQ_IS_ENABLED
	4504	&& ( (s[2] == '$' && ( s[3] == '*'
	4505	\|\| (s[3] == '#' && s[4] == '*')))
	4506	\|\| (s[2] == '@' && memCHRs("*[{", s[3])) ))
	4507	{
	4508	return TRUE;
	4509	}
	4510	}
	4511
	4512	if (s[0] != '{' && s[0] != '[')
	4513	return FALSE;
	4514
	4515	/* quit immediately from any errors from now on */
	4516	PL_parser->sub_no_recover = TRUE;
	4517
	4518	/* Here is '{' or '['. Outside patterns, they're always subscripts */
	4519	if (!PL_lex_inpat)
	4520	return TRUE;
	4521
	4522	/* In a pattern, so maybe we have {n,m}, in which case, there isn't more to
	4523	* the expression.
	4524	*
	4525	* khw: This assumes that anything matching regcurly is a character class.
	4526	* The syntax of regcurly has been loosened since this function was
	4527	* written, and regcurly never required a comma, as in {0}. Probably it is
	4528	* ok as-is */
	4529	if (s[0] == '{') {
	4530	if (regcurly(s, e, NULL)) {
	4531	return FALSE;
	4532	}
	4533	return TRUE;
	4534	}
	4535
	4536	/* Here is '[': maybe we have a character class. Examine the guts */
	4537	s++;
	4538
	4539	/* '^' implies a character class; An empty '[]' isn't legal, but it does
	4540	* mean there isn't more to come */
	4541	if (s[0] == ']' \|\| s[0] == '^')
	4542	return FALSE;
	4543
	4544	/* Find matching ']'. khw: This means any s[1] below is guaranteed to
	4545	* exist */
	4546	const char * const send = (char *) memchr(s, ']', e - s);
	4547	if (! send) /* has to be an expression */
	4548	return TRUE;
	4549
	4550	/* If the construct consists entirely of one or two digits, call it a
	4551	* subscript. */
	4552	if (isDIGIT(s[0]) && send - s <= 2 && (send - s == 1 \|\| (isDIGIT(s[1])))) {
	4553	return TRUE;
	4554	}
	4555
	4556	/* this is terrifying, and it mostly works. See GH #16478.
	4557	*
	4558	* khw: That ticket shows that the heuristics here get things wrong. That
	4559	* most of the weights are divisible by 5 indicates that not a lot of
	4560	* tuning was done, and that the values are fairly arbitrary. Especially
	4561	* problematic are when all characters in the construct are numeric. We
	4562	* have [89] always resolving to a subscript, though that could well be a
	4563	* character class that is related to finding non-octals. And [100] is a
	4564	* character class when it could well be a subscript. */
	4565
	4566	int weight;
	4567
	4568	if (s[0] == '$') { /* First char is dollar; lean very slightly to it
	4569	being a subscript */
	4570	weight = -1;
	4571	}
	4572	else { /* Otherwise, lean a little more towards it being a
	4573	character class. */
	4574	weight = 2;
	4575	}
	4576
	4577	/* Unsigned version of current character */
	4578	unsigned char un_char = 0;
	4579
	4580	/* Keep track of how many multiple occurrences of the same character there
	4581	* are */
	4582	char seen[256];
	4583	Zero(seen, 256, char);
	4584
	4585	/* Examine each character in the construct */
	4586	bool first_time = true;
	4587	for (; s < send; s++, first_time = false) {
	4588	unsigned char prev_un_char = un_char;
	4589	un_char = (unsigned char) s[0];
	4590	switch (s[0]) {
	4591	case '@':
	4592	case '&':
	4593	case '$':
	4594
	4595	/* Each additional occurrence of one of these three strongly
	4596	* indicates it is a subscript */
	4597	weight -= seen[un_char] * 10;
	4598
	4599	/* Following one of these characters, we look to see if there is an
	4600	* identifier already found in the program by that name. If so,
	4601	* strongly suspect this isn't a character class */
	4602	if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
	4603	int len;
	4604	char tmpbuf[sizeof PL_tokenbuf * 4];
	4605	scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
	4606	len = (int)strlen(tmpbuf);
	4607	if ( len > 1
	4608	&& gv_fetchpvn_flags(tmpbuf,
	4609	len,
	4610	UTF ? SVf_UTF8 : 0,
	4611	SVt_PV))
	4612	weight -= 100;
	4613	else /* Not a multi-char identifier already known in the
	4614	program; is somewhat likely to be a subscript */
	4615	weight -= 10;
	4616	}
	4617	else if ( s[0] == '$'
	4618	&& s[1]
	4619	&& memCHRs("[#!%*<>()-=", s[1]))
	4620	{
	4621	/* Here we have what could be a punctuation variable. If the
	4622	* next character after it is a closing bracket, it makes it
	4623	* quite likely to be that, and hence a subscript. If it is
	4624	* something else, more mildly a subscript */
	4625	if (/{/ memCHRs("])} =", s[2]))
	4626	weight -= 10;
	4627	else
	4628	weight -= 1;
	4629	}
	4630	break;
	4631
	4632	case '\\':
	4633	if (s[1]) {
	4634	if (memCHRs("wds]", s[1]))
	4635	weight += 100; /* \w \d \s => strongly charclass */
	4636	/* khw: Why not \W \D \S \h \v, etc as well? */
	4637	else if (seen[(U8)'\''] \|\| seen[(U8)'"'])
	4638	weight += 1; /* \' => mildly charclass */
	4639	else if (memCHRs("abcfnrtvx", s[1]))
	4640	weight += 40; /* \n, etc => charclass */
	4641	/* khw: Why not \e etc as well? */
	4642	else if (isDIGIT(s[1])) {
	4643	weight += 40; /* \123 => charclass */
	4644	while (s[1] && isDIGIT(s[1]))
	4645	s++;
	4646	}
	4647	}
	4648	else /* \ followed by NUL strongly indicates character class */
	4649	weight += 100;
	4650	break;
	4651
	4652	case '-':
	4653	/* If it is something like '-\', it is more likely to be a
	4654	* character class.
	4655	*
	4656	* khw: The rest of the conditionals in this 'case' really should
	4657	* be subject to an 'else' of this condition */
	4658	if (s[1] == '\\')
	4659	weight += 50;
	4660
	4661	/* If it is something like 'a-' or '0-', it is more likely to
	4662	* be a character class. '!' is the first ASCII graphic, so '!-'
	4663	* would be the start of a range of graphics. */
	4664	if (! first_time && memCHRs("aA01! ", prev_un_char))
	4665	weight += 30;
	4666
	4667	/* If it is something like '-Z' or '-7' (for octal) or '-9' it
	4668	* is more likely to be a character class. '~' is the final ASCII
	4669	* graphic, so '-~' would be the end of a range of graphics.
	4670	*
	4671	* khw: Having [-z] really doesn't imply what the comments above
	4672	* indicate, so this should only be tested when '! first_time' */
	4673	if (memCHRs("zZ79~", s[1]))
	4674	weight += 30;
	4675
	4676	/* If it is something like -1 or -$foo, it is more likely to be a
	4677	* subscript. */
	4678	if (first_time && (isDIGIT(s[1]) \|\| s[1] == '$')) {
	4679	weight -= 5; /* cope with negative subscript */
	4680	}
	4681	break;
	4682
	4683	default:
	4684	if ( (first_time \|\| ( ! isWORDCHAR(prev_un_char)
	4685	&& prev_un_char != '$'
	4686	&& prev_un_char != '@'
	4687	&& prev_un_char != '&'))
	4688	&& isALPHA(s[0])
	4689	&& isALPHA(s[1]))
	4690	{
	4691	/* Here it's \W (that isn't [$@&] ) followed immediately by two
	4692	* alphas in a row. Accumulate all the consecutive alphas */
	4693	char *d = s;
	4694	while (isALPHA(s[0]))
	4695	s++;
	4696
	4697	/* If those alphas spell a keyword, it's almost certainly not a
	4698	* character class */
	4699	if (keyword(d, s - d, 0))
	4700	weight -= 150;
	4701
	4702	/* khw: Should those alphas be marked as seen? */
	4703	}
	4704
	4705	/* Consecutive chars like [...12...] and [...ab...] are presumed
	4706	* more likely to be character classes */
	4707	if ( ! first_time
	4708	&& ( NATIVE_TO_LATIN1(un_char)
	4709	== NATIVE_TO_LATIN1(prev_un_char) + 1))
	4710	{
	4711	weight += 5;
	4712	}
	4713
	4714	/* But repeating a character inside a character class does nothing,
	4715	* like [aba], so less likely that someone makes such a class, more
	4716	* likely that it is a subscript; the more repeats, the less
	4717	* likely. */
	4718	weight -= seen[un_char];
	4719	break;
	4720	} /* End of switch */
	4721
	4722	/* khw: 'seen' is declared as a char. This ++ can cause it to wrap.
	4723	* This gives different results with compilers for which a plain 'char'
	4724	* is actually unsigned, versus those where it is signed. I believe it
	4725	* is undefined behavior to wrap a 'signed'. I think it should be
	4726	* instead declared an unsigned int to make the chances of wrapping
	4727	* essentially zero.
	4728	*
	4729	* And I believe that extra backslashes are different from other
	4730	* repeated characters. */
	4731	seen[un_char]++;
	4732	} /* End of loop through each character of the construct */
	4733
	4734	if (weight >= 0) /* probably a character class */
	4735	return FALSE;
	4736
	4737	return TRUE;
	4738	}
	4739
	4740	/*
	4741	* S_intuit_method
	4742	*
	4743	* Does all the checking to disambiguate
	4744	* foo bar
	4745	* between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
	4746	* METHCALL (bar->foo(args)) or METHCALL0 (bar->foo args).
	4747	*
	4748	* First argument is the stuff after the first token, e.g. "bar".
	4749	*
	4750	* Not a method if foo is a filehandle.
	4751	* Not a method if foo is a subroutine prototyped to take a filehandle.
	4752	* Not a method if it's really "Foo $bar"
	4753	* Method if it's "foo $bar"
	4754	* Not a method if it's really "print foo $bar"
	4755	* Method if it's really "foo package::" (interpreted as package->foo)
	4756	* Not a method if bar is known to be a subroutine ("sub bar; foo bar")
	4757	* Not a method if bar is a filehandle or package, but is quoted with
	4758	* =>
	4759	*/
	4760
	4761	STATIC int
	4762	S_intuit_method(pTHX_ char start, SV ioname, CV *cv)
	4763	{
	4764	char s = start + (start == '$');
	4765	char tmpbuf[sizeof PL_tokenbuf];
	4766	STRLEN len;
	4767	GV* indirgv;
	4768	/* Mustn't actually add anything to a symbol table.
	4769	But also don't want to "initialise" any placeholder
	4770	constants that might already be there into full
	4771	blown PVGVs with attached PVCV. */
	4772	GV * const gv =
	4773	ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
	4774
	4775	PERL_ARGS_ASSERT_INTUIT_METHOD;
	4776
	4777	if (!FEATURE_INDIRECT_IS_ENABLED)
	4778	return 0;
	4779
	4780	if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
	4781	return 0;
	4782	if (cv && SvPOK(cv)) {
	4783	const char *proto = CvPROTO(cv);
	4784	if (proto) {
	4785	while (proto && (isSPACE(proto) \|\| *proto == ';'))
	4786	proto++;
	4787	if (proto == '')
	4788	return 0;
	4789	}
	4790	}
	4791
	4792	if (*start == '$') {
	4793	SSize_t start_off = start - SvPVX(PL_linestr);
	4794	if (cv \|\| PL_last_lop_op == OP_PRINT \|\| PL_last_lop_op == OP_SAY
	4795	\|\| isUPPER(*PL_tokenbuf))
	4796	return 0;
	4797	/* this could be $# */
	4798	if (isSPACE(*s))
	4799	s = skipspace(s);
	4800	PL_bufptr = SvPVX(PL_linestr) + start_off;
	4801	PL_expect = XREF;
	4802	return *s == '(' ? METHCALL : METHCALL0;
	4803	}
	4804
	4805	s = scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE);
	4806	/* start is the beginning of the possible filehandle/object,
	4807	* and s is the end of it
	4808	* tmpbuf is a copy of it (but with single quotes as double colons)
	4809	*/
	4810
	4811	if (!keyword(tmpbuf, len, 0)) {
	4812	if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
	4813	len -= 2;
	4814	tmpbuf[len] = '\0';
	4815	goto bare_package;
	4816	}
	4817	indirgv = gv_fetchpvn_flags(tmpbuf, len,
	4818	GV_NOADD_NOINIT\|( UTF ? SVf_UTF8 : 0 ),
	4819	SVt_PVCV);
	4820	if (indirgv && SvTYPE(indirgv) != SVt_NULL
	4821	&& (!isGV(indirgv) \|\| GvCVu(indirgv)))
	4822	return 0;
	4823	/* filehandle or package name makes it a method */
	4824	if (!cv \|\| GvIO(indirgv) \|\| gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
	4825	s = skipspace(s);
	4826	if ((PL_bufend - s) >= 2 && s == '=' && (s+1) == '>')
	4827	return 0; /* no assumptions -- "=>" quotes bareword */
	4828	bare_package:
	4829	NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
	4830	S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
	4831	NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
	4832	PL_expect = XTERM;
	4833	force_next(BAREWORD);
	4834	PL_bufptr = s;
	4835	return *s == '(' ? METHCALL : METHCALL0;
	4836	}
	4837	}
	4838	return 0;
	4839	}
	4840
	4841	/* Encoded script support. filter_add() effectively inserts a
	4842	* 'pre-processing' function into the current source input stream.
	4843	* Note that the filter function only applies to the current source file
	4844	* (e.g., it will not affect files 'require'd or 'use'd by this one).
	4845	*
	4846	* The datasv parameter (which may be NULL) can be used to pass
	4847	* private data to this instance of the filter. The filter function
	4848	* can recover the SV using the FILTER_DATA macro and use it to
	4849	* store private buffers and state information.
	4850	*
	4851	* The supplied datasv parameter is upgraded to a PVIO type
	4852	* and the IoDIRP/IoANY field is used to store the function pointer,
	4853	* and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
	4854	* Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
	4855	* private use must be set using malloc'd pointers.
	4856	*/
	4857
	4858	SV *
	4859	Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
	4860	{
	4861	if (!funcp)
	4862	return NULL;
	4863
	4864	if (!PL_parser)
	4865	return NULL;
	4866
	4867	if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
	4868	Perl_croak(aTHX_ "Source filters apply only to byte streams");
	4869
	4870	if (!PL_rsfp_filters)
	4871	PL_rsfp_filters = newAV();
	4872	if (!datasv)
	4873	datasv = newSV(0);
	4874	SvUPGRADE(datasv, SVt_PVIO);
	4875	IoANY(datasv) = FPTR2DPTR(void , funcp); / stash funcp into spare field */
	4876	IoFLAGS(datasv) \|= IOf_FAKE_DIRP;
	4877	DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
	4878	FPTR2DPTR(void *, IoANY(datasv)),
	4879	SvPV_nolen(datasv)));
	4880	av_unshift(PL_rsfp_filters, 1);
	4881	av_store(PL_rsfp_filters, 0, datasv) ;
	4882	if (
	4883	!PL_parser->filtered
	4884	&& PL_parser->lex_flags & LEX_EVALBYTES
	4885	&& PL_bufptr < PL_bufend
	4886	) {
	4887	const char *s = PL_bufptr;
	4888	while (s < PL_bufend) {
	4889	if (*s == '\n') {
	4890	SV *linestr = PL_parser->linestr;
	4891	char *buf = SvPVX(linestr);
	4892	STRLEN const bufptr_pos = PL_parser->bufptr - buf;
	4893	STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
	4894	STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
	4895	STRLEN const linestart_pos = PL_parser->linestart - buf;
	4896	STRLEN const last_uni_pos =
	4897	PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
	4898	STRLEN const last_lop_pos =
	4899	PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
	4900	av_push(PL_rsfp_filters, linestr);
	4901	PL_parser->linestr =
	4902	newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
	4903	buf = SvPVX(PL_parser->linestr);
	4904	PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
	4905	PL_parser->bufptr = buf + bufptr_pos;
	4906	PL_parser->oldbufptr = buf + oldbufptr_pos;
	4907	PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
	4908	PL_parser->linestart = buf + linestart_pos;
	4909	if (PL_parser->last_uni)
	4910	PL_parser->last_uni = buf + last_uni_pos;
	4911	if (PL_parser->last_lop)
	4912	PL_parser->last_lop = buf + last_lop_pos;
	4913	SvLEN_set(linestr, SvCUR(linestr));
	4914	SvCUR_set(linestr, s - SvPVX(linestr));
	4915	PL_parser->filtered = 1;
	4916	break;
	4917	}
	4918	s++;
	4919	}
	4920	}
	4921	return(datasv);
	4922	}
	4923
	4924	/*
	4925	=for apidoc_section $filters
	4926	=for apidoc filter_del
	4927
	4928	Delete most recently added instance of the filter function argument
	4929
	4930	=cut
	4931	*/
	4932
	4933	void
	4934	Perl_filter_del(pTHX_ filter_t funcp)
	4935	{
	4936	SV *datasv;
	4937
	4938	PERL_ARGS_ASSERT_FILTER_DEL;
	4939
	4940	#ifdef DEBUGGING
	4941	DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
	4942	FPTR2DPTR(void*, funcp)));
	4943	#endif
	4944	if (!PL_parser \|\| !PL_rsfp_filters \|\| AvFILLp(PL_rsfp_filters)<0)
	4945	return;
	4946	/* if filter is on top of stack (usual case) just pop it off */
	4947	datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
	4948	if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
	4949	SvREFCNT_dec(av_pop(PL_rsfp_filters));
	4950
	4951	return;
	4952	}
	4953	/* we need to search for the correct entry and clear it */
	4954	Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
	4955	}
	4956
	4957
	4958	/* Invoke the idxth filter function for the current rsfp. */
	4959	/* maxlen 0 = read one text line */
	4960	I32
	4961	Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
	4962	{
	4963	filter_t funcp;
	4964	I32 ret;
	4965	SV *datasv = NULL;
	4966	/* This API is bad. It should have been using unsigned int for maxlen.
	4967	Not sure if we want to change the API, but if not we should sanity
	4968	check the value here. */
	4969	unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
	4970
	4971	PERL_ARGS_ASSERT_FILTER_READ;
	4972
	4973	if (!PL_parser \|\| !PL_rsfp_filters)
	4974	return -1;
	4975	if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
	4976	/* Provide a default input filter to make life easy. */
	4977	/* Note that we append to the line. This is handy. */
	4978	DEBUG_P(PerlIO_printf(Perl_debug_log,
	4979	"filter_read %d: from rsfp\n", idx));
	4980	if (correct_length) {
	4981	/* Want a block */
	4982	int len ;
	4983	const int old_len = SvCUR(buf_sv);
	4984
	4985	/* ensure buf_sv is large enough */
	4986	SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
	4987	if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
	4988	correct_length)) <= 0) {
	4989	if (PerlIO_error(PL_rsfp))
	4990	return -1; /* error */
	4991	else
	4992	return 0 ; /* end of file */
	4993	}
	4994	SvCUR_set(buf_sv, old_len + len) ;
	4995	SvPVX(buf_sv)[old_len + len] = '\0';
	4996	} else {
	4997	/* Want a line */
	4998	if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
	4999	if (PerlIO_error(PL_rsfp))
	5000	return -1; /* error */
	5001	else
	5002	return 0 ; /* end of file */
	5003	}
	5004	}
	5005	return SvCUR(buf_sv);
	5006	}
	5007	/* Skip this filter slot if filter has been deleted */
	5008	if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
	5009	DEBUG_P(PerlIO_printf(Perl_debug_log,
	5010	"filter_read %d: skipped (filter deleted)\n",
	5011	idx));
	5012	return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
	5013	}
	5014	if (SvTYPE(datasv) != SVt_PVIO) {
	5015	if (correct_length) {
	5016	/* Want a block */
	5017	const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
	5018	if (!remainder) return 0; /* eof */
	5019	if (correct_length > remainder) correct_length = remainder;
	5020	sv_catpvn(buf_sv, SvEND(datasv), correct_length);
	5021	SvCUR_set(datasv, SvCUR(datasv) + correct_length);
	5022	} else {
	5023	/* Want a line */
	5024	const char *s = SvEND(datasv);
	5025	const char *send = SvPVX(datasv) + SvLEN(datasv);
	5026	while (s < send) {
	5027	if (*s == '\n') {
	5028	s++;
	5029	break;
	5030	}
	5031	s++;
	5032	}
	5033	if (s == send) return 0; /* eof */
	5034	sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
	5035	SvCUR_set(datasv, s-SvPVX(datasv));
	5036	}
	5037	return SvCUR(buf_sv);
	5038	}
	5039	/* Get function pointer hidden within datasv */
	5040	funcp = DPTR2FPTR(filter_t, IoANY(datasv));
	5041	DEBUG_P(PerlIO_printf(Perl_debug_log,
	5042	"filter_read %d: via function %p (%s)\n",
	5043	idx, (void*)datasv, SvPV_nolen_const(datasv)));
	5044	/* Call function. The function is expected to */
	5045	/* call "FILTER_READ(idx+1, buf_sv)" first. */
	5046	/* Return: <0:error, =0:eof, >0:not eof */
	5047	ENTER;
	5048	save_scalar(PL_errgv);
	5049
	5050	/* although this calls out to a random C function, there's a good
	5051	* chance that that function will call back into perl (e.g. using
	5052	* Filter::Util::Call). So downgrade the stack to
	5053	* non-reference-counted for backwards compatibility - i.e. do the
	5054	* equivalent of xs_wrap(), but this time we know there are no
	5055	* args to be passed or returned on the stack, simplifying it.
	5056	*/
	5057	#ifdef PERL_RC_STACK
	5058	assert(AvREAL(PL_curstack));
	5059	I32 oldbase = PL_curstackinfo->si_stack_nonrc_base;
	5060	I32 oldsp = PL_stack_sp - PL_stack_base;
	5061	if (!oldbase)
	5062	PL_curstackinfo->si_stack_nonrc_base = oldsp + 1;
	5063	#endif
	5064
	5065	ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
	5066
	5067	#ifdef PERL_RC_STACK
	5068	assert(oldsp == PL_stack_sp - PL_stack_base);
	5069	assert(AvREAL(PL_curstack));
	5070	assert(PL_curstackinfo->si_stack_nonrc_base ==
	5071	oldbase ? oldbase : oldsp + 1);
	5072	PL_curstackinfo->si_stack_nonrc_base = oldbase;
	5073	#endif
	5074
	5075	LEAVE;
	5076	return ret;
	5077	}
	5078
	5079	STATIC char *
	5080	S_filter_gets(pTHX_ SV *sv, STRLEN append)
	5081	{
	5082	PERL_ARGS_ASSERT_FILTER_GETS;
	5083
	5084	#ifdef PERL_CR_FILTER
	5085	if (!PL_rsfp_filters) {
	5086	filter_add(S_cr_textfilter,NULL);
	5087	}
	5088	#endif
	5089	if (PL_rsfp_filters) {
	5090	if (!append)
	5091	SvCUR_set(sv, 0); /* start with empty line */
	5092	if (FILTER_READ(0, sv, 0) > 0)
	5093	return ( SvPVX(sv) ) ;
	5094	else
	5095	return NULL ;
	5096	}
	5097	else
	5098	return (sv_gets(sv, PL_rsfp, append));
	5099	}
	5100
	5101	STATIC HV *
	5102	S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
	5103	{
	5104	GV *gv;
	5105
	5106	PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
	5107
	5108	if (memEQs(pkgname, len, "__PACKAGE__"))
	5109	return PL_curstash;
	5110
	5111	if (len > 2
	5112	&& (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
	5113	&& (gv = gv_fetchpvn_flags(pkgname,
	5114	len,
	5115	( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
	5116	{
	5117	return GvHV(gv); /* Foo:: */
	5118	}
	5119
	5120	/* use constant CLASS => 'MyClass' */
	5121	gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
	5122	if (gv && GvCV(gv)) {
	5123	SV * const sv = cv_const_sv(GvCV(gv));
	5124	if (sv)
	5125	return gv_stashsv(sv, 0);
	5126	}
	5127
	5128	return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
	5129	}
	5130
	5131
	5132	STATIC char *
	5133	S_tokenize_use(pTHX_ int is_use, char *s) {
	5134	PERL_ARGS_ASSERT_TOKENIZE_USE;
	5135
	5136	if (PL_expect != XSTATE)
	5137	/* diag_listed_as: "use" not allowed in expression */
	5138	yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
	5139	is_use ? "use" : "no"));
	5140	PL_expect = XTERM;
	5141	s = skipspace(s);
	5142	if (isDIGIT(s) \|\| (s == 'v' && isDIGIT(s[1]))) {
	5143	s = force_version(s, TRUE);
	5144	if (s == ';' \|\| s == '}'
	5145	\|\| (s = skipspace(s), (s == ';' \|\| s == '}'))) {
	5146	NEXTVAL_NEXTTOKE.opval = NULL;
	5147	force_next(BAREWORD);
	5148	}
	5149	else if (*s == 'v') {
	5150	s = force_word(s,BAREWORD,FALSE,TRUE);
	5151	s = force_version(s, FALSE);
	5152	}
	5153	}
	5154	else {
	5155	s = force_word(s,BAREWORD,FALSE,TRUE);
	5156	s = force_version(s, FALSE);
	5157	}
	5158	pl_yylval.ival = is_use;
	5159	return s;
	5160	}
	5161	#ifdef DEBUGGING
	5162	static const char* const exp_name[] =
	5163	{ "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
	5164	"ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
	5165	"SIGVAR", "TERMORDORDOR"
	5166	};
	5167	#endif
	5168
	5169	#define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
	5170	STATIC bool
	5171	S_word_takes_any_delimiter(char *p, STRLEN len)
	5172	{
	5173	return (len == 1 && memCHRs("msyq", p[0]))
	5174	\|\| (len == 2
	5175	&& ((p[0] == 't' && p[1] == 'r')
	5176	\|\| (p[0] == 'q' && memCHRs("qwxr", p[1]))));
	5177	}
	5178
	5179	static void
	5180	S_check_scalar_slice(pTHX_ char *s)
	5181	{
	5182	s++;
	5183	while (SPACE_OR_TAB(*s)) s++;
	5184	if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
	5185	PL_bufend,
	5186	UTF))
	5187	{
	5188	return;
	5189	}
	5190	while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
	5191	\|\| (s && memCHRs(" \t$#+-'\"", s)))
	5192	{
	5193	s += UTF ? UTF8SKIP(s) : 1;
	5194	}
	5195	if (s == '}' \|\| s == ']')
	5196	pl_yylval.ival = OPpSLICEWARNING;
	5197	}
	5198
	5199	#define lex_token_boundary() S_lex_token_boundary(aTHX)
	5200	static void
	5201	S_lex_token_boundary(pTHX)
	5202	{
	5203	PL_oldoldbufptr = PL_oldbufptr;
	5204	PL_oldbufptr = PL_bufptr;
	5205	}
	5206
	5207	#define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
	5208	static char *
	5209	S_vcs_conflict_marker(pTHX_ char *s)
	5210	{
	5211	lex_token_boundary();
	5212	PL_bufptr = s;
	5213	yyerror("Version control conflict marker");
	5214	while (s < PL_bufend && *s != '\n')
	5215	s++;
	5216	return s;
	5217	}
	5218
	5219	static int
	5220	yyl_sigvar(pTHX_ char *s)
	5221	{
	5222	/* we expect the sigil and optional var name part of a
	5223	* signature element here. Since a '$' is not necessarily
	5224	* followed by a var name, handle it specially here; the general
	5225	* yylex code would otherwise try to interpret whatever follows
	5226	* as a var; e.g. ($, ...) would be seen as the var '$,'
	5227	*/
	5228
	5229	U8 sigil;
	5230
	5231	s = skipspace(s);
	5232	sigil = *s++;
	5233	PL_bufptr = s; /* for error reporting */
	5234	switch (sigil) {
	5235	case '$':
	5236	case '@':
	5237	case '%':
	5238	/* spot stuff that looks like an prototype */
	5239	if (memCHRs("$:@%&;\\[]", s)) {
	5240	yyerror("Illegal character following sigil in a subroutine signature");
	5241	break;
	5242	}
	5243	/* '$#' is banned, while '$ # comment' isn't */
	5244	if (*s == '#') {
	5245	yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
	5246	break;
	5247	}
	5248	s = skipspace(s);
	5249	if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
	5250	char *dest = PL_tokenbuf + 1;
	5251	/* read var name, including sigil, into PL_tokenbuf */
	5252	PL_tokenbuf[0] = sigil;
	5253	parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
	5254	0, cBOOL(UTF), FALSE, FALSE);
	5255	*dest = '\0';
	5256	assert(PL_tokenbuf[1]); /* we have a variable name */
	5257	}
	5258	else {
	5259	*PL_tokenbuf = 0;
	5260	PL_in_my = 0;
	5261	}
	5262
	5263	s = skipspace(s);
	5264	/* parse the = for the default ourselves to avoid '+=' etc being accepted here
	5265	* as the ASSIGNOP, and exclude other tokens that start with =
	5266	*/
	5267	if (*s == '=' && (!s[1] \|\| memCHRs("=~>", s[1]) == 0)) {
	5268	/* save now to report with the same context as we did when
	5269	* all ASSIGNOPS were accepted */
	5270	PL_oldbufptr = s;
	5271
	5272	++s;
	5273	NEXTVAL_NEXTTOKE.ival = OP_SASSIGN;
	5274	force_next(ASSIGNOP);
	5275	PL_expect = XTERM;
	5276	}
	5277	else if(*s == '/' && s[1] == '/' && s[2] == '=') {
	5278	PL_oldbufptr = s;
	5279
	5280	s += 3;
	5281	NEXTVAL_NEXTTOKE.ival = OP_DORASSIGN;
	5282	force_next(ASSIGNOP);
	5283	PL_expect = XTERM;
	5284	}
	5285	else if(*s == '\|' && s[1] == '\|' && s[2] == '=') {
	5286	PL_oldbufptr = s;
	5287
	5288	s += 3;
	5289	NEXTVAL_NEXTTOKE.ival = OP_ORASSIGN;
	5290	force_next(ASSIGNOP);
	5291	PL_expect = XTERM;
	5292	}
	5293	else if (s == ',' \|\| s == ')') {
	5294	PL_expect = XOPERATOR;
	5295	}
	5296	else {
	5297	/* make sure the context shows the unexpected character and
	5298	* hopefully a bit more */
	5299	if (*s) ++s;
	5300	while (s && s != '$' && s != '@' && s != '%' && *s != ')')
	5301	s++;
	5302	PL_bufptr = s; /* for error reporting */
	5303	yyerror("Illegal operator following parameter in a subroutine signature");
	5304	PL_in_my = 0;
	5305	}
	5306	if (*PL_tokenbuf) {
	5307	NEXTVAL_NEXTTOKE.ival = sigil;
	5308	force_next('p'); /* force a signature pending identifier */
	5309	}
	5310	break;
	5311
	5312	case ')':
	5313	PL_expect = XBLOCK;
	5314	break;
	5315	case ',': /* handle ($a,,$b) */
	5316	break;
	5317
	5318	default:
	5319	PL_in_my = 0;
	5320	yyerror("A signature parameter must start with '$', '@' or '%'");
	5321	/* very crude error recovery: skip to likely next signature
	5322	* element */
	5323	while (s && s != '$' && s != '@' && s != '%' && *s != ')')
	5324	s++;
	5325	break;
	5326	}
	5327
	5328	switch (sigil) {
	5329	case ',': TOKEN (PERLY_COMMA);
	5330	case '$': TOKEN (PERLY_DOLLAR);
	5331	case '@': TOKEN (PERLY_SNAIL);
	5332	case '%': TOKEN (PERLY_PERCENT_SIGN);
	5333	case ')': TOKEN (PERLY_PAREN_CLOSE);
	5334	default: TOKEN (sigil);
	5335	}
	5336	}
	5337
	5338	static int
	5339	yyl_dollar(pTHX_ char *s)
	5340	{
	5341	CLINE;
	5342
	5343	if (PL_expect == XPOSTDEREF) {
	5344	if (s[1] == '#') {
	5345	s++;
	5346	POSTDEREF(DOLSHARP);
	5347	}
	5348	POSTDEREF(PERLY_DOLLAR);
	5349	}
	5350
	5351	if ( s[1] == '#'
	5352	&& ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
	5353	\|\| memCHRs("{$:+-@", s[2])))
	5354	{
	5355	PL_tokenbuf[0] = '@';
	5356	s = scan_ident(s + 1, PL_tokenbuf + 1,
	5357	sizeof PL_tokenbuf - 1, FALSE);
	5358	if (PL_expect == XOPERATOR) {
	5359	char *d = s;
	5360	if (PL_bufptr > s) {
	5361	d = PL_bufptr-1;
	5362	PL_bufptr = PL_oldbufptr;
	5363	}
	5364	no_op("Array length", d);
	5365	}
	5366	if (!PL_tokenbuf[1])
	5367	PREREF(DOLSHARP);
	5368	PL_expect = XOPERATOR;
	5369	force_ident_maybe_lex('#');
	5370	TOKEN(DOLSHARP);
	5371	}
	5372
	5373	PL_tokenbuf[0] = '$';
	5374	s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
	5375	if (PL_expect == XOPERATOR) {
	5376	char *d = s;
	5377	if (PL_bufptr > s) {
	5378	d = PL_bufptr-1;
	5379	PL_bufptr = PL_oldbufptr;
	5380	}
	5381	no_op("Scalar", d);
	5382	}
	5383	if (!PL_tokenbuf[1]) {
	5384	if (s == PL_bufend)
	5385	yyerror("Final $ should be \\$ or $name");
	5386	PREREF(PERLY_DOLLAR);
	5387	}
	5388
	5389	{
	5390	const char tmp = *s;
	5391	if (PL_lex_state == LEX_NORMAL \|\| PL_lex_brackets)
	5392	s = skipspace(s);
	5393
	5394	if ( (PL_expect != XREF \|\| PL_oldoldbufptr == PL_last_lop)
	5395	&& intuit_more(s, PL_bufend)) {
	5396	if (*s == '[') {
	5397	PL_tokenbuf[0] = '@';
	5398	if (ckWARN(WARN_SYNTAX)) {
	5399	char *t = s+1;
	5400
	5401	while ( t < PL_bufend ) {
	5402	if (isSPACE(*t)) {
	5403	do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
	5404	/* consumed one or more space chars */
	5405	} else if (t == '$' \|\| t == '@') {
	5406	/* could be more than one '$' like $$ref or @$ref */
	5407	do { t++; } while (t < PL_bufend && *t == '$');
	5408
	5409	/* could be an abigail style identifier like $ foo */
	5410	while (t < PL_bufend && *t == ' ') t++;
	5411
	5412	/* strip off the name of the var */
	5413	while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
	5414	t += UTF ? UTF8SKIP(t) : 1;
	5415	/* consumed a varname */
	5416	} else if (isDIGIT(*t)) {
	5417	/* deal with hex constants like 0x11 */
	5418	if (t[0] == '0' && t[1] == 'x') {
	5419	t += 2;
	5420	while (t < PL_bufend && isXDIGIT(*t)) t++;
	5421	} else {
	5422	/* deal with decimal/octal constants like 1 and 0123 */
	5423	do { t++; } while (isDIGIT(*t));
	5424	if (t<PL_bufend && *t == '.') {
	5425	do { t++; } while (isDIGIT(*t));
	5426	}
	5427	}
	5428	/* consumed a number */
	5429	} else {
	5430	/* not a var nor a space nor a number */
	5431	break;
	5432	}
	5433	}
	5434	if (t < PL_bufend && *t++ == ',') {
	5435	PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
	5436	while (t < PL_bufend && *t != ']')
	5437	t++;
	5438	Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
	5439	"Multidimensional syntax %" UTF8f " not supported",
	5440	UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
	5441	}
	5442	}
	5443	}
	5444	else if (*s == '{') {
	5445	char *t;
	5446	PL_tokenbuf[0] = '%';
	5447	if ( strEQ(PL_tokenbuf+1, "SIG")
	5448	&& ckWARN(WARN_SYNTAX)
	5449	&& (t = (char *) memchr(s, '}', PL_bufend - s))
	5450	&& (t = (char *) memchr(t, '=', PL_bufend - t)))
	5451	{
	5452	char tmpbuf[sizeof PL_tokenbuf];
	5453	do {
	5454	t++;
	5455	} while (isSPACE(*t));
	5456	if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
	5457	STRLEN len;
	5458	t = scan_word6(t, tmpbuf, sizeof tmpbuf, TRUE,
	5459	&len, TRUE);
	5460	while (isSPACE(*t))
	5461	t++;
	5462	if ( *t == ';'
	5463	&& get_cvn_flags(tmpbuf, len, UTF
	5464	? SVf_UTF8
	5465	: 0))
	5466	{
	5467	Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
	5468	"You need to quote \"%" UTF8f "\"",
	5469	UTF8fARG(UTF, len, tmpbuf));
	5470	}
	5471	}
	5472	}
	5473	}
	5474	}
	5475
	5476	PL_expect = XOPERATOR;
	5477	if ((PL_lex_state == LEX_NORMAL \|\| PL_lex_brackets) && isSPACE((char)tmp)) {
	5478	const bool islop = (PL_last_lop == PL_oldoldbufptr);
	5479	if (!islop \|\| PL_last_lop_op == OP_GREPSTART)
	5480	PL_expect = XOPERATOR;
	5481	else if (memCHRs("$@\"'`q", *s))
	5482	PL_expect = XTERM; /* e.g. print $fh "foo" */
	5483	else if ( memCHRs("&<%", s)
	5484	&& isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
	5485	{
	5486	PL_expect = XTERM; /* e.g. print $fh &sub */
	5487	}
	5488	else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
	5489	char tmpbuf[sizeof PL_tokenbuf];
	5490	int t2;
	5491	STRLEN len;
	5492	scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE);
	5493	if ((t2 = keyword(tmpbuf, len, 0))) {
	5494	/* binary operators exclude handle interpretations */
	5495	switch (t2) {
	5496	case -KEY_x:
	5497	case -KEY_eq:
	5498	case -KEY_ne:
	5499	case -KEY_gt:
	5500	case -KEY_lt:
	5501	case -KEY_ge:
	5502	case -KEY_le:
	5503	case -KEY_cmp:
	5504	break;
	5505	default:
	5506	PL_expect = XTERM; /* e.g. print $fh length() */
	5507	break;
	5508	}
	5509	}
	5510	else {
	5511	PL_expect = XTERM; /* e.g. print $fh subr() */
	5512	}
	5513	}
	5514	else if (isDIGIT(*s))
	5515	PL_expect = XTERM; /* e.g. print $fh 3 */
	5516	else if (*s == '.' && isDIGIT(s[1]))
	5517	PL_expect = XTERM; /* e.g. print $fh .3 */
	5518	else if ((s == '?' \|\| s == '-' \|\| *s == '+')
	5519	&& !isSPACE(s[1]) && s[1] != '=')
	5520	PL_expect = XTERM; /* e.g. print $fh -1 */
	5521	else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
	5522	&& s[1] != '/')
	5523	PL_expect = XTERM; /* e.g. print $fh /.../
	5524	XXX except DORDOR operator
	5525	*/
	5526	else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
	5527	&& s[2] != '=')
	5528	PL_expect = XTERM; /* print $fh <<"EOF" */
	5529	}
	5530	}
	5531	force_ident_maybe_lex('$');
	5532	TOKEN(PERLY_DOLLAR);
	5533	}
	5534
	5535	static int
	5536	yyl_sub(pTHX_ char *s, const int key)
	5537	{
	5538	char * const tmpbuf = PL_tokenbuf + 1;
	5539	bool have_name, have_proto;
	5540	STRLEN len;
	5541	SV *format_name = NULL;
	5542	bool is_method = (key == KEY_method);
	5543
	5544	/* method always implies signatures */
	5545	bool is_sigsub = is_method \|\| FEATURE_SIGNATURES_IS_ENABLED;
	5546
	5547	SSize_t off = s-SvPVX(PL_linestr);
	5548	char *d;
	5549
	5550	s = skipspace(s); /* can move PL_linestr */
	5551
	5552	d = SvPVX(PL_linestr)+off;
	5553
	5554	SAVEBOOL(PL_parser->sig_seen);
	5555	PL_parser->sig_seen = FALSE;
	5556
	5557	if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
	5558	\|\| *s == '\''
	5559	\|\| (*s == ':' && s[1] == ':'))
	5560	{
	5561
	5562	PL_expect = XATTRBLOCK;
	5563	d = scan_word6(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
	5564	&len, TRUE);
	5565	if (key == KEY_format)
	5566	format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
	5567	*PL_tokenbuf = '&';
	5568	if (memchr(tmpbuf, ':', len) \|\| key != KEY_sub
	5569	\|\| pad_findmy_pvn(
	5570	PL_tokenbuf, len + 1, 0
	5571	) != NOT_IN_PAD)
	5572	sv_setpvn(PL_subname, tmpbuf, len);
	5573	else {
	5574	sv_setsv(PL_subname,PL_curstname);
	5575	sv_catpvs(PL_subname,"::");
	5576	sv_catpvn(PL_subname,tmpbuf,len);
	5577	}
	5578	if (SvUTF8(PL_linestr))
	5579	SvUTF8_on(PL_subname);
	5580	have_name = TRUE;
	5581
	5582	s = skipspace(d);
	5583	}
	5584	else {
	5585	if (key == KEY_my \|\| key == KEY_our \|\| key==KEY_state) {
	5586	*d = '\0';
	5587	/* diag_listed_as: Missing name in "%s sub" */
	5588	Perl_croak(aTHX_
	5589	"Missing name in \"%s\"", PL_bufptr);
	5590	}
	5591	PL_expect = XATTRTERM;
	5592	sv_setpvs(PL_subname,"?");
	5593	have_name = FALSE;
	5594	}
	5595
	5596	if (key == KEY_format) {
	5597	if (format_name) {
	5598	NEXTVAL_NEXTTOKE.opval
	5599	= newSVOP(OP_CONST,0, format_name);
	5600	NEXTVAL_NEXTTOKE.opval->op_private \|= OPpCONST_BARE;
	5601	force_next(BAREWORD);
	5602	}
	5603	PREBLOCK(KW_FORMAT);
	5604	}
	5605
	5606	/* Look for a prototype */
	5607	if (*s == '(' && !is_sigsub) {
	5608	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
	5609	if (!s)
	5610	Perl_croak(aTHX_ "Prototype not terminated");
	5611	COPLINE_SET_FROM_MULTI_END;
	5612	(void)validate_proto(PL_subname, PL_lex_stuff,
	5613	ckWARN(WARN_ILLEGALPROTO), 0);
	5614	have_proto = TRUE;
	5615
	5616	s = skipspace(s);
	5617	}
	5618	else
	5619	have_proto = FALSE;
	5620
	5621	if ( !(*s == ':' && s[1] != ':')
	5622	&& (s != '{' && s != '(') && key != KEY_format)
	5623	{
	5624	assert(key == KEY_sub \|\| key == KEY_method \|\|
	5625	key == KEY_AUTOLOAD \|\| key == KEY_DESTROY \|\|
	5626	key == KEY_BEGIN \|\| key == KEY_UNITCHECK \|\| key == KEY_CHECK \|\|
	5627	key == KEY_INIT \|\| key == KEY_END \|\|
	5628	key == KEY_my \|\| key == KEY_state \|\|
	5629	key == KEY_our);
	5630	if (!have_name)
	5631	Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
	5632	else if (s != ';' && s != '}')
	5633	Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
	5634	}
	5635
	5636	if (have_proto) {
	5637	NEXTVAL_NEXTTOKE.opval =
	5638	newSVOP(OP_CONST, 0, PL_lex_stuff);
	5639	PL_lex_stuff = NULL;
	5640	force_next(THING);
	5641	}
	5642
	5643	if (!have_name) {
	5644	if (PL_curstash)
	5645	sv_setpvs(PL_subname, "__ANON__");
	5646	else
	5647	sv_setpvs(PL_subname, "__ANON__::__ANON__");
	5648	if (is_method)
	5649	TOKEN(KW_METHOD_anon);
	5650	else if (is_sigsub)
	5651	TOKEN(KW_SUB_anon_sig);
	5652	else
	5653	TOKEN(KW_SUB_anon);
	5654	}
	5655	force_ident_maybe_lex('&');
	5656	if (is_method)
	5657	TOKEN(KW_METHOD_named);
	5658	else if (is_sigsub)
	5659	TOKEN(KW_SUB_named_sig);
	5660	else
	5661	TOKEN(KW_SUB_named);
	5662	}
	5663
	5664	static int
	5665	yyl_interpcasemod(pTHX_ char *s)
	5666	{
	5667	#ifdef DEBUGGING
	5668	if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
	5669	Perl_croak(aTHX_
	5670	"panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
	5671	PL_bufptr, PL_bufend, *PL_bufptr);
	5672	#endif
	5673
	5674	if (PL_bufptr == PL_bufend \|\| PL_bufptr[1] == 'E') {
	5675	/* if at a \E */
	5676	if (PL_lex_casemods) {
	5677	const char oldmod = PL_lex_casestack[--PL_lex_casemods];
	5678	PL_lex_casestack[PL_lex_casemods] = '\0';
	5679
	5680	if (PL_bufptr != PL_bufend
	5681	&& (oldmod == 'L' \|\| oldmod == 'U' \|\| oldmod == 'Q'
	5682	\|\| oldmod == 'F')) {
	5683	PL_bufptr += 2;
	5684	PL_lex_state = LEX_INTERPCONCAT;
	5685	}
	5686	PL_lex_allbrackets--;
	5687	return REPORT(PERLY_PAREN_CLOSE);
	5688	}
	5689	else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
	5690	/* Got an unpaired \E */
	5691	Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
	5692	"Useless use of \\E");
	5693	}
	5694	if (PL_bufptr != PL_bufend)
	5695	PL_bufptr += 2;
	5696	PL_lex_state = LEX_INTERPCONCAT;
	5697	return yylex();
	5698	}
	5699	else {
	5700	DEBUG_T({
	5701	PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
	5702	});
	5703	s = PL_bufptr + 1;
	5704	if (s[1] == '\\' && s[2] == 'E') {
	5705	PL_bufptr = s + 3;
	5706	PL_lex_state = LEX_INTERPCONCAT;
	5707	return yylex();
	5708	}
	5709	else {
	5710	I32 tmp;
	5711	if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
	5712	\|\| memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
	5713	{
	5714	tmp = s, s = s[2], s[2] = (char)tmp; /* misordered... */
	5715	}
	5716	if ((s == 'L' \|\| s == 'U' \|\| *s == 'F')
	5717	&& (strpbrk(PL_lex_casestack, "LUF")))
	5718	{
	5719	PL_lex_casestack[--PL_lex_casemods] = '\0';
	5720	PL_lex_allbrackets--;
	5721	return REPORT(PERLY_PAREN_CLOSE);
	5722	}
	5723	if (PL_lex_casemods > 10)
	5724	Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
	5725	PL_lex_casestack[PL_lex_casemods++] = *s;
	5726	PL_lex_casestack[PL_lex_casemods] = '\0';
	5727	PL_lex_state = LEX_INTERPCONCAT;
	5728	NEXTVAL_NEXTTOKE.ival = 0;
	5729	force_next((2<<24)\|PERLY_PAREN_OPEN);
	5730	if (*s == 'l')
	5731	NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
	5732	else if (*s == 'u')
	5733	NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
	5734	else if (*s == 'L')
	5735	NEXTVAL_NEXTTOKE.ival = OP_LC;
	5736	else if (*s == 'U')
	5737	NEXTVAL_NEXTTOKE.ival = OP_UC;
	5738	else if (*s == 'Q')
	5739	NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
	5740	else if (*s == 'F')
	5741	NEXTVAL_NEXTTOKE.ival = OP_FC;
	5742	else
	5743	Perl_croak(aTHX_ "panic: yylex, s=%u", s);
	5744	PL_bufptr = s + 1;
	5745	}
	5746	force_next(FUNC);
	5747	if (PL_lex_starts) {
	5748	s = PL_bufptr;
	5749	PL_lex_starts = 0;
	5750	/* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
	5751	if (PL_lex_casemods == 1 && PL_lex_inpat)
	5752	TOKEN(PERLY_COMMA);
	5753	else
	5754	AopNOASSIGN(OP_CONCAT);
	5755	}
	5756	else
	5757	return yylex();
	5758	}
	5759	}
	5760
	5761	static int
	5762	yyl_secondclass_keyword(pTHX_ char s, STRLEN len, int key, I32 orig_keyword,
	5763	GV pgv, GV *pgvp)
	5764	{
	5765	GV ogv = NULL; / override (winner) */
	5766	GV hgv = NULL; / hidden (loser) */
	5767	GV gv = pgv;
	5768
	5769	if (PL_expect != XOPERATOR && (*s != ':' \|\| s[1] != ':')) {
	5770	CV *cv;
	5771	if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
	5772	(UTF ? SVf_UTF8 : 0)\|GV_NOTQUAL,
	5773	SVt_PVCV))
	5774	&& (cv = GvCVu(gv)))
	5775	{
	5776	if (GvIMPORTED_CV(gv))
	5777	ogv = gv;
	5778	else if (! CvNOWARN_AMBIGUOUS(cv))
	5779	hgv = gv;
	5780	}
	5781	if (!ogv
	5782	&& (pgvp = (GV*)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
	5783	&& (gv = **pgvp)
	5784	&& (isGV_with_GP(gv)
	5785	? GvCVu(gv) && GvIMPORTED_CV(gv)
	5786	: SvPCS_IMPORTED(gv)
	5787	&& (gv_init(gv, PL_globalstash, PL_tokenbuf,
	5788	len, 0), 1)))
	5789	{
	5790	ogv = gv;
	5791	}
	5792	}
	5793
	5794	*pgv = gv;
	5795
	5796	if (ogv) {
	5797	*orig_keyword = key;
	5798	return 0; /* overridden by import or by GLOBAL */
	5799	}
	5800	else if (gv && !*pgvp
	5801	&& -key==KEY_lock /* XXX generalizable kludge */
	5802	&& GvCVu(gv))
	5803	{
	5804	return 0; /* any sub overrides "weak" keyword */
	5805	}
	5806	else { /* no override */
	5807	key = -key;
	5808	if (key == KEY_dump) {
	5809	Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
	5810	}
	5811	*pgv = NULL;
	5812	*pgvp = 0;
	5813	if (hgv && key != KEY_x) /* never ambiguous */
	5814	Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
	5815	"Ambiguous call resolved as CORE::%s(), "
	5816	"qualify as such or use &",
	5817	GvENAME(hgv));
	5818	return key;
	5819	}
	5820	}
	5821
	5822	static int
	5823	yyl_qw(pTHX_ char *s, STRLEN len)
	5824	{
	5825	OP *words = NULL;
	5826
	5827	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
	5828	if (!s)
	5829	missingterm(NULL, 0);
	5830
	5831	COPLINE_SET_FROM_MULTI_END;
	5832	PL_expect = XOPERATOR;
	5833	if (SvCUR(PL_lex_stuff)) {
	5834	int warned_comma = !ckWARN(WARN_QW);
	5835	int warned_comment = warned_comma;
	5836	char *d = SvPV_force(PL_lex_stuff, len);
	5837	while (len) {
	5838	for (; isSPACE(*d) && len; --len, ++d)
	5839	/**/;
	5840	if (len) {
	5841	SV *sv;
	5842	const char *b = d;
	5843	if (!warned_comma \|\| !warned_comment) {
	5844	for (; !isSPACE(*d) && len; --len, ++d) {
	5845	if (!warned_comma && *d == ',') {
	5846	Perl_warner(aTHX_ packWARN(WARN_QW),
	5847	"Possible attempt to separate words with commas");
	5848	++warned_comma;
	5849	}
	5850	else if (!warned_comment && *d == '#') {
	5851	Perl_warner(aTHX_ packWARN(WARN_QW),
	5852	"Possible attempt to put comments in qw() list");
	5853	++warned_comment;
	5854	}
	5855	}
	5856	}
	5857	else {
	5858	for (; !isSPACE(*d) && len; --len, ++d)
	5859	/**/;
	5860	}
	5861	sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
	5862	words = op_append_elem(OP_LIST, words,
	5863	newSVOP(OP_CONST, 0, tokeq(sv)));
	5864	}
	5865	}
	5866	}
	5867	if (!words)
	5868	words = newNULLLIST();
	5869	SvREFCNT_dec_NN(PL_lex_stuff);
	5870	PL_lex_stuff = NULL;
	5871	PL_expect = XOPERATOR;
	5872	pl_yylval.opval = sawparens(words);
	5873	TOKEN(QWLIST);
	5874	}
	5875
	5876	static int
	5877	yyl_hyphen(pTHX_ char *s)
	5878	{
	5879	if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
	5880	I32 ftst = 0;
	5881	char tmp;
	5882
	5883	s++;
	5884	PL_bufptr = s;
	5885	tmp = *s++;
	5886
	5887	while (s < PL_bufend && SPACE_OR_TAB(*s))
	5888	s++;
	5889
	5890	if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
	5891	s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
	5892	DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
	5893	OPERATOR(PERLY_MINUS); /* unary minus */
	5894	}
	5895	switch (tmp) {
	5896	case 'r': ftst = OP_FTEREAD; break;
	5897	case 'w': ftst = OP_FTEWRITE; break;
	5898	case 'x': ftst = OP_FTEEXEC; break;
	5899	case 'o': ftst = OP_FTEOWNED; break;
	5900	case 'R': ftst = OP_FTRREAD; break;
	5901	case 'W': ftst = OP_FTRWRITE; break;
	5902	case 'X': ftst = OP_FTREXEC; break;
	5903	case 'O': ftst = OP_FTROWNED; break;
	5904	case 'e': ftst = OP_FTIS; break;
	5905	case 'z': ftst = OP_FTZERO; break;
	5906	case 's': ftst = OP_FTSIZE; break;
	5907	case 'f': ftst = OP_FTFILE; break;
	5908	case 'd': ftst = OP_FTDIR; break;
	5909	case 'l': ftst = OP_FTLINK; break;
	5910	case 'p': ftst = OP_FTPIPE; break;
	5911	case 'S': ftst = OP_FTSOCK; break;
	5912	case 'u': ftst = OP_FTSUID; break;
	5913	case 'g': ftst = OP_FTSGID; break;
	5914	case 'k': ftst = OP_FTSVTX; break;
	5915	case 'b': ftst = OP_FTBLK; break;
	5916	case 'c': ftst = OP_FTCHR; break;
	5917	case 't': ftst = OP_FTTTY; break;
	5918	case 'T': ftst = OP_FTTEXT; break;
	5919	case 'B': ftst = OP_FTBINARY; break;
	5920	case 'M': case 'A': case 'C':
	5921	gv_fetchpvs("\024", GV_ADD\|GV_NOTQUAL, SVt_PV);
	5922	switch (tmp) {
	5923	case 'M': ftst = OP_FTMTIME; break;
	5924	case 'A': ftst = OP_FTATIME; break;
	5925	case 'C': ftst = OP_FTCTIME; break;
	5926	default: break;
	5927	}
	5928	break;
	5929	default:
	5930	break;
	5931	}
	5932	if (ftst) {
	5933	PL_last_uni = PL_oldbufptr;
	5934	PL_last_lop_op = (OPCODE)ftst;
	5935	DEBUG_T( {
	5936	PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
	5937	} );
	5938	FTST(ftst);
	5939	}
	5940	else {
	5941	/* Assume it was a minus followed by a one-letter named
	5942	* subroutine call (or a -bareword), then. */
	5943	DEBUG_T( {
	5944	PerlIO_printf(Perl_debug_log,
	5945	"### '-%c' looked like a file test but was not\n",
	5946	(int) tmp);
	5947	} );
	5948	s = --PL_bufptr;
	5949	}
	5950	}
	5951	{
	5952	const char tmp = *s++;
	5953	if (*s == tmp) {
	5954	s++;
	5955	if (PL_expect == XOPERATOR)
	5956	TERM(POSTDEC);
	5957	else
	5958	OPERATOR(PREDEC);
	5959	}
	5960	else if (*s == '>') {
	5961	s++;
	5962	s = skipspace(s);
	5963	if (((s == '$' \|\| s == '&') && s[1] == '*')
	5964	\|\|(s == '$' && s[1] == '#' && s[2] == '')
	5965	\|\|((s == '@' \|\| s == '%') && memCHRs("*[{", s[1]))
	5966	\|\|(s == '' && (s[1] == '*' \|\| s[1] == '{'))
	5967	)
	5968	{
	5969	PL_expect = XPOSTDEREF;
	5970	TOKEN(ARROW);
	5971	}
	5972	if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
	5973	s = force_word(s,METHCALL0,FALSE,TRUE);
	5974	TOKEN(ARROW);
	5975	}
	5976	else if (*s == '$')
	5977	OPERATOR(ARROW);
	5978	else
	5979	TERM(ARROW);
	5980	}
	5981	if (PL_expect == XOPERATOR) {
	5982	if (*s == '='
	5983	&& !PL_lex_allbrackets
	5984	&& PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
	5985	{
	5986	s--;
	5987	TOKEN(0);
	5988	}
	5989	Aop(OP_SUBTRACT);
	5990	}
	5991	else {
	5992	if (isSPACE(s) \|\| !isSPACE(PL_bufptr))
	5993	check_uni();
	5994	OPERATOR(PERLY_MINUS); /* unary minus */
	5995	}
	5996	}
	5997	}
	5998
	5999	static int
	6000	yyl_plus(pTHX_ char *s)
	6001	{
	6002	const char tmp = *s++;
	6003	if (*s == tmp) {
	6004	s++;
	6005	if (PL_expect == XOPERATOR)
	6006	TERM(POSTINC);
	6007	else
	6008	OPERATOR(PREINC);
	6009	}
	6010	if (PL_expect == XOPERATOR) {
	6011	if (*s == '='
	6012	&& !PL_lex_allbrackets
	6013	&& PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
	6014	{
	6015	s--;
	6016	TOKEN(0);
	6017	}
	6018	Aop(OP_ADD);
	6019	}
	6020	else {
	6021	if (isSPACE(s) \|\| !isSPACE(PL_bufptr))
	6022	check_uni();
	6023	OPERATOR(PERLY_PLUS);
	6024	}
	6025	}
	6026
	6027	static int
	6028	yyl_star(pTHX_ char *s)
	6029	{
	6030	if (PL_expect == XPOSTDEREF)
	6031	POSTDEREF(PERLY_STAR);
	6032
	6033	if (PL_expect != XOPERATOR) {
	6034	s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
	6035	PL_expect = XOPERATOR;
	6036	force_ident(PL_tokenbuf, PERLY_STAR);
	6037	if (!*PL_tokenbuf)
	6038	PREREF(PERLY_STAR);
	6039	TERM(PERLY_STAR);
	6040	}
	6041
	6042	s++;
	6043	if (s == '') {
	6044	s++;
	6045	if (*s == '=' && !PL_lex_allbrackets
	6046	&& PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
	6047	{
	6048	s -= 2;
	6049	TOKEN(0);
	6050	}
	6051	PWop(OP_POW);
	6052	}
	6053
	6054	if (*s == '='
	6055	&& !PL_lex_allbrackets
	6056	&& PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
	6057	{
	6058	s--;
	6059	TOKEN(0);
	6060	}
	6061
	6062	Mop(OP_MULTIPLY);
	6063	}
	6064
	6065	static int
	6066	yyl_percent(pTHX_ char *s)
	6067	{
	6068	if (PL_expect == XOPERATOR) {
	6069	if (s[1] == '='
	6070	&& !PL_lex_allbrackets
	6071	&& PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
	6072	{
	6073	TOKEN(0);
	6074	}
	6075	++s;
	6076	Mop(OP_MODULO);
	6077	}
	6078	else if (PL_expect == XPOSTDEREF)
	6079	POSTDEREF(PERLY_PERCENT_SIGN);
	6080
	6081	PL_tokenbuf[0] = '%';
	6082	s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
	6083	pl_yylval.ival = 0;
	6084	if (!PL_tokenbuf[1]) {
	6085	PREREF(PERLY_PERCENT_SIGN);
	6086	}
	6087	if ( (PL_expect != XREF \|\| PL_oldoldbufptr == PL_last_lop)
	6088	&& intuit_more(s, PL_bufend)) {
	6089	if (*s == '[')
	6090	PL_tokenbuf[0] = '@';
	6091	}
	6092	PL_expect = XOPERATOR;
	6093	force_ident_maybe_lex('%');
	6094	TERM(PERLY_PERCENT_SIGN);
	6095	}
	6096
	6097	static int
	6098	yyl_caret(pTHX_ char *s)
	6099	{
	6100	char *d = s;
	6101	const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
	6102	if (bof && s[1] == '.')
	6103	s++;
	6104	if (!PL_lex_allbrackets && PL_lex_fakeeof >=
	6105	(s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
	6106	{
	6107	s = d;
	6108	TOKEN(0);
	6109	}
	6110	s++;
	6111	BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
	6112	}
	6113
	6114	static int
	6115	yyl_colon(pTHX_ char *s)
	6116	{
	6117	OP *attrs;
	6118
	6119	switch (PL_expect) {
	6120	case XOPERATOR:
	6121	if (!PL_in_my \|\| (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
	6122	break;
	6123	PL_bufptr = s; /* update in case we back off */
	6124	if (*s == '=') {
	6125	Perl_croak(aTHX_
	6126	"Use of := for an empty attribute list is not allowed");
	6127	}
	6128	goto grabattrs;
	6129	case XATTRBLOCK:
	6130	PL_expect = XBLOCK;
	6131	goto grabattrs;
	6132	case XATTRTERM:
	6133	PL_expect = XTERMBLOCK;
	6134	grabattrs:
	6135	/* NB: as well as parsing normal attributes, we also end up
	6136	* here if there is something looking like attributes
	6137	* following a signature (which is illegal, but used to be
	6138	* legal in 5.20..5.26). If the latter, we still parse the
	6139	* attributes so that error messages(s) are less confusing,
	6140	* but ignore them (parser->sig_seen).
	6141	*/
	6142	s = skipspace(s);
	6143	attrs = NULL;
	6144	while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
	6145	I32 tmp;
	6146	SV *sv;
	6147	STRLEN len;
	6148	char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
	6149	if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
	6150	if (tmp < 0) tmp = -tmp;
	6151	switch (tmp) {
	6152	case KEY_or:
	6153	case KEY_and:
	6154	case KEY_for:
	6155	case KEY_foreach:
	6156	case KEY_unless:
	6157	case KEY_if:
	6158	case KEY_while:
	6159	case KEY_until:
	6160	goto got_attrs;
	6161	default:
	6162	break;
	6163	}
	6164	}
	6165	sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
	6166	if (*d == '(') {
	6167	d = scan_str(d,TRUE,TRUE,FALSE,NULL);
	6168	if (!d) {
	6169	op_free(attrs);
	6170	ASSUME(sv && SvREFCNT(sv) == 1);
	6171	SvREFCNT_dec(sv);
	6172	Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
	6173	}
	6174	COPLINE_SET_FROM_MULTI_END;
	6175	}
	6176	if (PL_lex_stuff) {
	6177	sv_catsv(sv, PL_lex_stuff);
	6178	attrs = op_append_elem(OP_LIST, attrs,
	6179	newSVOP(OP_CONST, 0, sv));
	6180	SvREFCNT_dec_NN(PL_lex_stuff);
	6181	PL_lex_stuff = NULL;
	6182	}
	6183	else {
	6184	attrs = op_append_elem(OP_LIST, attrs,
	6185	newSVOP(OP_CONST, 0, sv));
	6186	}
	6187	s = skipspace(d);
	6188	if (*s == ':' && s[1] != ':')
	6189	s = skipspace(s+1);
	6190	else if (s == d)
	6191	break; /* require real whitespace or :'s */
	6192	/* XXX losing whitespace on sequential attributes here */
	6193	}
	6194
	6195	if (*s != ';'
	6196	&& *s != '}'
	6197	&& !(PL_expect == XOPERATOR
	6198	/* if an operator is expected, permit =, //= and \|\|= or ) to end */
	6199	? (s == '=' \|\| s == ')' \|\| s == '/' \|\| s == '\|')
	6200	: (s == '{' \|\| s == '(')))
	6201	{
	6202	const char q = ((*s == '\'') ? '"' : '\'');
	6203	/* If here for an expression, and parsed no attrs, back off. */
	6204	if (PL_expect == XOPERATOR && !attrs) {
	6205	s = PL_bufptr;
	6206	break;
	6207	}
	6208	/* MUST advance bufptr here to avoid bogus "at end of line"
	6209	context messages from yyerror().
	6210	*/
	6211	PL_bufptr = s;
	6212	yyerror( (const char *)
	6213	(*s
	6214	? Perl_form(aTHX_ "Invalid separator character "
	6215	"%c%c%c in attribute list", q, *s, q)
	6216	: "Unterminated attribute list" ) );
	6217	op_free(attrs);
	6218	OPERATOR(PERLY_COLON);
	6219	}
	6220
	6221	got_attrs:
	6222	if (PL_parser->sig_seen) {
	6223	/* see comment about about sig_seen and parser error
	6224	* handling */
	6225	op_free(attrs);
	6226	Perl_croak(aTHX_ "Subroutine attributes must come "
	6227	"before the signature");
	6228	}
	6229	if (attrs) {
	6230	NEXTVAL_NEXTTOKE.opval = attrs;
	6231	force_next(THING);
	6232	}
	6233	TOKEN(COLONATTR);
	6234	}
	6235
	6236	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
	6237	s--;
	6238	TOKEN(0);
	6239	}
	6240
	6241	PL_lex_allbrackets--;
	6242	OPERATOR(PERLY_COLON);
	6243	}
	6244
	6245	static int
	6246	yyl_subproto(pTHX_ char s, CV cv)
	6247	{
	6248	STRLEN protolen = CvPROTOLEN(cv);
	6249	const char *proto = CvPROTO(cv);
	6250	bool optional;
	6251
	6252	proto = S_strip_spaces(aTHX_ proto, &protolen);
	6253	if (!protolen)
	6254	TERM(FUNC0SUB);
	6255	if ((optional = *proto == ';')) {
	6256	do {
	6257	proto++;
	6258	} while (*proto == ';');
	6259	}
	6260
	6261	if (
	6262	(
	6263	(
	6264	proto == '$' \|\| proto == '_'
	6265	\|\| proto == '' \|\| *proto == '+'
	6266	)
	6267	&& proto[1] == '\0'
	6268	)
	6269	\|\| (
	6270	*proto == '\\' && proto[1] && proto[2] == '\0'
	6271	)
	6272	) {
	6273	UNIPROTO(UNIOPSUB,optional);
	6274	}
	6275
	6276	if (*proto == '\\' && proto[1] == '[') {
	6277	const char *p = proto + 2;
	6278	while(p && p != ']')
	6279	++p;
	6280	if(*p == ']' && !p[1])
	6281	UNIPROTO(UNIOPSUB,optional);
	6282	}
	6283
	6284	if (proto == '&' && s == '{') {
	6285	if (PL_curstash)
	6286	sv_setpvs(PL_subname, "__ANON__");
	6287	else
	6288	sv_setpvs(PL_subname, "__ANON__::__ANON__");
	6289	if (!PL_lex_allbrackets
	6290	&& PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
	6291	{
	6292	PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
	6293	}
	6294	PREBLOCK(LSTOPSUB);
	6295	}
	6296
	6297	return KEY_NULL;
	6298	}
	6299
	6300	static int
	6301	yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
	6302	{
	6303	char *d;
	6304	if (PL_lex_brackets > 100) {
	6305	Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
	6306	}
	6307
	6308	switch (PL_expect) {
	6309	case XTERM:
	6310	case XTERMORDORDOR:
	6311	PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
	6312	PL_lex_allbrackets++;
	6313	OPERATOR(HASHBRACK);
	6314	case XOPERATOR:
	6315	while (s < PL_bufend && SPACE_OR_TAB(*s))
	6316	s++;
	6317	d = s;
	6318	PL_tokenbuf[0] = '\0';
	6319	if (d < PL_bufend && *d == '-') {
	6320	PL_tokenbuf[0] = '-';
	6321	d++;
	6322	while (d < PL_bufend && SPACE_OR_TAB(*d))
	6323	d++;
	6324	}
	6325	if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
	6326	STRLEN len;
	6327	d = scan_word6(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
	6328	FALSE, &len, FALSE);
	6329	while (d < PL_bufend && SPACE_OR_TAB(*d))
	6330	d++;
	6331	if (*d == '}') {
	6332	const char minus = (PL_tokenbuf[0] == '-');
	6333	s = force_word(s + minus, BAREWORD, FALSE, TRUE);
	6334	if (minus)
	6335	force_next(PERLY_MINUS);
	6336	}
	6337	}
	6338	/* FALLTHROUGH */
	6339	case XATTRTERM:
	6340	case XTERMBLOCK:
	6341	PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
	6342	PL_lex_allbrackets++;
	6343	PL_expect = XSTATE;
	6344	break;
	6345	case XATTRBLOCK:
	6346	case XBLOCK:
	6347	PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
	6348	PL_lex_allbrackets++;
	6349	PL_expect = XSTATE;
	6350	break;
	6351	case XBLOCKTERM:
	6352	PL_lex_brackstack[PL_lex_brackets++] = XTERM;
	6353	PL_lex_allbrackets++;
	6354	PL_expect = XSTATE;
	6355	break;
	6356	default: {
	6357	const char *t;
	6358	if (PL_oldoldbufptr == PL_last_lop)
	6359	PL_lex_brackstack[PL_lex_brackets++] = XTERM;
	6360	else
	6361	PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
	6362	PL_lex_allbrackets++;
	6363	s = skipspace(s);
	6364	if (*s == '}') {
	6365	if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
	6366	PL_expect = XTERM;
	6367	/* This hack is to get the ${} in the message. */
	6368	PL_bufptr = s+1;
	6369	yyerror("syntax error");
	6370	yyquit();
	6371	break;
	6372	}
	6373	OPERATOR(HASHBRACK);
	6374	}
	6375	if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
	6376	/* ${...} or @{...} etc., but not print {...}
	6377	* Skip the disambiguation and treat this as a block.
	6378	*/
	6379	goto block_expectation;
	6380	}
	6381	/* This hack serves to disambiguate a pair of curlies
	6382	* as being a block or an anon hash. Normally, expectation
	6383	* determines that, but in cases where we're not in a
	6384	* position to expect anything in particular (like inside
	6385	* eval"") we have to resolve the ambiguity. This code
	6386	* covers the case where the first term in the curlies is a
	6387	* quoted string. Most other cases need to be explicitly
	6388	* disambiguated by prepending a "+" before the opening
	6389	* curly in order to force resolution as an anon hash.
	6390	*
	6391	* XXX should probably propagate the outer expectation
	6392	* into eval"" to rely less on this hack, but that could
	6393	* potentially break current behavior of eval"".
	6394	* GSAR 97-07-21
	6395	*/
	6396	t = s;
	6397	if (s == '\'' \|\| s == '"' \|\| *s == '`') {
	6398	/* common case: get past first string, handling escapes */
	6399	for (t++; t < PL_bufend && t != s;)
	6400	if (*t++ == '\\')
	6401	t++;
	6402	t++;
	6403	}
	6404	else if (*s == 'q') {
	6405	if (++t < PL_bufend
	6406	&& (!isWORDCHAR(*t)
	6407	\|\| ((t == 'q' \|\| t == 'x') && ++t < PL_bufend
	6408	&& !isWORDCHAR(*t))))
	6409	{
	6410	/* skip q//-like construct */
	6411	const char *tmps;
	6412	char open, close, term;
	6413	I32 brackets = 1;
	6414
	6415	while (t < PL_bufend && isSPACE(*t))
	6416	t++;
	6417	/* check for q => */
	6418	if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
	6419	OPERATOR(HASHBRACK);
	6420	}
	6421	term = *t;
	6422	open = term;
	6423	if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
	6424	term = tmps[5];
	6425	close = term;
	6426	if (open == close)
	6427	for (t++; t < PL_bufend; t++) {
	6428	if (*t == '\\' && t+1 < PL_bufend && open != '\\')
	6429	t++;
	6430	else if (*t == open)
	6431	break;
	6432	}
	6433	else {
	6434	for (t++; t < PL_bufend; t++) {
	6435	if (*t == '\\' && t+1 < PL_bufend)
	6436	t++;
	6437	else if (*t == close && --brackets <= 0)
	6438	break;
	6439	else if (*t == open)
	6440	brackets++;
	6441	}
	6442	}
	6443	t++;
	6444	}
	6445	else
	6446	/* skip plain q word */
	6447	while ( t < PL_bufend
	6448	&& isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
	6449	{
	6450	t += UTF ? UTF8SKIP(t) : 1;
	6451	}
	6452	}
	6453	else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
	6454	t += UTF ? UTF8SKIP(t) : 1;
	6455	while ( t < PL_bufend
	6456	&& isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
	6457	{
	6458	t += UTF ? UTF8SKIP(t) : 1;
	6459	}
	6460	}
	6461	while (t < PL_bufend && isSPACE(*t))
	6462	t++;
	6463	/* if comma follows first term, call it an anon hash */
	6464	/* XXX it could be a comma expression with loop modifiers */
	6465	if (t < PL_bufend && ((t == ',' && (s == 'q' \|\| !isLOWER(*s)))
	6466	\|\| (*t == '=' && t[1] == '>')))
	6467	OPERATOR(HASHBRACK);
	6468	if (PL_expect == XREF) {
	6469	block_expectation:
	6470	/* If there is an opening brace or 'sub:', treat it
	6471	as a term to make ${{...}}{k} and &{sub:attr...}
	6472	dwim. Otherwise, treat it as a statement, so
	6473	map {no strict; ...} works.
	6474	*/
	6475	s = skipspace(s);
	6476	if (*s == '{') {
	6477	PL_expect = XTERM;
	6478	break;
	6479	}
	6480	if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
	6481	PL_bufptr = s;
	6482	d = s + 3;
	6483	d = skipspace(d);
	6484	s = PL_bufptr;
	6485	if (*d == ':') {
	6486	PL_expect = XTERM;
	6487	break;
	6488	}
	6489	}
	6490	PL_expect = XSTATE;
	6491	}
	6492	else {
	6493	PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
	6494	PL_expect = XSTATE;
	6495	}
	6496	}
	6497	break;
	6498	}
	6499
	6500	pl_yylval.ival = CopLINE(PL_curcop);
	6501	PL_copline = NOLINE; /* invalidate current command line number */
	6502	TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN);
	6503	}
	6504
	6505	static int
	6506	yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
	6507	{
	6508	assert(s != PL_bufend);
	6509	s++;
	6510
	6511	if (PL_lex_brackets <= 0)
	6512	/* diag_listed_as: Unmatched right %s bracket */
	6513	yyerror("Unmatched right curly bracket");
	6514	else
	6515	PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
	6516
	6517	PL_lex_allbrackets--;
	6518
	6519	if (PL_lex_state == LEX_INTERPNORMAL) {
	6520	if (PL_lex_brackets == 0) {
	6521	if (PL_expect & XFAKEBRACK) {
	6522	PL_expect &= XENUMMASK;
	6523	PL_lex_state = LEX_INTERPEND;
	6524	PL_bufptr = s;
	6525	return yylex(); /* ignore fake brackets */
	6526	}
	6527	if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
	6528	&& SvEVALED(PL_lex_repl))
	6529	PL_lex_state = LEX_INTERPEND;
	6530	else if (*s == '-' && s[1] == '>')
	6531	PL_lex_state = LEX_INTERPENDMAYBE;
	6532	else if (s != '[' && s != '{')
	6533	PL_lex_state = LEX_INTERPEND;
	6534	}
	6535	}
	6536
	6537	if (PL_expect & XFAKEBRACK) {
	6538	PL_expect &= XENUMMASK;
	6539	PL_bufptr = s;
	6540	return yylex(); /* ignore fake brackets */
	6541	}
	6542
	6543	force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE);
	6544	if (formbrack) LEAVE_with_name("lex_format");
	6545	if (formbrack == 2) { /* means . where arguments were expected */
	6546	force_next(PERLY_SEMICOLON);
	6547	TOKEN(FORMRBRACK);
	6548	}
	6549
	6550	TOKEN(PERLY_SEMICOLON);
	6551	}
	6552
	6553	static int
	6554	yyl_ampersand(pTHX_ char *s)
	6555	{
	6556	if (PL_expect == XPOSTDEREF)
	6557	POSTDEREF(PERLY_AMPERSAND);
	6558
	6559	s++;
	6560	if (*s++ == '&') {
	6561	if (!PL_lex_allbrackets && PL_lex_fakeeof >=
	6562	(*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
	6563	s -= 2;
	6564	TOKEN(0);
	6565	}
	6566	AOPERATOR(ANDAND);
	6567	}
	6568	s--;
	6569
	6570	if (PL_expect == XOPERATOR) {
	6571	char *d;
	6572	bool bof;
	6573	if ( PL_bufptr == PL_linestart
	6574	&& ckWARN(WARN_SEMICOLON)
	6575	&& isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
	6576	{
	6577	CopLINE_dec(PL_curcop);
	6578	Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
	6579	CopLINE_inc(PL_curcop);
	6580	}
	6581	d = s;
	6582	if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
	6583	s++;
	6584	if (!PL_lex_allbrackets && PL_lex_fakeeof >=
	6585	(*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
	6586	s = d;
	6587	s--;
	6588	TOKEN(0);
	6589	}
	6590	if (d == s)
	6591	BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
	6592	else
	6593	BAop(OP_SBIT_AND);
	6594	}
	6595
	6596	PL_tokenbuf[0] = '&';
	6597	s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
	6598	pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
	6599
	6600	if (PL_tokenbuf[1])
	6601	force_ident_maybe_lex('&');
	6602	else
	6603	PREREF(PERLY_AMPERSAND);
	6604
	6605	TERM(PERLY_AMPERSAND);
	6606	}
	6607
	6608	static int
	6609	yyl_verticalbar(pTHX_ char *s)
	6610	{
	6611	char *d;
	6612	bool bof;
	6613
	6614	s++;
	6615	if (*s++ == '\|') {
	6616	if (!PL_lex_allbrackets && PL_lex_fakeeof >=
	6617	(*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
	6618	s -= 2;
	6619	TOKEN(0);
	6620	}
	6621	AOPERATOR(OROR);
	6622	}
	6623
	6624	s--;
	6625	d = s;
	6626	if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
	6627	s++;
	6628
	6629	if (!PL_lex_allbrackets && PL_lex_fakeeof >=
	6630	(*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
	6631	s = d - 1;
	6632	TOKEN(0);
	6633	}
	6634
	6635	BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
	6636	}
	6637
	6638	static int
	6639	yyl_bang(pTHX_ char *s)
	6640	{
	6641	const char tmp = *s++;
	6642	if (tmp == '=') {
	6643	/* was this !=~ where !~ was meant?
	6644	* warn on m:!=~\s+([/?]\|[msy]\W\|tr\W): */
	6645
	6646	if (*s == '~' && ckWARN(WARN_SYNTAX)) {
	6647	const char *t = s+1;
	6648
	6649	while (t < PL_bufend && isSPACE(*t))
	6650	++t;
	6651
	6652	if (t == '/' \|\| t == '?'
	6653	\|\| ((t == 'm' \|\| t == 's' \|\| *t == 'y')
	6654	&& !isWORDCHAR(t[1]))
	6655	\|\| (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
	6656	Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
	6657	"!=~ should be !~");
	6658	}
	6659
	6660	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
	6661	s -= 2;
	6662	TOKEN(0);
	6663	}
	6664
	6665	ChEop(OP_NE);
	6666	}
	6667
	6668	if (tmp == '~')
	6669	PMop(OP_NOT);
	6670
	6671	s--;
	6672	OPERATOR(PERLY_EXCLAMATION_MARK);
	6673	}
	6674
	6675	static int
	6676	yyl_snail(pTHX_ char *s)
	6677	{
	6678	if (PL_expect == XPOSTDEREF)
	6679	POSTDEREF(PERLY_SNAIL);
	6680	PL_tokenbuf[0] = '@';
	6681	s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
	6682	if (PL_expect == XOPERATOR) {
	6683	char *d = s;
	6684	if (PL_bufptr > s) {
	6685	d = PL_bufptr-1;
	6686	PL_bufptr = PL_oldbufptr;
	6687	}
	6688	no_op("Array", d);
	6689	}
	6690	pl_yylval.ival = 0;
	6691	if (!PL_tokenbuf[1]) {
	6692	PREREF(PERLY_SNAIL);
	6693	}
	6694	if (PL_lex_state == LEX_NORMAL \|\| PL_lex_brackets)
	6695	s = skipspace(s);
	6696	if ( (PL_expect != XREF \|\| PL_oldoldbufptr == PL_last_lop)
	6697	&& intuit_more(s, PL_bufend))
	6698	{
	6699	if (*s == '{')
	6700	PL_tokenbuf[0] = '%';
	6701
	6702	/* Warn about @ where they meant $. */
	6703	if (s == '[' \|\| s == '{') {
	6704	if (ckWARN(WARN_SYNTAX)) {
	6705	S_check_scalar_slice(aTHX_ s);
	6706	}
	6707	}
	6708	}
	6709	PL_expect = XOPERATOR;
	6710	force_ident_maybe_lex('@');
	6711	TERM(PERLY_SNAIL);
	6712	}
	6713
	6714	static int
	6715	yyl_slash(pTHX_ char *s)
	6716	{
	6717	if ((PL_expect == XOPERATOR \|\| PL_expect == XTERMORDORDOR) && s[1] == '/') {
	6718	if (!PL_lex_allbrackets && PL_lex_fakeeof >=
	6719	(s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
	6720	TOKEN(0);
	6721	s += 2;
	6722	AOPERATOR(DORDOR);
	6723	}
	6724	else if (PL_expect == XOPERATOR) {
	6725	s++;
	6726	if (*s == '=' && !PL_lex_allbrackets
	6727	&& PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
	6728	{
	6729	s--;
	6730	TOKEN(0);
	6731	}
	6732	Mop(OP_DIVIDE);
	6733	}
	6734	else {
	6735	/* Disable warning on "study /blah/" */
	6736	if ( PL_oldoldbufptr == PL_last_uni
	6737	&& ( *PL_last_uni != 's' \|\| s - PL_last_uni < 5
	6738	\|\| memNE(PL_last_uni, "study", 5)
	6739	\|\| isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
	6740	))
	6741	check_uni();
	6742	s = scan_pat(s,OP_MATCH);
	6743	TERM(sublex_start());
	6744	}
	6745	}
	6746
	6747	static int
	6748	yyl_leftsquare(pTHX_ char *s)
	6749	{
	6750	if (PL_lex_brackets > 100)
	6751	Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
	6752	PL_lex_brackstack[PL_lex_brackets++] = 0;
	6753	PL_lex_allbrackets++;
	6754	s++;
	6755	OPERATOR(PERLY_BRACKET_OPEN);
	6756	}
	6757
	6758	static int
	6759	yyl_rightsquare(pTHX_ char *s)
	6760	{
	6761	if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
	6762	TOKEN(0);
	6763	s++;
	6764	if (PL_lex_brackets <= 0)
	6765	/* diag_listed_as: Unmatched right %s bracket */
	6766	yyerror("Unmatched right square bracket");
	6767	else
	6768	--PL_lex_brackets;
	6769	PL_lex_allbrackets--;
	6770	if (PL_lex_state == LEX_INTERPNORMAL) {
	6771	if (PL_lex_brackets == 0) {
	6772	if (*s == '-' && s[1] == '>')
	6773	PL_lex_state = LEX_INTERPENDMAYBE;
	6774	else if (s != '[' && s != '{')
	6775	PL_lex_state = LEX_INTERPEND;
	6776	}
	6777	}
	6778	TERM(PERLY_BRACKET_CLOSE);
	6779	}
	6780
	6781	static int
	6782	yyl_tilde(pTHX_ char *s)
	6783	{
	6784	bool bof;
	6785	if (s[1] == '~' && (PL_expect == XOPERATOR \|\| PL_expect == XTERMORDORDOR)) {
	6786	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
	6787	TOKEN(0);
	6788	s += 2;
	6789	Perl_ck_warner_d(aTHX_
	6790	packWARN(WARN_DEPRECATED__SMARTMATCH),
	6791	"Smartmatch is deprecated");
	6792	NCEop(OP_SMARTMATCH);
	6793	}
	6794	s++;
	6795	if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
	6796	s++;
	6797	BCop(OP_SCOMPLEMENT);
	6798	}
	6799	BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
	6800	}
	6801
	6802	static int
	6803	yyl_leftparen(pTHX_ char *s)
	6804	{
	6805	if (PL_last_lop == PL_oldoldbufptr \|\| PL_last_uni == PL_oldoldbufptr)
	6806	PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
	6807	else
	6808	PL_expect = XTERM;
	6809	s = skipspace(s);
	6810	PL_lex_allbrackets++;
	6811	TOKEN(PERLY_PAREN_OPEN);
	6812	}
	6813
	6814	static int
	6815	yyl_rightparen(pTHX_ char *s)
	6816	{
	6817	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
	6818	TOKEN(0);
	6819	s++;
	6820	PL_lex_allbrackets--;
	6821	s = skipspace(s);
	6822	if (*s == '{')
	6823	PREBLOCK(PERLY_PAREN_CLOSE);
	6824	TERM(PERLY_PAREN_CLOSE);
	6825	}
	6826
	6827	static int
	6828	yyl_leftpointy(pTHX_ char *s)
	6829	{
	6830	char tmp;
	6831
	6832	if (PL_expect != XOPERATOR) {
	6833	if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
	6834	check_uni();
	6835	if (s[1] == '<' && s[2] != '>')
	6836	s = scan_heredoc(s);
	6837	else
	6838	s = scan_inputsymbol(s);
	6839	PL_expect = XOPERATOR;
	6840	TOKEN(sublex_start());
	6841	}
	6842
	6843	s++;
	6844
	6845	tmp = *s++;
	6846	if (tmp == '<') {
	6847	if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
	6848	s -= 2;
	6849	TOKEN(0);
	6850	}
	6851	SHop(OP_LEFT_SHIFT);
	6852	}
	6853	if (tmp == '=') {
	6854	tmp = *s++;
	6855	if (tmp == '>') {
	6856	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
	6857	s -= 3;
	6858	TOKEN(0);
	6859	}
	6860	NCEop(OP_NCMP);
	6861	}
	6862	s--;
	6863	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
	6864	s -= 2;
	6865	TOKEN(0);
	6866	}
	6867	ChRop(OP_LE);
	6868	}
	6869
	6870	s--;
	6871	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
	6872	s--;
	6873	TOKEN(0);
	6874	}
	6875
	6876	ChRop(OP_LT);
	6877	}
	6878
	6879	static int
	6880	yyl_rightpointy(pTHX_ char *s)
	6881	{
	6882	const char tmp = *s++;
	6883
	6884	if (tmp == '>') {
	6885	if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
	6886	s -= 2;
	6887	TOKEN(0);
	6888	}
	6889	SHop(OP_RIGHT_SHIFT);
	6890	}
	6891	else if (tmp == '=') {
	6892	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
	6893	s -= 2;
	6894	TOKEN(0);
	6895	}
	6896	ChRop(OP_GE);
	6897	}
	6898
	6899	s--;
	6900	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
	6901	s--;
	6902	TOKEN(0);
	6903	}
	6904
	6905	ChRop(OP_GT);
	6906	}
	6907
	6908	static int
	6909	yyl_sglquote(pTHX_ char *s)
	6910	{
	6911	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
	6912	if (!s)
	6913	missingterm(NULL, 0);
	6914	COPLINE_SET_FROM_MULTI_END;
	6915	DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
	6916	if (PL_expect == XOPERATOR) {
	6917	no_op("String",s);
	6918	}
	6919	pl_yylval.ival = OP_CONST;
	6920	TERM(sublex_start());
	6921	}
	6922
	6923	static int
	6924	yyl_dblquote(pTHX_ char *s)
	6925	{
	6926	char *d;
	6927	STRLEN len;
	6928	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
	6929	DEBUG_T( {
	6930	if (s)
	6931	printbuf("### Saw string before %s\n", s);
	6932	else
	6933	PerlIO_printf(Perl_debug_log,
	6934	"### Saw unterminated string\n");
	6935	} );
	6936	if (PL_expect == XOPERATOR) {
	6937	no_op("String",s);
	6938	}
	6939	if (!s)
	6940	missingterm(NULL, 0);
	6941	pl_yylval.ival = OP_CONST;
	6942	/* FIXME. I think that this can be const if char *d is replaced by
	6943	more localised variables. */
	6944	for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
	6945	if (d == '$' \|\| d == '@' \|\| d == '\\' \|\| !UTF8_IS_INVARIANT((U8)d)) {
	6946	pl_yylval.ival = OP_STRINGIFY;
	6947	break;
	6948	}
	6949	}
	6950	if (pl_yylval.ival == OP_CONST)
	6951	COPLINE_SET_FROM_MULTI_END;
	6952	TERM(sublex_start());
	6953	}
	6954
	6955	static int
	6956	yyl_backtick(pTHX_ char *s)
	6957	{
	6958	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
	6959	DEBUG_T( {
	6960	if (s)
	6961	printbuf("### Saw backtick string before %s\n", s);
	6962	else
	6963	PerlIO_printf(Perl_debug_log,
	6964	"### Saw unterminated backtick string\n");
	6965	} );
	6966	if (PL_expect == XOPERATOR)
	6967	no_op("Backticks",s);
	6968	if (!s)
	6969	missingterm(NULL, 0);
	6970	pl_yylval.ival = OP_BACKTICK;
	6971	TERM(sublex_start());
	6972	}
	6973
	6974	static int
	6975	yyl_backslash(pTHX_ char *s)
	6976	{
	6977	if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
	6978	Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
	6979	s, s);
	6980	if (PL_expect == XOPERATOR)
	6981	no_op("Backslash",s);
	6982	OPERATOR(REFGEN);
	6983	}
	6984
	6985	static void
	6986	yyl_data_handle(pTHX)
	6987	{
	6988	HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
	6989	? PL_curstash
	6990	: PL_defstash;
	6991	GV gv = (GV )*hv_fetchs(stash, "DATA", 1);
	6992
	6993	if (!isGV(gv))
	6994	gv_init(gv,stash,"DATA",4,0);
	6995
	6996	GvMULTI_on(gv);
	6997	if (!GvIO(gv))
	6998	GvIOp(gv) = newIO();
	6999	IoIFP(GvIOp(gv)) = PL_rsfp;
	7000
	7001	/* Mark this internal pseudo-handle as clean */
	7002	IoFLAGS(GvIOp(gv)) \|= IOf_UNTAINT;
	7003	if ((PerlIO*)PL_rsfp == PerlIO_stdin())
	7004	IoTYPE(GvIOp(gv)) = IoTYPE_STD;
	7005	else
	7006	IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
	7007
	7008	#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
	7009	/* if the script was opened in binmode, we need to revert
	7010	* it to text mode for compatibility; but only iff it has CRs
	7011	* XXX this is a questionable hack at best. */
	7012	if (PL_bufend-PL_bufptr > 2
	7013	&& PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
	7014	{
	7015	Off_t loc = 0;
	7016	if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
	7017	loc = PerlIO_tell(PL_rsfp);
	7018	(void)PerlIO_seek(PL_rsfp, 0L, 0);
	7019	}
	7020	if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
	7021	if (loc > 0)
	7022	PerlIO_seek(PL_rsfp, loc, 0);
	7023	}
	7024	}
	7025	#endif
	7026
	7027	#ifdef PERLIO_LAYERS
	7028	if (!IN_BYTES) {
	7029	if (UTF)
	7030	PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
	7031	}
	7032	#endif
	7033
	7034	PL_rsfp = NULL;
	7035	}
	7036
	7037	PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
	7038	__attribute__noreturn__;
	7039
	7040	PERL_STATIC_NO_RET void
	7041	yyl_croak_unrecognised(pTHX_ char *s)
	7042	{
	7043	SV *dsv = newSVpvs_flags("", SVs_TEMP);
	7044	const char *c;
	7045	char *d;
	7046	STRLEN len;
	7047
	7048	if (UTF) {
	7049	STRLEN skiplen = UTF8SKIP(s);
	7050	STRLEN stravail = PL_bufend - s;
	7051	c = sv_uni_display(dsv, newSVpvn_flags(s,
	7052	skiplen > stravail ? stravail : skiplen,
	7053	SVs_TEMP \| SVf_UTF8),
	7054	10, UNI_DISPLAY_ISPRINT);
	7055	}
	7056	else {
	7057	c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
	7058	}
	7059
	7060	if (s >= PL_linestart) {
	7061	d = PL_linestart;
	7062	}
	7063	else {
	7064	/* somehow (probably due to a parse failure), PL_linestart has advanced
	7065	* pass PL_bufptr, get a reasonable beginning of line
	7066	*/
	7067	d = s;
	7068	while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
	7069	--d;
	7070	}
	7071	len = UTF ? Perl_utf8_length(aTHX_ (U8 ) d, (U8 ) s) : (STRLEN) (s - d);
	7072	if (len > UNRECOGNIZED_PRECEDE_COUNT) {
	7073	d = UTF ? (char ) utf8_hop_back((U8 ) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
	7074	}
	7075
	7076	Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
	7077	UTF8fARG(UTF, (s - d), d),
	7078	(int) len + 1);
	7079	}
	7080
	7081	static int
	7082	yyl_require(pTHX_ char *s, I32 orig_keyword)
	7083	{
	7084	s = skipspace(s);
	7085	if (isDIGIT(*s)) {
	7086	s = force_version(s, FALSE);
	7087	}
	7088	else if (*s != 'v' \|\| !isDIGIT(s[1])
	7089	\|\| (s = force_version(s, TRUE), *s == 'v'))
	7090	{
	7091	*PL_tokenbuf = '\0';
	7092	s = force_word(s,BAREWORD,TRUE,TRUE);
	7093	if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
	7094	PL_tokenbuf + sizeof(PL_tokenbuf),
	7095	UTF))
	7096	{
	7097	gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
	7098	GV_ADD \| (UTF ? SVf_UTF8 : 0));
	7099	}
	7100	else if (*s == '<')
	7101	yyerror("<> at require-statement should be quotes");
	7102	}
	7103
	7104	if (orig_keyword == KEY_require)
	7105	pl_yylval.ival = 1;
	7106	else
	7107	pl_yylval.ival = 0;
	7108
	7109	PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
	7110	PL_bufptr = s;
	7111	PL_last_uni = PL_oldbufptr;
	7112	PL_last_lop_op = OP_REQUIRE;
	7113	s = skipspace(s);
	7114	return REPORT( (int)KW_REQUIRE );
	7115	}
	7116
	7117	static int
	7118	yyl_foreach(pTHX_ char *s)
	7119	{
	7120	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
	7121	return REPORT(0);
	7122	pl_yylval.ival = CopLINE(PL_curcop);
	7123	s = skipspace(s);
	7124	if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
	7125	char *p = s;
	7126	SSize_t s_off = s - SvPVX(PL_linestr);
	7127	bool paren_is_valid = FALSE;
	7128	bool maybe_package = FALSE;
	7129	bool saw_core = FALSE;
	7130	bool core_valid = FALSE;
	7131
	7132	if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
	7133	saw_core = TRUE;
	7134	p += 6;
	7135	}
	7136	if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
	7137	core_valid = TRUE;
	7138	paren_is_valid = TRUE;
	7139	if (isSPACE(p[2])) {
	7140	p = skipspace(p + 3);
	7141	maybe_package = TRUE;
	7142	}
	7143	else {
	7144	p += 2;
	7145	}
	7146	}
	7147	else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
	7148	core_valid = TRUE;
	7149	if (isSPACE(p[3])) {
	7150	p = skipspace(p + 4);
	7151	maybe_package = TRUE;
	7152	}
	7153	else {
	7154	p += 3;
	7155	}
	7156	}
	7157	else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
	7158	core_valid = TRUE;
	7159	if (isSPACE(p[5])) {
	7160	p = skipspace(p + 6);
	7161	}
	7162	else {
	7163	p += 5;
	7164	}
	7165	}
	7166	if (saw_core && !core_valid) {
	7167	Perl_croak(aTHX_ "Missing $ on loop variable");
	7168	}
	7169
	7170	if (maybe_package && !saw_core) {
	7171	/* skip optional package name, as in "for my abc $x (..)" */
	7172	if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
	7173	STRLEN len;
	7174	p = scan_word6(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE);
	7175	p = skipspace(p);
	7176	paren_is_valid = FALSE;
	7177	}
	7178	}
	7179
	7180	if (UNLIKELY(paren_is_valid && *p == '(')) {
	7181	Perl_ck_warner_d(aTHX_
	7182	packWARN(WARN_EXPERIMENTAL__FOR_LIST),
	7183	"for my (...) is experimental");
	7184	}
	7185	else if (UNLIKELY(p != '$' && p != '\\')) {
	7186	/* "for myfoo (" will end up here, but with p pointing at the 'f' */
	7187	Perl_croak(aTHX_ "Missing $ on loop variable");
	7188	}
	7189	/* The buffer may have been reallocated, update s */
	7190	s = SvPVX(PL_linestr) + s_off;
	7191	}
	7192	OPERATOR(KW_FOR);
	7193	}
	7194
	7195	static int
	7196	yyl_do(pTHX_ char *s, I32 orig_keyword)
	7197	{
	7198	s = skipspace(s);
	7199	if (*s == '{')
	7200	PRETERMBLOCK(KW_DO);
	7201	if (*s != '\'') {
	7202	char *d;
	7203	STRLEN len;
	7204	*PL_tokenbuf = '&';
	7205	d = scan_word6(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
	7206	1, &len, TRUE);
	7207	if (len && memNEs(PL_tokenbuf+1, len, "CORE")
	7208	&& !keyword(PL_tokenbuf + 1, len, 0)) {
	7209	SSize_t off = s-SvPVX(PL_linestr);
	7210	d = skipspace(d);
	7211	s = SvPVX(PL_linestr)+off;
	7212	if (*d == '(') {
	7213	force_ident_maybe_lex('&');
	7214	s = d;
	7215	}
	7216	}
	7217	}
	7218	if (orig_keyword == KEY_do)
	7219	pl_yylval.ival = 1;
	7220	else
	7221	pl_yylval.ival = 0;
	7222	OPERATOR(KW_DO);
	7223	}
	7224
	7225	static int
	7226	yyl_my(pTHX_ char *s, I32 my)
	7227	{
	7228	if (PL_in_my) {
	7229	PL_bufptr = s;
	7230	yyerror(Perl_form(aTHX_
	7231	"Can't redeclare \"%s\" in \"%s\"",
	7232	my == KEY_my ? "my" :
	7233	my == KEY_state ? "state" : "our",
	7234	PL_in_my == KEY_my ? "my" :
	7235	PL_in_my == KEY_state ? "state" : "our"));
	7236	}
	7237	PL_in_my = (U16)my;
	7238	s = skipspace(s);
	7239	if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
	7240	STRLEN len;
	7241	s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE);
	7242	if (memEQs(PL_tokenbuf, len, "sub"))
	7243	return yyl_sub(aTHX_ s, my);
	7244	PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
	7245	if (!PL_in_my_stash) {
	7246	char tmpbuf[1024];
	7247	int i;
	7248	PL_bufptr = s;
	7249	i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
	7250	PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
	7251	yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
	7252	}
	7253	}
	7254	else if (*s == '\\') {
	7255	if (!FEATURE_MYREF_IS_ENABLED)
	7256	Perl_croak(aTHX_ "The experimental declared_refs "
	7257	"feature is not enabled");
	7258	Perl_ck_warner_d(aTHX_
	7259	packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
	7260	"Declaring references is experimental");
	7261	}
	7262	OPERATOR(KW_MY);
	7263	}
	7264
	7265	static int yyl_try(pTHX_ char*);
	7266
	7267	static bool
	7268	yyl_eol_needs_semicolon(pTHX_ char **ps)
	7269	{
	7270	char s = ps;
	7271	if (PL_lex_state != LEX_NORMAL
	7272	\|\| (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
	7273	{
	7274	const bool in_comment = *s == '#';
	7275	char *d;
	7276	if (*s == '#' && s == PL_linestart && PL_in_eval
	7277	&& !PL_rsfp && !PL_parser->filtered) {
	7278	/* handle eval qq[#line 1 "foo"\n ...] */
	7279	CopLINE_dec(PL_curcop);
	7280	incline(s, PL_bufend);
	7281	}
	7282	d = s;
	7283	while (d < PL_bufend && *d != '\n')
	7284	d++;
	7285	if (d < PL_bufend)
	7286	d++;
	7287	s = d;
	7288	if (in_comment && d == PL_bufend
	7289	&& PL_lex_state == LEX_INTERPNORMAL
	7290	&& PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
	7291	&& SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
	7292	else
	7293	incline(s, PL_bufend);
	7294	if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
	7295	PL_lex_state = LEX_FORMLINE;
	7296	force_next(FORMRBRACK);
	7297	*ps = s;
	7298	return TRUE;
	7299	}
	7300	}
	7301	else {
	7302	while (s < PL_bufend && *s != '\n')
	7303	s++;
	7304	if (s < PL_bufend) {
	7305	s++;
	7306	if (s < PL_bufend)
	7307	incline(s, PL_bufend);
	7308	}
	7309	}
	7310	*ps = s;
	7311	return FALSE;
	7312	}
	7313
	7314	static int
	7315	yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
	7316	{
	7317	char *d;
	7318
	7319	goto start;
	7320
	7321	do {
	7322	fake_eof = 0;
	7323	bof = cBOOL(PL_rsfp);
	7324	start:
	7325
	7326	PL_bufptr = PL_bufend;
	7327	COPLINE_INC_WITH_HERELINES;
	7328	if (!lex_next_chunk(fake_eof)) {
	7329	CopLINE_dec(PL_curcop);
	7330	s = PL_bufptr;
	7331	TOKEN(PERLY_SEMICOLON); /* not infinite loop because rsfp is NULL now */
	7332	}
	7333	CopLINE_dec(PL_curcop);
	7334	s = PL_bufptr;
	7335	/* If it looks like the start of a BOM or raw UTF-16,
	7336	* check if it in fact is. */
	7337	if (bof && PL_rsfp
	7338	&& ( *s == 0
	7339	\|\| (U8)s == BOM_UTF8_FIRST_BYTE
	7340	\|\| (U8)s >= 0xFE
	7341	\|\| s[1] == 0))
	7342	{
	7343	Off_t offset = (IV)PerlIO_tell(PL_rsfp);
	7344	bof = (offset == (Off_t)SvCUR(PL_linestr));
	7345	#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
	7346	/* offset may include swallowed CR */
	7347	if (!bof)
	7348	bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
	7349	#endif
	7350	if (bof) {
	7351	PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
	7352	s = swallow_bom((U8*)s);
	7353	}
	7354	}
	7355	if (PL_parser->in_pod) {
	7356	/* Incest with pod. */
	7357	if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
	7358	&& !isALPHA(s[4]))
	7359	{
	7360	SvPVCLEAR(PL_linestr);
	7361	PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
	7362	PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
	7363	PL_last_lop = PL_last_uni = NULL;
	7364	PL_parser->in_pod = 0;
	7365	}
	7366	}
	7367	if (PL_rsfp \|\| PL_parser->filtered)
	7368	incline(s, PL_bufend);
	7369	} while (PL_parser->in_pod);
	7370
	7371	PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
	7372	PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
	7373	PL_last_lop = PL_last_uni = NULL;
	7374	if (CopLINE(PL_curcop) == 1) {
	7375	while (s < PL_bufend && isSPACE(*s))
	7376	s++;
	7377	if (s == ':' && s[1] != ':') / for csh execing sh scripts */
	7378	s++;
	7379	d = NULL;
	7380	if (!PL_in_eval) {
	7381	if (s == '#' && (s+1) == '!')
	7382	d = s + 2;
	7383	#ifdef ALTERNATE_SHEBANG
	7384	else {
	7385	static char const as[] = ALTERNATE_SHEBANG;
	7386	if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
	7387	d = s + (sizeof(as) - 1);
	7388	}
	7389	#endif /* ALTERNATE_SHEBANG */
	7390	}
	7391	if (d) {
	7392	char *ipath;
	7393	char *ipathend;
	7394
	7395	while (isSPACE(*d))
	7396	d++;
	7397	ipath = d;
	7398	while (d && !isSPACE(d))
	7399	d++;
	7400	ipathend = d;
	7401
	7402	#ifdef ARG_ZERO_IS_SCRIPT
	7403	if (ipathend > ipath) {
	7404	/*
	7405	* HP-UX (at least) sets argv[0] to the script name,
	7406	* which makes $^X incorrect. And Digital UNIX and Linux,
	7407	* at least, set argv[0] to the basename of the Perl
	7408	* interpreter. So, having found "#!", we'll set it right.
	7409	*/
	7410	SV* copfilesv = CopFILESV(PL_curcop);
	7411	if (copfilesv) {
	7412	SV * const x =
	7413	GvSV(gv_fetchpvs("\030", GV_ADD\|GV_NOTQUAL,
	7414	SVt_PV)); /* $^X */
	7415	assert(SvPOK(x) \|\| SvGMAGICAL(x));
	7416	if (sv_eq(x, copfilesv)) {
	7417	sv_setpvn(x, ipath, ipathend - ipath);
	7418	SvSETMAGIC(x);
	7419	}
	7420	else {
	7421	STRLEN blen;
	7422	STRLEN llen;
	7423	const char *bstart = SvPV_const(copfilesv, blen);
	7424	const char * const lstart = SvPV_const(x, llen);
	7425	if (llen < blen) {
	7426	bstart += blen - llen;
	7427	if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
	7428	sv_setpvn(x, ipath, ipathend - ipath);
	7429	SvSETMAGIC(x);
	7430	}
	7431	}
	7432	}
	7433	}
	7434	else {
	7435	/* Anything to do if no copfilesv? */
	7436	}
	7437	TAINT_NOT; /* $^X is always tainted, but that's OK */
	7438	}
	7439	#endif /* ARG_ZERO_IS_SCRIPT */
	7440
	7441	/*
	7442	* Look for options.
	7443	*/
	7444	d = instr(s,"perl -");
	7445	if (!d) {
	7446	d = instr(s,"perl");
	7447	#if defined(DOSISH)
	7448	/* avoid getting into infinite loops when shebang
	7449	* line contains "Perl" rather than "perl" */
	7450	if (!d) {
	7451	for (d = ipathend-4; d >= ipath; --d) {
	7452	if (isALPHA_FOLD_EQ(*d, 'p')
	7453	&& !ibcmp(d, "perl", 4))
	7454	{
	7455	break;
	7456	}
	7457	}
	7458	if (d < ipath)
	7459	d = NULL;
	7460	}
	7461	#endif
	7462	}
	7463	#ifdef ALTERNATE_SHEBANG
	7464	/*
	7465	* If the ALTERNATE_SHEBANG on this system starts with a
	7466	* character that can be part of a Perl expression, then if
	7467	* we see it but not "perl", we're probably looking at the
	7468	* start of Perl code, not a request to hand off to some
	7469	* other interpreter. Similarly, if "perl" is there, but
	7470	* not in the first 'word' of the line, we assume the line
	7471	* contains the start of the Perl program.
	7472	*/
	7473	if (d && *s != '#') {
	7474	const char *c = ipath;
	7475	while (c && !memCHRs("; \t\r\n\f\v#", c))
	7476	c++;
	7477	if (c < d)
	7478	d = NULL; /* "perl" not in first word; ignore */
	7479	else
	7480	s = '#'; / Don't try to parse shebang line */
	7481	}
	7482	#endif /* ALTERNATE_SHEBANG */
	7483	if (!d
	7484	&& *s == '#'
	7485	&& ipathend > ipath
	7486	&& !PL_minus_c
	7487	&& !instr(s,"indir")
	7488	&& instr(PL_origargv[0],"perl"))
	7489	{
	7490	char **newargv;
	7491
	7492	*ipathend = '\0';
	7493	s = ipathend + 1;
	7494	while (s < PL_bufend && isSPACE(*s))
	7495	s++;
	7496	if (s < PL_bufend) {
	7497	Newx(newargv,PL_origargc+3,char*);
	7498	newargv[1] = s;
	7499	while (s < PL_bufend && !isSPACE(*s))
	7500	s++;
	7501	*s = '\0';
	7502	Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
	7503	}
	7504	else
	7505	newargv = PL_origargv;
	7506	newargv[0] = ipath;
	7507	PERL_FPU_PRE_EXEC
	7508	PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
	7509	PERL_FPU_POST_EXEC
	7510	Perl_croak(aTHX_ "Can't exec %s", ipath);
	7511	}
	7512	if (d) {
	7513	while (d && !isSPACE(d))
	7514	d++;
	7515	while (SPACE_OR_TAB(*d))
	7516	d++;
	7517
	7518	if (*d++ == '-') {
	7519	const bool switches_done = PL_doswitches;
	7520	const U32 oldpdb = PL_perldb;
	7521	const bool oldn = PL_minus_n;
	7522	const bool oldp = PL_minus_p;
	7523	const char *d1 = d;
	7524
	7525	do {
	7526	bool baduni = FALSE;
	7527	if (*d1 == 'C') {
	7528	const char *d2 = d1 + 1;
	7529	if (parse_unicode_opts((const char **)&d2)
	7530	!= PL_unicode)
	7531	baduni = TRUE;
	7532	}
	7533	if (baduni \|\| isALPHA_FOLD_EQ(*d1, 'M')) {
	7534	const char * const m = d1;
	7535	while (d1 && !isSPACE(d1))
	7536	d1++;
	7537	Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
	7538	(int)(d1 - m), m);
	7539	}
	7540	d1 = moreswitches(d1);
	7541	} while (d1);
	7542	if (PL_doswitches && !switches_done) {
	7543	int argc = PL_origargc;
	7544	char **argv = PL_origargv;
	7545	do {
	7546	argc--,argv++;
	7547	} while (argc && argv[0][0] == '-' && argv[0][1]);
	7548	init_argv_symbols(argc,argv);
	7549	}
	7550	if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
	7551	\|\| ((PL_minus_n \|\| PL_minus_p) && !(oldn \|\| oldp)))
	7552	/* if we have already added "LINE: while (<>) {",
	7553	we must not do it again */
	7554	{
	7555	SvPVCLEAR(PL_linestr);
	7556	PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
	7557	PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
	7558	PL_last_lop = PL_last_uni = NULL;
	7559	PL_preambled = FALSE;
	7560	if (PERLDB_LINE_OR_SAVESRC)
	7561	(void)gv_fetchfile(PL_origfilename);
	7562	return YYL_RETRY;
	7563	}
	7564	}
	7565	}
	7566	}
	7567	}
	7568
	7569	if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
	7570	PL_lex_state = LEX_FORMLINE;
	7571	force_next(FORMRBRACK);
	7572	TOKEN(PERLY_SEMICOLON);
	7573	}
	7574
	7575	PL_bufptr = s;
	7576	return YYL_RETRY;
	7577	}
	7578
	7579	static int
	7580	yyl_fatcomma(pTHX_ char *s, STRLEN len)
	7581	{
	7582	CLINE;
	7583	pl_yylval.opval
	7584	= newSVOP(OP_CONST, 0,
	7585	S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
	7586	pl_yylval.opval->op_private = OPpCONST_BARE;
	7587	TERM(BAREWORD);
	7588	}
	7589
	7590	static int
	7591	yyl_safe_bareword(pTHX_ char *s, const char lastchar)
	7592	{
	7593	if ((lastchar == '*' \|\| lastchar == '%' \|\| lastchar == '&')
	7594	&& PL_parser->saw_infix_sigil)
	7595	{
	7596	Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
	7597	"Operator or semicolon missing before %c%" UTF8f,
	7598	lastchar,
	7599	UTF8fARG(UTF, strlen(PL_tokenbuf),
	7600	PL_tokenbuf));
	7601	Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
	7602	"Ambiguous use of %c resolved as operator %c",
	7603	lastchar, lastchar);
	7604	}
	7605	TOKEN(BAREWORD);
	7606	}
	7607
	7608	static int
	7609	yyl_constant_op(pTHX_ char s, SV sv, CV cv, OP rv2cv_op, PADOFFSET off)
	7610	{
	7611	if (sv) {
	7612	op_free(rv2cv_op);
	7613	SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
	7614	((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
	7615	if (SvTYPE(sv) == SVt_PVAV)
	7616	pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
	7617	pl_yylval.opval);
	7618	else {
	7619	pl_yylval.opval->op_private = 0;
	7620	pl_yylval.opval->op_folded = 1;
	7621	pl_yylval.opval->op_flags \|= OPf_SPECIAL;
	7622	}
	7623	TOKEN(BAREWORD);
	7624	}
	7625
	7626	op_free(pl_yylval.opval);
	7627	pl_yylval.opval =
	7628	off ? newCVREF(0, rv2cv_op) : rv2cv_op;
	7629	pl_yylval.opval->op_private \|= OPpENTERSUB_NOPAREN;
	7630	PL_last_lop = PL_oldbufptr;
	7631	PL_last_lop_op = OP_ENTERSUB;
	7632
	7633	/* Is there a prototype? */
	7634	if (SvPOK(cv)) {
	7635	int k = yyl_subproto(aTHX_ s, cv);
	7636	if (k != KEY_NULL)
	7637	return k;
	7638	}
	7639
	7640	NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
	7641	PL_expect = XTERM;
	7642	force_next(off ? PRIVATEREF : BAREWORD);
	7643	if (!PL_lex_allbrackets
	7644	&& PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
	7645	{
	7646	PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
	7647	}
	7648
	7649	TOKEN(NOAMP);
	7650	}
	7651
	7652	/* Honour "reserved word" warnings, and enforce strict subs */
	7653	static void
	7654	yyl_strictwarn_bareword(pTHX_ const char lastchar)
	7655	{
	7656	/* after "print" and similar functions (corresponding to
	7657	* "F? L" in opcode.pl), whatever wasn't already parsed as
	7658	* a filehandle should be subject to "strict subs".
	7659	* Likewise for the optional indirect-object argument to system
	7660	* or exec, which can't be a bareword */
	7661	if ((PL_last_lop_op == OP_PRINT
	7662	\|\| PL_last_lop_op == OP_PRTF
	7663	\|\| PL_last_lop_op == OP_SAY
	7664	\|\| PL_last_lop_op == OP_SYSTEM
	7665	\|\| PL_last_lop_op == OP_EXEC)
	7666	&& (PL_hints & HINT_STRICT_SUBS))
	7667	{
	7668	pl_yylval.opval->op_private \|= OPpCONST_STRICT;
	7669	}
	7670
	7671	if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
	7672	char *d = PL_tokenbuf;
	7673	while (isLOWER(*d))
	7674	d++;
	7675	if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
	7676	/* PL_warn_reserved is constant */
	7677	GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
	7678	Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
	7679	PL_tokenbuf);
	7680	GCC_DIAG_RESTORE_STMT;
	7681	}
	7682	}
	7683	}
	7684
	7685	static int
	7686	yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
	7687	{
	7688	int pkgname = 0;
	7689	const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
	7690	bool safebw;
	7691	bool no_op_error = FALSE;
	7692	/* Use this var to track whether intuit_method has been
	7693	called. intuit_method returns 0 or > 255. */
	7694	int key = 1;
	7695
	7696	if (PL_expect == XOPERATOR) {
	7697	if (PL_bufptr == PL_linestart) {
	7698	CopLINE_dec(PL_curcop);
	7699	Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
	7700	CopLINE_inc(PL_curcop);
	7701	}
	7702	else
	7703	/* We want to call no_op with s pointing after the
	7704	bareword, so defer it. But we want it to come
	7705	before the Bad name croak. */
	7706	no_op_error = TRUE;
	7707	}
	7708
	7709	/* Get the rest if it looks like a package qualifier */
	7710
	7711	if (s == '\'' \|\| (s == ':' && s[1] == ':')) {
	7712	STRLEN morelen;
	7713	s = scan_word6(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
	7714	TRUE, &morelen, TRUE);
	7715	if (no_op_error) {
	7716	no_op("Bareword",s);
	7717	no_op_error = FALSE;
	7718	}
	7719	if (!morelen)
	7720	Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
	7721	UTF8fARG(UTF, len, PL_tokenbuf),
	7722	*s == '\'' ? "'" : "::");
	7723	len += morelen;
	7724	pkgname = 1;
	7725	}
	7726
	7727	if (no_op_error)
	7728	no_op("Bareword",s);
	7729
	7730	/* See if the name is "Foo::",
	7731	in which case Foo is a bareword
	7732	(and a package name). */
	7733
	7734	if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
	7735	if (ckWARN(WARN_BAREWORD)
	7736	&& ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
	7737	Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
	7738	"Bareword \"%" UTF8f
	7739	"\" refers to nonexistent package",
	7740	UTF8fARG(UTF, len, PL_tokenbuf));
	7741	len -= 2;
	7742	PL_tokenbuf[len] = '\0';
	7743	c.gv = NULL;
	7744	c.gvp = 0;
	7745	safebw = TRUE;
	7746	}
	7747	else {
	7748	safebw = FALSE;
	7749	}
	7750
	7751	/* if we saw a global override before, get the right name */
	7752
	7753	if (!c.sv)
	7754	c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
	7755	if (c.gvp) {
	7756	SV *sv = newSVpvs("CORE::GLOBAL::");
	7757	sv_catsv(sv, c.sv);
	7758	SvREFCNT_dec(c.sv);
	7759	c.sv = sv;
	7760	}
	7761
	7762	/* Presume this is going to be a bareword of some sort. */
	7763	CLINE;
	7764	pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
	7765	pl_yylval.opval->op_private = OPpCONST_BARE;
	7766
	7767	/* And if "Foo::", then that's what it certainly is. */
	7768	if (safebw)
	7769	return yyl_safe_bareword(aTHX_ s, lastchar);
	7770
	7771	if (!c.off) {
	7772	OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
	7773	const_op->op_private = OPpCONST_BARE;
	7774	c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
	7775	c.cv = c.lex
	7776	? isGV(c.gv)
	7777	? GvCV(c.gv)
	7778	: SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
	7779	? (CV *)SvRV(c.gv)
	7780	: ((CV *)c.gv)
	7781	: rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
	7782	}
	7783
	7784	/* See if it's the indirect object for a list operator. */
	7785
	7786	if (PL_oldoldbufptr
	7787	&& PL_oldoldbufptr < PL_bufptr
	7788	&& (PL_oldoldbufptr == PL_last_lop
	7789	\|\| PL_oldoldbufptr == PL_last_uni)
	7790	&& /* NO SKIPSPACE BEFORE HERE! */
	7791	(PL_expect == XREF
	7792	\|\| ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
	7793	== OA_FILEREF))
	7794	{
	7795	bool immediate_paren = *s == '(';
	7796	SSize_t s_off;
	7797
	7798	/* (Now we can afford to cross potential line boundary.) */
	7799	s = skipspace(s);
	7800
	7801	/* intuit_method() can indirectly call lex_next_chunk(),
	7802	* invalidating s
	7803	*/
	7804	s_off = s - SvPVX(PL_linestr);
	7805	/* Two barewords in a row may indicate method call. */
	7806	if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
	7807	\|\| *s == '$')
	7808	&& (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
	7809	{
	7810	/* the code at method: doesn't use s */
	7811	goto method;
	7812	}
	7813	s = SvPVX(PL_linestr) + s_off;
	7814
	7815	/* If not a declared subroutine, it's an indirect object. */
	7816	/* (But it's an indir obj regardless for sort.) */
	7817	/* Also, if "_" follows a filetest operator, it's a bareword */
	7818
	7819	if (
	7820	( !immediate_paren && (PL_last_lop_op == OP_SORT
	7821	\|\| (!c.cv
	7822	&& (PL_last_lop_op != OP_MAPSTART
	7823	&& PL_last_lop_op != OP_GREPSTART))))
	7824	\|\| (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
	7825	&& ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
	7826	== OA_FILESTATOP))
	7827	)
	7828	{
	7829	PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
	7830	yyl_strictwarn_bareword(aTHX_ lastchar);
	7831	op_free(c.rv2cv_op);
	7832	return yyl_safe_bareword(aTHX_ s, lastchar);
	7833	}
	7834	}
	7835
	7836	PL_expect = XOPERATOR;
	7837	s = skipspace(s);
	7838
	7839	/* Is this a word before a => operator? */
	7840	if (*s == '=' && s[1] == '>' && !pkgname) {
	7841	op_free(c.rv2cv_op);
	7842	CLINE;
	7843	if (c.gvp \|\| (c.lex && !c.off)) {
	7844	assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
	7845	/* This is our own scalar, created a few lines
	7846	above, so this is safe. */
	7847	SvREADONLY_off(c.sv);
	7848	sv_setpv(c.sv, PL_tokenbuf);
	7849	if (UTF && !IN_BYTES
	7850	&& is_utf8_string((U8*)PL_tokenbuf, len))
	7851	SvUTF8_on(c.sv);
	7852	SvREADONLY_on(c.sv);
	7853	}
	7854	TERM(BAREWORD);
	7855	}
	7856
	7857	/* If followed by a paren, it's certainly a subroutine. */
	7858	if (*s == '(') {
	7859	CLINE;
	7860	if (c.cv) {
	7861	char *d = s + 1;
	7862	while (SPACE_OR_TAB(*d))
	7863	d++;
	7864	if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
	7865	return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
	7866	}
	7867	NEXTVAL_NEXTTOKE.opval =
	7868	c.off ? c.rv2cv_op : pl_yylval.opval;
	7869	if (c.off)
	7870	op_free(pl_yylval.opval), force_next(PRIVATEREF);
	7871	else op_free(c.rv2cv_op), force_next(BAREWORD);
	7872	pl_yylval.ival = 0;
	7873	TOKEN(PERLY_AMPERSAND);
	7874	}
	7875
	7876	/* If followed by var or block, call it a method (unless sub) */
	7877
	7878	if ((s == '$' \|\| s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
	7879	op_free(c.rv2cv_op);
	7880	PL_last_lop = PL_oldbufptr;
	7881	PL_last_lop_op = OP_METHOD;
	7882	if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
	7883	PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
	7884	PL_expect = XBLOCKTERM;
	7885	PL_bufptr = s;
	7886	return REPORT(METHCALL0);
	7887	}
	7888
	7889	/* If followed by a bareword, see if it looks like indir obj. */
	7890
	7891	if ( key == 1
	7892	&& !orig_keyword
	7893	&& (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) \|\| *s == '$')
	7894	&& (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
	7895	{
	7896	method:
	7897	if (c.lex && !c.off) {
	7898	assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
	7899	SvREADONLY_off(c.sv);
	7900	sv_setpvn(c.sv, PL_tokenbuf, len);
	7901	if (UTF && !IN_BYTES
	7902	&& is_utf8_string((U8*)PL_tokenbuf, len))
	7903	SvUTF8_on(c.sv);
	7904	else SvUTF8_off(c.sv);
	7905	}
	7906	op_free(c.rv2cv_op);
	7907	if (key == METHCALL0 && !PL_lex_allbrackets
	7908	&& PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
	7909	{
	7910	PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
	7911	}
	7912	return REPORT(key);
	7913	}
	7914
	7915	/* Not a method, so call it a subroutine (if defined) */
	7916
	7917	if (c.cv) {
	7918	/* Check for a constant sub */
	7919	c.sv = cv_const_sv_or_av(c.cv);
	7920	return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
	7921	}
	7922
	7923	/* Call it a bare word */
	7924
	7925	if (PL_hints & HINT_STRICT_SUBS)
	7926	pl_yylval.opval->op_private \|= OPpCONST_STRICT;
	7927	else
	7928	yyl_strictwarn_bareword(aTHX_ lastchar);
	7929
	7930	op_free(c.rv2cv_op);
	7931
	7932	return yyl_safe_bareword(aTHX_ s, lastchar);
	7933	}
	7934
	7935	static int
	7936	yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
	7937	{
	7938	switch (key) {
	7939	default: /* not a keyword */
	7940	return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
	7941
	7942	case KEY___FILE__:
	7943	FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
	7944
	7945	case KEY___LINE__:
	7946	FUN0OP(
	7947	newSVOP(OP_CONST, 0,
	7948	Perl_newSVpvf(aTHX_ "%" LINE_Tf, CopLINE(PL_curcop)))
	7949	);
	7950
	7951	case KEY___PACKAGE__:
	7952	FUN0OP(
	7953	newSVOP(OP_CONST, 0, (PL_curstash
	7954	? newSVhek(HvNAME_HEK(PL_curstash))
	7955	: &PL_sv_undef))
	7956	);
	7957
	7958	case KEY___DATA__:
	7959	case KEY___END__:
	7960	if (PL_rsfp && (!PL_in_eval \|\| PL_tokenbuf[2] == 'D'))
	7961	yyl_data_handle(aTHX);
	7962	return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
	7963
	7964	case KEY___SUB__:
	7965	/* If !CvCLONE(PL_compcv) then rpeep will probably turn this into an
	7966	* OP_CONST. We need to make it big enough to allow room for that if
	7967	* so */
	7968	FUN0OP(CvCLONE(PL_compcv)
	7969	? newOP(OP_RUNCV, 0)
	7970	: newSVOP(OP_RUNCV, 0, &PL_sv_undef));
	7971
	7972	case KEY___CLASS__:
	7973	FUN0(OP_CLASSNAME);
	7974
	7975	case KEY_AUTOLOAD:
	7976	case KEY_DESTROY:
	7977	case KEY_BEGIN:
	7978	case KEY_UNITCHECK:
	7979	case KEY_CHECK:
	7980	case KEY_INIT:
	7981	case KEY_END:
	7982	if (PL_expect == XSTATE)
	7983	return yyl_sub(aTHX_ PL_bufptr, key);
	7984	return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
	7985
	7986	case KEY_ADJUST:
	7987	Perl_ck_warner_d(aTHX_
	7988	packWARN(WARN_EXPERIMENTAL__CLASS), "ADJUST is experimental");
	7989
	7990	/* The way that KEY_CHECK et.al. are handled currently are nothing
	7991	* short of crazy. We won't copy that model for new phasers, but use
	7992	* this as an experiment to test if this will work
	7993	*/
	7994	PHASERBLOCK(KEY_ADJUST);
	7995
	7996	case KEY_abs:
	7997	UNI(OP_ABS);
	7998
	7999	case KEY_alarm:
	8000	UNI(OP_ALARM);
	8001
	8002	case KEY_accept:
	8003	LOP(OP_ACCEPT,XTERM);
	8004
	8005	case KEY_and:
	8006	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
	8007	return REPORT(0);
	8008	OPERATOR(ANDOP);
	8009
	8010	case KEY_atan2:
	8011	LOP(OP_ATAN2,XTERM);
	8012
	8013	case KEY_bind:
	8014	LOP(OP_BIND,XTERM);
	8015
	8016	case KEY_binmode:
	8017	LOP(OP_BINMODE,XTERM);
	8018
	8019	case KEY_bless:
	8020	LOP(OP_BLESS,XTERM);
	8021
	8022	case KEY_break:
	8023	FUN0(OP_BREAK);
	8024
	8025	case KEY_catch:
	8026	Perl_ck_warner_d(aTHX_
	8027	packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
	8028	PREBLOCK(KW_CATCH);
	8029
	8030	case KEY_chop:
	8031	UNI(OP_CHOP);
	8032
	8033	case KEY_class:
	8034	Perl_ck_warner_d(aTHX_
	8035	packWARN(WARN_EXPERIMENTAL__CLASS), "class is experimental");
	8036
	8037	s = force_word(s,BAREWORD,FALSE,TRUE);
	8038	s = skipspace(s);
	8039	s = force_strict_version(s);
	8040	PL_expect = XATTRBLOCK;
	8041	TOKEN(KW_CLASS);
	8042
	8043	case KEY_continue:
	8044	/* We have to disambiguate the two senses of
	8045	"continue". If the next token is a '{' then
	8046	treat it as the start of a continue block;
	8047	otherwise treat it as a control operator.
	8048	*/
	8049	s = skipspace(s);
	8050	if (*s == '{')
	8051	PREBLOCK(KW_CONTINUE);
	8052	else
	8053	FUN0(OP_CONTINUE);
	8054
	8055	case KEY_chdir:
	8056	/* may use HOME */
	8057	(void)gv_fetchpvs("ENV", GV_ADD\|GV_NOTQUAL, SVt_PVHV);
	8058	UNI(OP_CHDIR);
	8059
	8060	case KEY_close:
	8061	UNI(OP_CLOSE);
	8062
	8063	case KEY_closedir:
	8064	UNI(OP_CLOSEDIR);
	8065
	8066	case KEY_cmp:
	8067	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
	8068	return REPORT(0);
	8069	NCEop(OP_SCMP);
	8070
	8071	case KEY_caller:
	8072	UNI(OP_CALLER);
	8073
	8074	case KEY_crypt:
	8075
	8076	LOP(OP_CRYPT,XTERM);
	8077
	8078	case KEY_chmod:
	8079	LOP(OP_CHMOD,XTERM);
	8080
	8081	case KEY_chown:
	8082	LOP(OP_CHOWN,XTERM);
	8083
	8084	case KEY_connect:
	8085	LOP(OP_CONNECT,XTERM);
	8086
	8087	case KEY_chr:
	8088	UNI(OP_CHR);
	8089
	8090	case KEY_cos:
	8091	UNI(OP_COS);
	8092
	8093	case KEY_chroot:
	8094	UNI(OP_CHROOT);
	8095
	8096	case KEY_default:
	8097	PREBLOCK(KW_DEFAULT);
	8098
	8099	case KEY_defer:
	8100	Perl_ck_warner_d(aTHX_
	8101	packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental");
	8102	PREBLOCK(KW_DEFER);
	8103
	8104	case KEY_do:
	8105	return yyl_do(aTHX_ s, orig_keyword);
	8106
	8107	case KEY_die:
	8108	PL_hints \|= HINT_BLOCK_SCOPE;
	8109	LOP(OP_DIE,XTERM);
	8110
	8111	case KEY_defined:
	8112	UNI(OP_DEFINED);
	8113
	8114	case KEY_delete:
	8115	UNI(OP_DELETE);
	8116
	8117	case KEY_dbmopen:
	8118	Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
	8119	STR_WITH_LEN("NDBM_File::"),
	8120	STR_WITH_LEN("DB_File::"),
	8121	STR_WITH_LEN("GDBM_File::"),
	8122	STR_WITH_LEN("SDBM_File::"),
	8123	STR_WITH_LEN("ODBM_File::"),
	8124	NULL);
	8125	LOP(OP_DBMOPEN,XTERM);
	8126
	8127	case KEY_dbmclose:
	8128	UNI(OP_DBMCLOSE);
	8129
	8130	case KEY_dump:
	8131	LOOPX(OP_DUMP);
	8132
	8133	case KEY_else:
	8134	PREBLOCK(KW_ELSE);
	8135
	8136	case KEY_elsif:
	8137	pl_yylval.ival = CopLINE(PL_curcop);
	8138	OPERATOR(KW_ELSIF);
	8139
	8140	case KEY_eq:
	8141	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
	8142	return REPORT(0);
	8143	ChEop(OP_SEQ);
	8144
	8145	case KEY_exists:
	8146	UNI(OP_EXISTS);
	8147
	8148	case KEY_exit:
	8149	UNI(OP_EXIT);
	8150
	8151	case KEY_eval:
	8152	s = skipspace(s);
	8153	if (s == '{') { / block eval */
	8154	PL_expect = XTERMBLOCK;
	8155	UNIBRACK(OP_ENTERTRY);
	8156	}
	8157	else { /* string eval */
	8158	PL_expect = XTERM;
	8159	UNIBRACK(OP_ENTEREVAL);
	8160	}
	8161
	8162	case KEY_evalbytes:
	8163	PL_expect = XTERM;
	8164	UNIBRACK(-OP_ENTEREVAL);
	8165
	8166	case KEY_eof:
	8167	UNI(OP_EOF);
	8168
	8169	case KEY_exp:
	8170	UNI(OP_EXP);
	8171
	8172	case KEY_each:
	8173	UNI(OP_EACH);
	8174
	8175	case KEY_exec:
	8176	LOP(OP_EXEC,XREF);
	8177
	8178	case KEY_endhostent:
	8179	FUN0(OP_EHOSTENT);
	8180
	8181	case KEY_endnetent:
	8182	FUN0(OP_ENETENT);
	8183
	8184	case KEY_endservent:
	8185	FUN0(OP_ESERVENT);
	8186
	8187	case KEY_endprotoent:
	8188	FUN0(OP_EPROTOENT);
	8189
	8190	case KEY_endpwent:
	8191	FUN0(OP_EPWENT);
	8192
	8193	case KEY_endgrent:
	8194	FUN0(OP_EGRENT);
	8195
	8196	case KEY_field:
	8197	/* TODO: maybe this should use the same parser/grammar structures as
	8198	* `my`, but it's also rather messy because of the `our` conflation
	8199	*/
	8200	Perl_ck_warner_d(aTHX_
	8201	packWARN(WARN_EXPERIMENTAL__CLASS), "field is experimental");
	8202
	8203	croak_kw_unless_class("field");
	8204
	8205	PL_parser->in_my = KEY_field;
	8206	OPERATOR(KW_FIELD);
	8207
	8208	case KEY_finally:
	8209	Perl_ck_warner_d(aTHX_
	8210	packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental");
	8211	PREBLOCK(KW_FINALLY);
	8212
	8213	case KEY_for:
	8214	case KEY_foreach:
	8215	return yyl_foreach(aTHX_ s);
	8216
	8217	case KEY_formline:
	8218	LOP(OP_FORMLINE,XTERM);
	8219
	8220	case KEY_fork:
	8221	FUN0(OP_FORK);
	8222
	8223	case KEY_fc:
	8224	UNI(OP_FC);
	8225
	8226	case KEY_fcntl:
	8227	LOP(OP_FCNTL,XTERM);
	8228
	8229	case KEY_fileno:
	8230	UNI(OP_FILENO);
	8231
	8232	case KEY_flock:
	8233	LOP(OP_FLOCK,XTERM);
	8234
	8235	case KEY_gt:
	8236	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
	8237	return REPORT(0);
	8238	ChRop(OP_SGT);
	8239
	8240	case KEY_ge:
	8241	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
	8242	return REPORT(0);
	8243	ChRop(OP_SGE);
	8244
	8245	case KEY_grep:
	8246	LOP(OP_GREPSTART, XREF);
	8247
	8248	case KEY_goto:
	8249	LOOPX(OP_GOTO);
	8250
	8251	case KEY_gmtime:
	8252	UNI(OP_GMTIME);
	8253
	8254	case KEY_getc:
	8255	UNIDOR(OP_GETC);
	8256
	8257	case KEY_getppid:
	8258	FUN0(OP_GETPPID);
	8259
	8260	case KEY_getpgrp:
	8261	UNI(OP_GETPGRP);
	8262
	8263	case KEY_getpriority:
	8264	LOP(OP_GETPRIORITY,XTERM);
	8265
	8266	case KEY_getprotobyname:
	8267	UNI(OP_GPBYNAME);
	8268
	8269	case KEY_getprotobynumber:
	8270	LOP(OP_GPBYNUMBER,XTERM);
	8271
	8272	case KEY_getprotoent:
	8273	FUN0(OP_GPROTOENT);
	8274
	8275	case KEY_getpwent:
	8276	FUN0(OP_GPWENT);
	8277
	8278	case KEY_getpwnam:
	8279	UNI(OP_GPWNAM);
	8280
	8281	case KEY_getpwuid:
	8282	UNI(OP_GPWUID);
	8283
	8284	case KEY_getpeername:
	8285	UNI(OP_GETPEERNAME);
	8286
	8287	case KEY_gethostbyname:
	8288	UNI(OP_GHBYNAME);
	8289
	8290	case KEY_gethostbyaddr:
	8291	LOP(OP_GHBYADDR,XTERM);
	8292
	8293	case KEY_gethostent:
	8294	FUN0(OP_GHOSTENT);
	8295
	8296	case KEY_getnetbyname:
	8297	UNI(OP_GNBYNAME);
	8298
	8299	case KEY_getnetbyaddr:
	8300	LOP(OP_GNBYADDR,XTERM);
	8301
	8302	case KEY_getnetent:
	8303	FUN0(OP_GNETENT);
	8304
	8305	case KEY_getservbyname:
	8306	LOP(OP_GSBYNAME,XTERM);
	8307
	8308	case KEY_getservbyport:
	8309	LOP(OP_GSBYPORT,XTERM);
	8310
	8311	case KEY_getservent:
	8312	FUN0(OP_GSERVENT);
	8313
	8314	case KEY_getsockname:
	8315	UNI(OP_GETSOCKNAME);
	8316
	8317	case KEY_getsockopt:
	8318	LOP(OP_GSOCKOPT,XTERM);
	8319
	8320	case KEY_getgrent:
	8321	FUN0(OP_GGRENT);
	8322
	8323	case KEY_getgrnam:
	8324	UNI(OP_GGRNAM);
	8325
	8326	case KEY_getgrgid:
	8327	UNI(OP_GGRGID);
	8328
	8329	case KEY_getlogin:
	8330	FUN0(OP_GETLOGIN);
	8331
	8332	case KEY_given:
	8333	pl_yylval.ival = CopLINE(PL_curcop);
	8334	Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__SMARTMATCH),
	8335	"given is deprecated");
	8336	OPERATOR(KW_GIVEN);
	8337
	8338	case KEY_glob:
	8339	LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
	8340
	8341	case KEY_hex:
	8342	UNI(OP_HEX);
	8343
	8344	case KEY_if:
	8345	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
	8346	return REPORT(0);
	8347	pl_yylval.ival = CopLINE(PL_curcop);
	8348	OPERATOR(KW_IF);
	8349
	8350	case KEY_index:
	8351	LOP(OP_INDEX,XTERM);
	8352
	8353	case KEY_int:
	8354	UNI(OP_INT);
	8355
	8356	case KEY_ioctl:
	8357	LOP(OP_IOCTL,XTERM);
	8358
	8359	case KEY_isa:
	8360	NCRop(OP_ISA);
	8361
	8362	case KEY_join:
	8363	LOP(OP_JOIN,XTERM);
	8364
	8365	case KEY_keys:
	8366	UNI(OP_KEYS);
	8367
	8368	case KEY_kill:
	8369	LOP(OP_KILL,XTERM);
	8370
	8371	case KEY_last:
	8372	LOOPX(OP_LAST);
	8373
	8374	case KEY_lc:
	8375	UNI(OP_LC);
	8376
	8377	case KEY_lcfirst:
	8378	UNI(OP_LCFIRST);
	8379
	8380	case KEY_local:
	8381	OPERATOR(KW_LOCAL);
	8382
	8383	case KEY_length:
	8384	UNI(OP_LENGTH);
	8385
	8386	case KEY_lt:
	8387	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
	8388	return REPORT(0);
	8389	ChRop(OP_SLT);
	8390
	8391	case KEY_le:
	8392	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
	8393	return REPORT(0);
	8394	ChRop(OP_SLE);
	8395
	8396	case KEY_localtime:
	8397	UNI(OP_LOCALTIME);
	8398
	8399	case KEY_log:
	8400	UNI(OP_LOG);
	8401
	8402	case KEY_link:
	8403	LOP(OP_LINK,XTERM);
	8404
	8405	case KEY_listen:
	8406	LOP(OP_LISTEN,XTERM);
	8407
	8408	case KEY_lock:
	8409	UNI(OP_LOCK);
	8410
	8411	case KEY_lstat:
	8412	UNI(OP_LSTAT);
	8413
	8414	case KEY_m:
	8415	s = scan_pat(s,OP_MATCH);
	8416	TERM(sublex_start());
	8417
	8418	case KEY_map:
	8419	LOP(OP_MAPSTART, XREF);
	8420
	8421	case KEY_mkdir:
	8422	LOP(OP_MKDIR,XTERM);
	8423
	8424	case KEY_msgctl:
	8425	LOP(OP_MSGCTL,XTERM);
	8426
	8427	case KEY_msgget:
	8428	LOP(OP_MSGGET,XTERM);
	8429
	8430	case KEY_msgrcv:
	8431	LOP(OP_MSGRCV,XTERM);
	8432
	8433	case KEY_msgsnd:
	8434	LOP(OP_MSGSND,XTERM);
	8435
	8436	case KEY_our:
	8437	case KEY_my:
	8438	case KEY_state:
	8439	return yyl_my(aTHX_ s, key);
	8440
	8441	case KEY_next:
	8442	LOOPX(OP_NEXT);
	8443
	8444	case KEY_ne:
	8445	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
	8446	return REPORT(0);
	8447	ChEop(OP_SNE);
	8448
	8449	case KEY_no:
	8450	s = tokenize_use(0, s);
	8451	TOKEN(KW_USE_or_NO);
	8452
	8453	case KEY_not:
	8454	if (s == '(' \|\| (s = skipspace(s), s == '('))
	8455	FUN1(OP_NOT);
	8456	else {
	8457	if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
	8458	PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
	8459	OPERATOR(NOTOP);
	8460	}
	8461
	8462	case KEY_open:
	8463	s = skipspace(s);
	8464	if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
	8465	const char *t;
	8466	char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
	8467	for (t=d; isSPACE(*t);)
	8468	t++;
	8469	if ( t && memCHRs("\|&+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
	8470	/* [perl #16184] */
	8471	&& !(t[0] == '=' && t[1] == '>')
	8472	&& !(t[0] == ':' && t[1] == ':')
	8473	&& !keyword(s, d-s, 0)
	8474	) {
	8475	Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
	8476	"Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
	8477	UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
	8478	}
	8479	}
	8480	LOP(OP_OPEN,XTERM);
	8481
	8482	case KEY_or:
	8483	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
	8484	return REPORT(0);
	8485	pl_yylval.ival = OP_OR;
	8486	OPERATOR(OROP);
	8487
	8488	case KEY_ord:
	8489	UNI(OP_ORD);
	8490
	8491	case KEY_oct:
	8492	UNI(OP_OCT);
	8493
	8494	case KEY_opendir:
	8495	LOP(OP_OPEN_DIR,XTERM);
	8496
	8497	case KEY_print:
	8498	checkcomma(s,PL_tokenbuf,"filehandle");
	8499	LOP(OP_PRINT,XREF);
	8500
	8501	case KEY_printf:
	8502	checkcomma(s,PL_tokenbuf,"filehandle");
	8503	LOP(OP_PRTF,XREF);
	8504
	8505	case KEY_prototype:
	8506	UNI(OP_PROTOTYPE);
	8507
	8508	case KEY_push:
	8509	LOP(OP_PUSH,XTERM);
	8510
	8511	case KEY_pop:
	8512	UNIDOR(OP_POP);
	8513
	8514	case KEY_pos:
	8515	UNIDOR(OP_POS);
	8516
	8517	case KEY_pack:
	8518	LOP(OP_PACK,XTERM);
	8519
	8520	case KEY_package:
	8521	s = force_word(s,BAREWORD,FALSE,TRUE);
	8522	s = skipspace(s);
	8523	s = force_strict_version(s);
	8524	PREBLOCK(KW_PACKAGE);
	8525
	8526	case KEY_pipe:
	8527	LOP(OP_PIPE_OP,XTERM);
	8528
	8529	case KEY_q:
	8530	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
	8531	if (!s)
	8532	missingterm(NULL, 0);
	8533	COPLINE_SET_FROM_MULTI_END;
	8534	pl_yylval.ival = OP_CONST;
	8535	TERM(sublex_start());
	8536
	8537	case KEY_quotemeta:
	8538	UNI(OP_QUOTEMETA);
	8539
	8540	case KEY_qw:
	8541	return yyl_qw(aTHX_ s, len);
	8542
	8543	case KEY_qq:
	8544	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
	8545	if (!s)
	8546	missingterm(NULL, 0);
	8547	pl_yylval.ival = OP_STRINGIFY;
	8548	if (SvIVX(PL_lex_stuff) == '\'')
	8549	SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
	8550	TERM(sublex_start());
	8551
	8552	case KEY_qr:
	8553	s = scan_pat(s,OP_QR);
	8554	TERM(sublex_start());
	8555
	8556	case KEY_qx:
	8557	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
	8558	if (!s)
	8559	missingterm(NULL, 0);
	8560	pl_yylval.ival = OP_BACKTICK;
	8561	TERM(sublex_start());
	8562
	8563	case KEY_return:
	8564	OLDLOP(OP_RETURN);
	8565
	8566	case KEY_require:
	8567	return yyl_require(aTHX_ s, orig_keyword);
	8568
	8569	case KEY_reset:
	8570	UNI(OP_RESET);
	8571
	8572	case KEY_redo:
	8573	LOOPX(OP_REDO);
	8574
	8575	case KEY_rename:
	8576	LOP(OP_RENAME,XTERM);
	8577
	8578	case KEY_rand:
	8579	UNI(OP_RAND);
	8580
	8581	case KEY_rmdir:
	8582	UNI(OP_RMDIR);
	8583
	8584	case KEY_rindex:
	8585	LOP(OP_RINDEX,XTERM);
	8586
	8587	case KEY_read:
	8588	LOP(OP_READ,XTERM);
	8589
	8590	case KEY_readdir:
	8591	UNI(OP_READDIR);
	8592
	8593	case KEY_readline:
	8594	UNIDOR(OP_READLINE);
	8595
	8596	case KEY_readpipe:
	8597	UNIDOR(OP_BACKTICK);
	8598
	8599	case KEY_rewinddir:
	8600	UNI(OP_REWINDDIR);
	8601
	8602	case KEY_recv:
	8603	LOP(OP_RECV,XTERM);
	8604
	8605	case KEY_reverse:
	8606	LOP(OP_REVERSE,XTERM);
	8607
	8608	case KEY_readlink:
	8609	UNIDOR(OP_READLINK);
	8610
	8611	case KEY_ref:
	8612	UNI(OP_REF);
	8613
	8614	case KEY_s:
	8615	s = scan_subst(s);
	8616	if (pl_yylval.opval)
	8617	TERM(sublex_start());
	8618	else
	8619	TOKEN(1); /* force error */
	8620
	8621	case KEY_say:
	8622	checkcomma(s,PL_tokenbuf,"filehandle");
	8623	LOP(OP_SAY,XREF);
	8624
	8625	case KEY_chomp:
	8626	UNI(OP_CHOMP);
	8627
	8628	case KEY_scalar:
	8629	UNI(OP_SCALAR);
	8630
	8631	case KEY_select:
	8632	LOP(OP_SELECT,XTERM);
	8633
	8634	case KEY_seek:
	8635	LOP(OP_SEEK,XTERM);
	8636
	8637	case KEY_semctl:
	8638	LOP(OP_SEMCTL,XTERM);
	8639
	8640	case KEY_semget:
	8641	LOP(OP_SEMGET,XTERM);
	8642
	8643	case KEY_semop:
	8644	LOP(OP_SEMOP,XTERM);
	8645
	8646	case KEY_send:
	8647	LOP(OP_SEND,XTERM);
	8648
	8649	case KEY_setpgrp:
	8650	LOP(OP_SETPGRP,XTERM);
	8651
	8652	case KEY_setpriority:
	8653	LOP(OP_SETPRIORITY,XTERM);
	8654
	8655	case KEY_sethostent:
	8656	UNI(OP_SHOSTENT);
	8657
	8658	case KEY_setnetent:
	8659	UNI(OP_SNETENT);
	8660
	8661	case KEY_setservent:
	8662	UNI(OP_SSERVENT);
	8663
	8664	case KEY_setprotoent:
	8665	UNI(OP_SPROTOENT);
	8666
	8667	case KEY_setpwent:
	8668	FUN0(OP_SPWENT);
	8669
	8670	case KEY_setgrent:
	8671	FUN0(OP_SGRENT);
	8672
	8673	case KEY_seekdir:
	8674	LOP(OP_SEEKDIR,XTERM);
	8675
	8676	case KEY_setsockopt:
	8677	LOP(OP_SSOCKOPT,XTERM);
	8678
	8679	case KEY_shift:
	8680	UNIDOR(OP_SHIFT);
	8681
	8682	case KEY_shmctl:
	8683	LOP(OP_SHMCTL,XTERM);
	8684
	8685	case KEY_shmget:
	8686	LOP(OP_SHMGET,XTERM);
	8687
	8688	case KEY_shmread:
	8689	LOP(OP_SHMREAD,XTERM);
	8690
	8691	case KEY_shmwrite:
	8692	LOP(OP_SHMWRITE,XTERM);
	8693
	8694	case KEY_shutdown:
	8695	LOP(OP_SHUTDOWN,XTERM);
	8696
	8697	case KEY_sin:
	8698	UNI(OP_SIN);
	8699
	8700	case KEY_sleep:
	8701	UNI(OP_SLEEP);
	8702
	8703	case KEY_socket:
	8704	LOP(OP_SOCKET,XTERM);
	8705
	8706	case KEY_socketpair:
	8707	LOP(OP_SOCKPAIR,XTERM);
	8708
	8709	case KEY_sort:
	8710	checkcomma(s,PL_tokenbuf,"subroutine name");
	8711	s = skipspace(s);
	8712	PL_expect = XTERM;
	8713	s = force_word(s,BAREWORD,TRUE,TRUE);
	8714	LOP(OP_SORT,XREF);
	8715
	8716	case KEY_split:
	8717	LOP(OP_SPLIT,XTERM);
	8718
	8719	case KEY_sprintf:
	8720	LOP(OP_SPRINTF,XTERM);
	8721
	8722	case KEY_splice:
	8723	LOP(OP_SPLICE,XTERM);
	8724
	8725	case KEY_sqrt:
	8726	UNI(OP_SQRT);
	8727
	8728	case KEY_srand:
	8729	UNI(OP_SRAND);
	8730
	8731	case KEY_stat:
	8732	UNI(OP_STAT);
	8733
	8734	case KEY_study:
	8735	UNI(OP_STUDY);
	8736
	8737	case KEY_substr:
	8738	LOP(OP_SUBSTR,XTERM);
	8739
	8740	case KEY_method:
	8741	/* For now we just treat 'method' identical to 'sub' plus a warning */
	8742	Perl_ck_warner_d(aTHX_
	8743	packWARN(WARN_EXPERIMENTAL__CLASS), "method is experimental");
	8744	return yyl_sub(aTHX_ s, KEY_method);
	8745
	8746	case KEY_format:
	8747	case KEY_sub:
	8748	return yyl_sub(aTHX_ s, key);
	8749
	8750	case KEY_system:
	8751	LOP(OP_SYSTEM,XREF);
	8752
	8753	case KEY_symlink:
	8754	LOP(OP_SYMLINK,XTERM);
	8755
	8756	case KEY_syscall:
	8757	LOP(OP_SYSCALL,XTERM);
	8758
	8759	case KEY_sysopen:
	8760	LOP(OP_SYSOPEN,XTERM);
	8761
	8762	case KEY_sysseek:
	8763	LOP(OP_SYSSEEK,XTERM);
	8764
	8765	case KEY_sysread:
	8766	LOP(OP_SYSREAD,XTERM);
	8767
	8768	case KEY_syswrite:
	8769	LOP(OP_SYSWRITE,XTERM);
	8770
	8771	case KEY_tr:
	8772	case KEY_y:
	8773	s = scan_trans(s);
	8774	TERM(sublex_start());
	8775
	8776	case KEY_tell:
	8777	UNI(OP_TELL);
	8778
	8779	case KEY_telldir:
	8780	UNI(OP_TELLDIR);
	8781
	8782	case KEY_tie:
	8783	LOP(OP_TIE,XTERM);
	8784
	8785	case KEY_tied:
	8786	UNI(OP_TIED);
	8787
	8788	case KEY_time:
	8789	FUN0(OP_TIME);
	8790
	8791	case KEY_times:
	8792	FUN0(OP_TMS);
	8793
	8794	case KEY_truncate:
	8795	LOP(OP_TRUNCATE,XTERM);
	8796
	8797	case KEY_try:
	8798	pl_yylval.ival = CopLINE(PL_curcop);
	8799	Perl_ck_warner_d(aTHX_
	8800	packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
	8801	PREBLOCK(KW_TRY);
	8802
	8803	case KEY_uc:
	8804	UNI(OP_UC);
	8805
	8806	case KEY_ucfirst:
	8807	UNI(OP_UCFIRST);
	8808
	8809	case KEY_untie:
	8810	UNI(OP_UNTIE);
	8811
	8812	case KEY_until:
	8813	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
	8814	return REPORT(0);
	8815	pl_yylval.ival = CopLINE(PL_curcop);
	8816	OPERATOR(KW_UNTIL);
	8817
	8818	case KEY_unless:
	8819	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
	8820	return REPORT(0);
	8821	pl_yylval.ival = CopLINE(PL_curcop);
	8822	OPERATOR(KW_UNLESS);
	8823
	8824	case KEY_unlink:
	8825	LOP(OP_UNLINK,XTERM);
	8826
	8827	case KEY_undef:
	8828	UNIDOR(OP_UNDEF);
	8829
	8830	case KEY_unpack:
	8831	LOP(OP_UNPACK,XTERM);
	8832
	8833	case KEY_utime:
	8834	LOP(OP_UTIME,XTERM);
	8835
	8836	case KEY_umask:
	8837	UNIDOR(OP_UMASK);
	8838
	8839	case KEY_unshift:
	8840	LOP(OP_UNSHIFT,XTERM);
	8841
	8842	case KEY_use:
	8843	s = tokenize_use(1, s);
	8844	TOKEN(KW_USE_or_NO);
	8845
	8846	case KEY_values:
	8847	UNI(OP_VALUES);
	8848
	8849	case KEY_vec:
	8850	LOP(OP_VEC,XTERM);
	8851
	8852	case KEY_when:
	8853	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
	8854	return REPORT(0);
	8855	pl_yylval.ival = CopLINE(PL_curcop);
	8856	Perl_ck_warner_d(aTHX_
	8857	packWARN(WARN_DEPRECATED__SMARTMATCH),
	8858	"when is deprecated");
	8859	OPERATOR(KW_WHEN);
	8860
	8861	case KEY_while:
	8862	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
	8863	return REPORT(0);
	8864	pl_yylval.ival = CopLINE(PL_curcop);
	8865	OPERATOR(KW_WHILE);
	8866
	8867	case KEY_warn:
	8868	PL_hints \|= HINT_BLOCK_SCOPE;
	8869	LOP(OP_WARN,XTERM);
	8870
	8871	case KEY_wait:
	8872	FUN0(OP_WAIT);
	8873
	8874	case KEY_waitpid:
	8875	LOP(OP_WAITPID,XTERM);
	8876
	8877	case KEY_wantarray:
	8878	FUN0(OP_WANTARRAY);
	8879
	8880	case KEY_write:
	8881	/* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
	8882	* we use the same number on EBCDIC */
	8883	gv_fetchpvs("\x0C", GV_ADD\|GV_NOTQUAL, SVt_PV);
	8884	UNI(OP_ENTERWRITE);
	8885
	8886	case KEY_x:
	8887	if (PL_expect == XOPERATOR) {
	8888	if (*s == '=' && !PL_lex_allbrackets
	8889	&& PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
	8890	{
	8891	return REPORT(0);
	8892	}
	8893	Mop(OP_REPEAT);
	8894	}
	8895	check_uni();
	8896	return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
	8897
	8898	case KEY_xor:
	8899	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
	8900	return REPORT(0);
	8901	pl_yylval.ival = OP_XOR;
	8902	OPERATOR(OROP);
	8903	}
	8904	}
	8905
	8906	static int
	8907	yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
	8908	{
	8909	I32 key = 0;
	8910	I32 orig_keyword = 0;
	8911	STRLEN olen = len;
	8912	char *d = s;
	8913	s += 2;
	8914	s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
	8915	if ((*s == ':' && s[1] == ':')
	8916	\|\| (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
	8917	{
	8918	Copy(PL_bufptr, PL_tokenbuf, olen, char);
	8919	return yyl_just_a_word(aTHX_ d, olen, 0, c);
	8920	}
	8921	if (!key)
	8922	Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
	8923	UTF8fARG(UTF, len, PL_tokenbuf));
	8924	if (key < 0)
	8925	key = -key;
	8926	else if (key == KEY_require \|\| key == KEY_do
	8927	\|\| key == KEY_glob)
	8928	/* that's a way to remember we saw "CORE::" */
	8929	orig_keyword = key;
	8930
	8931	/* Known to be a reserved word at this point */
	8932	return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
	8933	}
	8934
	8935	struct Perl_custom_infix_result {
	8936	struct Perl_custom_infix *def;
	8937	SV *parsedata;
	8938	};
	8939
	8940	static enum yytokentype tokentype_for_plugop(struct Perl_custom_infix *def)
	8941	{
	8942	enum Perl_custom_infix_precedence prec = def->prec;
	8943	if(prec <= INFIX_PREC_LOW)
	8944	return PLUGIN_LOW_OP;
	8945	if(prec <= INFIX_PREC_LOGICAL_OR_LOW)
	8946	return PLUGIN_LOGICAL_OR_LOW_OP;
	8947	if(prec <= INFIX_PREC_LOGICAL_AND_LOW)
	8948	return PLUGIN_LOGICAL_AND_LOW_OP;
	8949	if(prec <= INFIX_PREC_ASSIGN)
	8950	return PLUGIN_ASSIGN_OP;
	8951	if(prec <= INFIX_PREC_LOGICAL_OR)
	8952	return PLUGIN_LOGICAL_OR_OP;
	8953	if(prec <= INFIX_PREC_LOGICAL_AND)
	8954	return PLUGIN_LOGICAL_AND_OP;
	8955	if(prec <= INFIX_PREC_REL)
	8956	return PLUGIN_REL_OP;
	8957	if(prec <= INFIX_PREC_ADD)
	8958	return PLUGIN_ADD_OP;
	8959	if(prec <= INFIX_PREC_MUL)
	8960	return PLUGIN_MUL_OP;
	8961	if(prec <= INFIX_PREC_POW)
	8962	return PLUGIN_POW_OP;
	8963	return PLUGIN_HIGH_OP;
	8964	}
	8965
	8966	OP *
	8967	Perl_build_infix_plugin(pTHX_ OP lhs, OP rhs, void *tokendata)
	8968	{
	8969	PERL_ARGS_ASSERT_BUILD_INFIX_PLUGIN;
	8970
	8971	struct Perl_custom_infix_result result = (struct Perl_custom_infix_result )tokendata;
	8972	SAVEFREEPV(result);
	8973	if(result->parsedata)
	8974	SAVEFREESV(result->parsedata);
	8975
	8976	return (*result->def->build_op)(aTHX_
	8977	&result->parsedata, lhs, rhs, result->def);
	8978	}
	8979
	8980	static int
	8981	yyl_keylookup(pTHX_ char s, GV gv)
	8982	{
	8983	STRLEN len;
	8984	bool anydelim;
	8985	I32 key;
	8986	struct code c = no_code;
	8987	I32 orig_keyword = 0;
	8988	char *d;
	8989
	8990	c.gv = gv;
	8991
	8992	PL_bufptr = s;
	8993	s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
	8994
	8995	/* Some keywords can be followed by any delimiter, including ':' */
	8996	anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
	8997
	8998	/* x::* is just a word, unless x is "CORE" */
	8999	if (!anydelim && *s == ':' && s[1] == ':') {
	9000	if (memEQs(PL_tokenbuf, len, "CORE"))
	9001	return yyl_key_core(aTHX_ s, len, c);
	9002	return yyl_just_a_word(aTHX_ s, len, 0, c);
	9003	}
	9004
	9005	d = s;
	9006	while (d < PL_bufend && isSPACE(*d))
	9007	d++; /* no comments skipped here, or s### is misparsed */
	9008
	9009	/* Is this a word before a => operator? */
	9010	if (*d == '=' && d[1] == '>') {
	9011	return yyl_fatcomma(aTHX_ s, len);
	9012	}
	9013
	9014	/* Check for plugged-in keyword */
	9015	{
	9016	OP *o;
	9017	int result;
	9018	char *saved_bufptr = PL_bufptr;
	9019	PL_bufptr = s;
	9020	result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
	9021	s = PL_bufptr;
	9022	if (result == KEYWORD_PLUGIN_DECLINE) {
	9023	/* not a plugged-in keyword */
	9024	PL_bufptr = saved_bufptr;
	9025	} else if (result == KEYWORD_PLUGIN_STMT) {
	9026	pl_yylval.opval = o;
	9027	CLINE;
	9028	if (!PL_nexttoke) PL_expect = XSTATE;
	9029	return REPORT(PLUGSTMT);
	9030	} else if (result == KEYWORD_PLUGIN_EXPR) {
	9031	pl_yylval.opval = o;
	9032	CLINE;
	9033	if (!PL_nexttoke) PL_expect = XOPERATOR;
	9034	return REPORT(PLUGEXPR);
	9035	} else {
	9036	Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
	9037	}
	9038	}
	9039
	9040	/* Check for plugged-in named operator */
	9041	if(PLUGINFIX_IS_ENABLED) {
	9042	struct Perl_custom_infix *def;
	9043	STRLEN result;
	9044	result = PL_infix_plugin(aTHX_ PL_tokenbuf, len, &def);
	9045	if(result) {
	9046	if(result != len)
	9047	Perl_croak(aTHX_ "Bad infix plugin result (%zd) - did not consume entire identifier <%s>\n",
	9048	result, PL_tokenbuf);
	9049	PL_bufptr = s = d;
	9050	struct Perl_custom_infix_result *result;
	9051	Newx(result, 1, struct Perl_custom_infix_result);
	9052	result->def = def;
	9053	result->parsedata = NULL;
	9054	if(def->parse) {
	9055	(*def->parse)(aTHX_ &result->parsedata, def);
	9056	s = PL_bufptr; /* restore local s variable */
	9057	}
	9058	pl_yylval.pval = result;
	9059	CLINE;
	9060	OPERATOR(tokentype_for_plugop(def));
	9061	}
	9062	}
	9063
	9064	/* Is this a label? */
	9065	if (!anydelim && PL_expect == XSTATE
	9066	&& d < PL_bufend && d == ':' && (d + 1) != ':') {
	9067	s = d + 1;
	9068	pl_yylval.opval =
	9069	newSVOP(OP_CONST, 0,
	9070	newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
	9071	CLINE;
	9072	TOKEN(LABEL);
	9073	}
	9074
	9075	/* Check for lexical sub */
	9076	if (PL_expect != XOPERATOR) {
	9077	char tmpbuf[sizeof PL_tokenbuf + 1];
	9078	*tmpbuf = '&';
	9079	Copy(PL_tokenbuf, tmpbuf+1, len, char);
	9080	c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
	9081	if (c.off != NOT_IN_PAD) {
	9082	assert(c.off); /* we assume this is boolean-true below */
	9083	if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
	9084	HV * const stash = PAD_COMPNAME_OURSTASH(c.off);
	9085	HEK * const stashname = HvNAME_HEK(stash);
	9086	c.sv = newSVhek(stashname);
	9087	sv_catpvs(c.sv, "::");
	9088	sv_catpvn_flags(c.sv, PL_tokenbuf, len,
	9089	(UTF ? SV_CATUTF8 : SV_CATBYTES));
	9090	c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT \| SvUTF8(c.sv),
	9091	SVt_PVCV);
	9092	c.off = 0;
	9093	if (!c.gv) {
	9094	ASSUME(c.sv && SvREFCNT(c.sv) == 1);
	9095	SvREFCNT_dec(c.sv);
	9096	c.sv = NULL;
	9097	return yyl_just_a_word(aTHX_ s, len, 0, c);
	9098	}
	9099	}
	9100	else {
	9101	c.rv2cv_op = newOP(OP_PADANY, 0);
	9102	c.rv2cv_op->op_targ = c.off;
	9103	c.cv = find_lexical_cv(c.off);
	9104	}
	9105	c.lex = TRUE;
	9106	return yyl_just_a_word(aTHX_ s, len, 0, c);
	9107	}
	9108	c.off = 0;
	9109	}
	9110
	9111	/* Check for built-in keyword */
	9112	key = keyword(PL_tokenbuf, len, 0);
	9113
	9114	if (key < 0)
	9115	key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
	9116
	9117	if (key && key != KEY___DATA__ && key != KEY___END__
	9118	&& (!anydelim \|\| *s != '#')) {
	9119	/* no override, and not s### either; skipspace is safe here
	9120	* check for => on following line */
	9121	bool arrow;
	9122	STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
	9123	STRLEN soff = s - SvPVX(PL_linestr);
	9124	s = peekspace(s);
	9125	arrow = *s == '=' && s[1] == '>';
	9126	PL_bufptr = SvPVX(PL_linestr) + bufoff;
	9127	s = SvPVX(PL_linestr) + soff;
	9128	if (arrow)
	9129	return yyl_fatcomma(aTHX_ s, len);
	9130	}
	9131
	9132	return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
	9133	}
	9134
	9135	static int
	9136	yyl_try(pTHX_ char *s)
	9137	{
	9138	char *d;
	9139	GV *gv = NULL;
	9140	int tok;
	9141
	9142	retry:
	9143	/* Check for plugged-in symbolic operator */
	9144	if(PLUGINFIX_IS_ENABLED && isPLUGINFIX_FIRST(*s)) {
	9145	struct Perl_custom_infix *def;
	9146	char s_end = s, d = PL_tokenbuf;
	9147	STRLEN len;
	9148
	9149	/* Copy the longest sequence of isPLUGINFIX() chars into PL_tokenbuf */
	9150	while(s_end < PL_bufend && d < PL_tokenbuf+sizeof(PL_tokenbuf)-1 && isPLUGINFIX(*s_end))
	9151	d++ = s_end++;
	9152	*d = '\0';
	9153
	9154	if((len = (*PL_infix_plugin)(aTHX_ PL_tokenbuf, s_end - s, &def))) {
	9155	s += len;
	9156	struct Perl_custom_infix_result *result;
	9157	Newx(result, 1, struct Perl_custom_infix_result);
	9158	result->def = def;
	9159	result->parsedata = NULL;
	9160	if(def->parse) {
	9161	PL_bufptr = s;
	9162	(*def->parse)(aTHX_ &result->parsedata, def);
	9163	s = PL_bufptr; /* restore local s variable */
	9164	}
	9165	pl_yylval.pval = result;
	9166	CLINE;
	9167	OPERATOR(tokentype_for_plugop(def));
	9168	}
	9169	}
	9170
	9171	switch (*s) {
	9172	default:
	9173	if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
	9174	if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
	9175	return tok;
	9176	goto retry_bufptr;
	9177	}
	9178	yyl_croak_unrecognised(aTHX_ s);
	9179
	9180	case 4:
	9181	case 26:
	9182	/* emulate EOF on ^D or ^Z */
	9183	if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
	9184	return tok;
	9185	retry_bufptr:
	9186	s = PL_bufptr;
	9187	goto retry;
	9188
	9189	case 0:
	9190	if ((!PL_rsfp \|\| PL_lex_inwhat)
	9191	&& (!PL_parser->filtered \|\| s+1 < PL_bufend)) {
	9192	PL_last_uni = 0;
	9193	PL_last_lop = 0;
	9194	if (PL_lex_brackets
	9195	&& PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
	9196	{
	9197	yyerror((const char *)
	9198	(PL_lex_formbrack
	9199	? "Format not terminated"
	9200	: "Missing right curly or square bracket"));
	9201	}
	9202	DEBUG_T({
	9203	PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
	9204	});
	9205	TOKEN(0);
	9206	}
	9207	if (s++ < PL_bufend)
	9208	goto retry; /* ignore stray nulls */
	9209	PL_last_uni = 0;
	9210	PL_last_lop = 0;
	9211	if (!PL_in_eval && !PL_preambled) {
	9212	PL_preambled = TRUE;
	9213	if (PL_perldb) {
	9214	/* Generate a string of Perl code to load the debugger.
	9215	* If PERL5DB is set, it will return the contents of that,
	9216	* otherwise a compile-time require of perl5db.pl. */
	9217
	9218	const char * const pdb = PerlEnv_getenv("PERL5DB");
	9219
	9220	if (pdb) {
	9221	sv_setpv(PL_linestr, pdb);
	9222	sv_catpvs(PL_linestr,";");
	9223	} else {
	9224	SETERRNO(0,SS_NORMAL);
	9225	sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
	9226	}
	9227	PL_parser->preambling = CopLINE(PL_curcop);
	9228	} else
	9229	SvPVCLEAR(PL_linestr);
	9230	if (PL_preambleav) {
	9231	SV **svp = AvARRAY(PL_preambleav);
	9232	SV **const end = svp + AvFILLp(PL_preambleav);
	9233	while(svp <= end) {
	9234	sv_catsv(PL_linestr, *svp);
	9235	++svp;
	9236	sv_catpvs(PL_linestr, ";");
	9237	}
	9238	SvREFCNT_dec(MUTABLE_SV(PL_preambleav));
	9239	PL_preambleav = NULL;
	9240	}
	9241	if (PL_minus_E)
	9242	sv_catpvs(PL_linestr,
	9243	"use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
	9244	if (PL_minus_n \|\| PL_minus_p) {
	9245	sv_catpvs(PL_linestr, "LINE: while (<>) {"/}/);
	9246	if (PL_minus_l)
	9247	sv_catpvs(PL_linestr,"chomp;");
	9248	if (PL_minus_a) {
	9249	if (PL_minus_F) {
	9250	if ( ( *PL_splitstr == '/'
	9251	\|\| *PL_splitstr == '\''
	9252	\|\| *PL_splitstr == '"')
	9253	&& strchr(PL_splitstr + 1, *PL_splitstr))
	9254	{
	9255	/* strchr is ok, because -F pattern can't contain
	9256	* embedded NULs */
	9257	Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
	9258	}
	9259	else {
	9260	/* "q\0${splitstr}\0" is legal perl. Yes, even NUL
	9261	bytes can be used as quoting characters. :-) */
	9262	const char *splits = PL_splitstr;
	9263	sv_catpvs(PL_linestr, "our @F=split(q\0");
	9264	do {
	9265	/* Need to \ \s */
	9266	if (*splits == '\\')
	9267	sv_catpvn(PL_linestr, splits, 1);
	9268	sv_catpvn(PL_linestr, splits, 1);
	9269	} while (*splits++);
	9270	/* This loop will embed the trailing NUL of
	9271	PL_linestr as the last thing it does before
	9272	terminating. */
	9273	sv_catpvs(PL_linestr, ");");
	9274	}
	9275	}
	9276	else
	9277	sv_catpvs(PL_linestr,"our @F=split(' ');");
	9278	}
	9279	}
	9280	sv_catpvs(PL_linestr, "\n");
	9281	PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
	9282	PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
	9283	PL_last_lop = PL_last_uni = NULL;
	9284	if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
	9285	update_debugger_info(PL_linestr, NULL, 0);
	9286	goto retry;
	9287	}
	9288	if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
	9289	return tok;
	9290	goto retry_bufptr;
	9291
	9292	case '\r':
	9293	#ifdef PERL_STRICT_CR
	9294	Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
	9295	Perl_croak(aTHX_
	9296	"\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
	9297	#endif
	9298	case ' ': case '\t': case '\f': case '\v':
	9299	s++;
	9300	goto retry;
	9301
	9302	case '#':
	9303	case '\n': {
	9304	const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
	9305	if (needs_semicolon)
	9306	TOKEN(PERLY_SEMICOLON);
	9307	else
	9308	goto retry;
	9309	}
	9310
	9311	case '-':
	9312	return yyl_hyphen(aTHX_ s);
	9313
	9314	case '+':
	9315	return yyl_plus(aTHX_ s);
	9316
	9317	case '*':
	9318	return yyl_star(aTHX_ s);
	9319
	9320	case '%':
	9321	return yyl_percent(aTHX_ s);
	9322
	9323	case '^':
	9324	return yyl_caret(aTHX_ s);
	9325
	9326	case '[':
	9327	return yyl_leftsquare(aTHX_ s);
	9328
	9329	case '~':
	9330	return yyl_tilde(aTHX_ s);
	9331
	9332	case ',':
	9333	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
	9334	TOKEN(0);
	9335	s++;
	9336	OPERATOR(PERLY_COMMA);
	9337	case ':':
	9338	if (s[1] == ':')
	9339	return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
	9340	return yyl_colon(aTHX_ s + 1);
	9341
	9342	case '(':
	9343	return yyl_leftparen(aTHX_ s + 1);
	9344
	9345	case ';':
	9346	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
	9347	TOKEN(0);
	9348	CLINE;
	9349	s++;
	9350	PL_expect = XSTATE;
	9351	TOKEN(PERLY_SEMICOLON);
	9352
	9353	case ')':
	9354	return yyl_rightparen(aTHX_ s);
	9355
	9356	case ']':
	9357	return yyl_rightsquare(aTHX_ s);
	9358
	9359	case '{':
	9360	return yyl_leftcurly(aTHX_ s + 1, 0);
	9361
	9362	case '}':
	9363	if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
	9364	TOKEN(0);
	9365	return yyl_rightcurly(aTHX_ s, 0);
	9366
	9367	case '&':
	9368	return yyl_ampersand(aTHX_ s);
	9369
	9370	case '\|':
	9371	return yyl_verticalbar(aTHX_ s);
	9372
	9373	case '=':
	9374	if (s[1] == '=' && (s == PL_linestart \|\| s[-1] == '\n')
	9375	&& memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
	9376	{
	9377	s = vcs_conflict_marker(s + 7);
	9378	goto retry;
	9379	}
	9380
	9381	s++;
	9382	{
	9383	const char tmp = *s++;
	9384	if (tmp == '=') {
	9385	if (!PL_lex_allbrackets
	9386	&& PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
	9387	{
	9388	s -= 2;
	9389	TOKEN(0);
	9390	}
	9391	ChEop(OP_EQ);
	9392	}
	9393	if (tmp == '>') {
	9394	if (!PL_lex_allbrackets
	9395	&& PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
	9396	{
	9397	s -= 2;
	9398	TOKEN(0);
	9399	}
	9400	OPERATOR(PERLY_COMMA);
	9401	}
	9402	if (tmp == '~')
	9403	PMop(OP_MATCH);
	9404	if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
	9405	&& memCHRs("+-*/%.^&\|<",tmp))
	9406	Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
	9407	"Reversed %c= operator",(int)tmp);
	9408	s--;
	9409	if (PL_expect == XSTATE
	9410	&& isALPHA(tmp)
	9411	&& (s == PL_linestart+1 \|\| s[-2] == '\n') )
	9412	{
	9413	if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
	9414	\|\| PL_lex_state != LEX_NORMAL)
	9415	{
	9416	d = PL_bufend;
	9417	while (s < d) {
	9418	if (*s++ == '\n') {
	9419	incline(s, PL_bufend);
	9420	if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
	9421	{
	9422	s = (char *) memchr(s,'\n', d - s);
	9423	if (s)
	9424	s++;
	9425	else
	9426	s = d;
	9427	incline(s, PL_bufend);
	9428	goto retry;
	9429	}
	9430	}
	9431	}
	9432	goto retry;
	9433	}
	9434	s = PL_bufend;
	9435	PL_parser->in_pod = 1;
	9436	goto retry;
	9437	}
	9438	}
	9439	if (PL_expect == XBLOCK) {
	9440	const char *t = s;
	9441	#ifdef PERL_STRICT_CR
	9442	while (SPACE_OR_TAB(*t))
	9443	#else
	9444	while (SPACE_OR_TAB(t) \|\| t == '\r')
	9445	#endif
	9446	t++;
	9447	if (t == '\n' \|\| t == '#') {
	9448	ENTER_with_name("lex_format");
	9449	SAVEI8(PL_parser->form_lex_state);
	9450	SAVEI32(PL_lex_formbrack);
	9451	PL_parser->form_lex_state = PL_lex_state;
	9452	PL_lex_formbrack = PL_lex_brackets + 1;
	9453	PL_parser->sub_error_count = PL_error_count;
	9454	return yyl_leftcurly(aTHX_ s, 1);
	9455	}
	9456	}
	9457	if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
	9458	s--;
	9459	TOKEN(0);
	9460	}
	9461	pl_yylval.ival = 0;
	9462	OPERATOR(ASSIGNOP);
	9463
	9464	case '!':
	9465	return yyl_bang(aTHX_ s + 1);
	9466
	9467	case '<':
	9468	if (s[1] == '<' && (s == PL_linestart \|\| s[-1] == '\n')
	9469	&& memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
	9470	{
	9471	s = vcs_conflict_marker(s + 7);
	9472	goto retry;
	9473	}
	9474	return yyl_leftpointy(aTHX_ s);
	9475
	9476	case '>':
	9477	if (s[1] == '>' && (s == PL_linestart \|\| s[-1] == '\n')
	9478	&& memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
	9479	{
	9480	s = vcs_conflict_marker(s + 7);
	9481	goto retry;
	9482	}
	9483	return yyl_rightpointy(aTHX_ s + 1);
	9484
	9485	case '$':
	9486	return yyl_dollar(aTHX_ s);
	9487
	9488	case '@':
	9489	return yyl_snail(aTHX_ s);
	9490
	9491	case '/': /* may be division, defined-or, or pattern */
	9492	return yyl_slash(aTHX_ s);
	9493
	9494	case '?': /* conditional */
	9495	s++;
	9496	if (!PL_lex_allbrackets
	9497	&& PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
	9498	{
	9499	s--;
	9500	TOKEN(0);
	9501	}
	9502	PL_lex_allbrackets++;
	9503	OPERATOR(PERLY_QUESTION_MARK);
	9504
	9505	case '.':
	9506	if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
	9507	#ifdef PERL_STRICT_CR
	9508	&& s[1] == '\n'
	9509	#else
	9510	&& (s[1] == '\n' \|\| (s[1] == '\r' && s[2] == '\n'))
	9511	#endif
	9512	&& (s == PL_linestart \|\| s[-1] == '\n') )
	9513	{
	9514	PL_expect = XSTATE;
	9515	/* formbrack==2 means dot seen where arguments expected */
	9516	return yyl_rightcurly(aTHX_ s, 2);
	9517	}
	9518	if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
	9519	s += 3;
	9520	OPERATOR(YADAYADA);
	9521	}
	9522	if (PL_expect == XOPERATOR \|\| !isDIGIT(s[1])) {
	9523	char tmp = *s++;
	9524	if (*s == tmp) {
	9525	if (!PL_lex_allbrackets
	9526	&& PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
	9527	{
	9528	s--;
	9529	TOKEN(0);
	9530	}
	9531	s++;
	9532	if (*s == tmp) {
	9533	s++;
	9534	pl_yylval.ival = OPf_SPECIAL;
	9535	}
	9536	else
	9537	pl_yylval.ival = 0;
	9538	OPERATOR(DOTDOT);
	9539	}
	9540	if (*s == '=' && !PL_lex_allbrackets
	9541	&& PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
	9542	{
	9543	s--;
	9544	TOKEN(0);
	9545	}
	9546	Aop(OP_CONCAT);
	9547	}
	9548	/* FALLTHROUGH */
	9549	case '0': case '1': case '2': case '3': case '4':
	9550	case '5': case '6': case '7': case '8': case '9':
	9551	s = scan_num(s, &pl_yylval);
	9552	DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
	9553	if (PL_expect == XOPERATOR)
	9554	no_op("Number",s);
	9555	TERM(THING);
	9556
	9557	case '\'':
	9558	return yyl_sglquote(aTHX_ s);
	9559
	9560	case '"':
	9561	return yyl_dblquote(aTHX_ s);
	9562
	9563	case '`':
	9564	return yyl_backtick(aTHX_ s);
	9565
	9566	case '\\':
	9567	return yyl_backslash(aTHX_ s + 1);
	9568
	9569	case 'v':
	9570	if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
	9571	char *start = s + 2;
	9572	while (isDIGIT(start) \|\| start == '_')
	9573	start++;
	9574	if (*start == '.' && isDIGIT(start[1])) {
	9575	s = scan_num(s, &pl_yylval);
	9576	TERM(THING);
	9577	}
	9578	else if ((*start == ':' && start[1] == ':')
	9579	\|\| (PL_expect == XSTATE && *start == ':')) {
	9580	if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
	9581	return tok;
	9582	goto retry_bufptr;
	9583	}
	9584	else if (PL_expect == XSTATE) {
	9585	d = start;
	9586	while (d < PL_bufend && isSPACE(*d)) d++;
	9587	if (*d == ':') {
	9588	if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
	9589	return tok;
	9590	goto retry_bufptr;
	9591	}
	9592	}
	9593	/* avoid v123abc() or $h{v1}, allow C<print v10;> */
	9594	if (!isALPHA(*start) && (PL_expect == XTERM
	9595	\|\| PL_expect == XREF \|\| PL_expect == XSTATE
	9596	\|\| PL_expect == XTERMORDORDOR)) {
	9597	GV *const gv = gv_fetchpvn_flags(s, start - s,
	9598	UTF ? SVf_UTF8 : 0, SVt_PVCV);
	9599	if (!gv) {
	9600	s = scan_num(s, &pl_yylval);
	9601	TERM(THING);
	9602	}
	9603	}
	9604	}
	9605	if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
	9606	return tok;
	9607	goto retry_bufptr;
	9608
	9609	case 'x':
	9610	if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
	9611	s++;
	9612	Mop(OP_REPEAT);
	9613	}
	9614	if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
	9615	return tok;
	9616	goto retry_bufptr;
	9617
	9618	case '_':
	9619	case 'a': case 'A':
	9620	case 'b': case 'B':
	9621	case 'c': case 'C':
	9622	case 'd': case 'D':
	9623	case 'e': case 'E':
	9624	case 'f': case 'F':
	9625	case 'g': case 'G':
	9626	case 'h': case 'H':
	9627	case 'i': case 'I':
	9628	case 'j': case 'J':
	9629	case 'k': case 'K':
	9630	case 'l': case 'L':
	9631	case 'm': case 'M':
	9632	case 'n': case 'N':
	9633	case 'o': case 'O':
	9634	case 'p': case 'P':
	9635	case 'q': case 'Q':
	9636	case 'r': case 'R':
	9637	case 's': case 'S':
	9638	case 't': case 'T':
	9639	case 'u': case 'U':
	9640	case 'V':
	9641	case 'w': case 'W':
	9642	case 'X':
	9643	case 'y': case 'Y':
	9644	case 'z': case 'Z':
	9645	if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
	9646	return tok;
	9647	goto retry_bufptr;
	9648	}
	9649	}
	9650
	9651
	9652	/*
	9653	yylex
	9654
	9655	Works out what to call the token just pulled out of the input
	9656	stream. The yacc parser takes care of taking the ops we return and
	9657	stitching them into a tree.
	9658
	9659	Returns:
	9660	The type of the next token
	9661
	9662	Structure:
	9663	Check if we have already built the token; if so, use it.
	9664	Switch based on the current state:
	9665	- if we have a case modifier in a string, deal with that
	9666	- handle other cases of interpolation inside a string
	9667	- scan the next line if we are inside a format
	9668	In the normal state, switch on the next character:
	9669	- default:
	9670	if alphabetic, go to key lookup
	9671	unrecognized character - croak
	9672	- 0/4/26: handle end-of-line or EOF
	9673	- cases for whitespace
	9674	- \n and #: handle comments and line numbers
	9675	- various operators, brackets and sigils
	9676	- numbers
	9677	- quotes
	9678	- 'v': vstrings (or go to key lookup)
	9679	- 'x' repetition operator (or go to key lookup)
	9680	- other ASCII alphanumerics (key lookup begins here):
	9681	word before => ?
	9682	keyword plugin
	9683	scan built-in keyword (but do nothing with it yet)
	9684	check for statement label
	9685	check for lexical subs
	9686	return yyl_just_a_word if there is one
	9687	see whether built-in keyword is overridden
	9688	switch on keyword number:
	9689	- default: return yyl_just_a_word:
	9690	not a built-in keyword; handle bareword lookup
	9691	disambiguate between method and sub call
	9692	fall back to bareword
	9693	- cases for built-in keywords
	9694	*/
	9695
	9696	int
	9697	Perl_yylex(pTHX)
	9698	{
	9699	char *s = PL_bufptr;
	9700
	9701	if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
	9702	const U8* first_bad_char_loc;
	9703	if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
	9704	PL_bufend - PL_bufptr,
	9705	&first_bad_char_loc)))
	9706	{
	9707	_force_out_malformed_utf8_message(first_bad_char_loc,
	9708	(U8 *) PL_bufend,
	9709	0,
	9710	1 /* 1 means die */ );
	9711	NOT_REACHED; /* NOTREACHED */
	9712	}
	9713	PL_parser->recheck_utf8_validity = FALSE;
	9714	}
	9715	DEBUG_T( {
	9716	SV* tmp = newSVpvs("");
	9717	PerlIO_printf(Perl_debug_log, "### %" LINE_Tf ":LEX_%s/X%s %s\n",
	9718	CopLINE(PL_curcop),
	9719	lex_state_names[PL_lex_state],
	9720	exp_name[PL_expect],
	9721	pv_display(tmp, s, strlen(s), 0, 60));
	9722	SvREFCNT_dec(tmp);
	9723	} );
	9724
	9725	/* when we've already built the next token, just pull it out of the queue */
	9726	if (PL_nexttoke) {
	9727	PL_nexttoke--;
	9728	pl_yylval = PL_nextval[PL_nexttoke];
	9729	{
	9730	I32 next_type;
	9731	next_type = PL_nexttype[PL_nexttoke];
	9732	if (next_type & (7<<24)) {
	9733	if (next_type & (1<<24)) {
	9734	if (PL_lex_brackets > 100)
	9735	Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
	9736	PL_lex_brackstack[PL_lex_brackets++] =
	9737	(char) ((U8) (next_type >> 16));
	9738	}
	9739	if (next_type & (2<<24))
	9740	PL_lex_allbrackets++;
	9741	if (next_type & (4<<24))
	9742	PL_lex_allbrackets--;
	9743	next_type &= 0xffff;
	9744	}
	9745	return REPORT(next_type == 'p' ? pending_ident() : next_type);
	9746	}
	9747	}
	9748
	9749	switch (PL_lex_state) {
	9750	case LEX_NORMAL:
	9751	case LEX_INTERPNORMAL:
	9752	break;
	9753
	9754	/* interpolated case modifiers like \L \U, including \Q and \E.
	9755	when we get here, PL_bufptr is at the \
	9756	*/
	9757	case LEX_INTERPCASEMOD:
	9758	/* handle \E or end of string */
	9759	return yyl_interpcasemod(aTHX_ s);
	9760
	9761	case LEX_INTERPPUSH:
	9762	return REPORT(sublex_push());
	9763
	9764	case LEX_INTERPSTART:
	9765	if (PL_bufptr == PL_bufend)
	9766	return REPORT(sublex_done());
	9767	DEBUG_T({
	9768	if(*PL_bufptr != '(')
	9769	PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
	9770	});
	9771	PL_expect = XTERM;
	9772	/* for /@a/, we leave the joining for the regex engine to do
	9773	* (unless we're within \Q etc) */
	9774	PL_lex_dojoin = (*PL_bufptr == '@'
	9775	&& (!PL_lex_inpat \|\| PL_lex_casemods));
	9776	PL_lex_state = LEX_INTERPNORMAL;
	9777	if (PL_lex_dojoin) {
	9778	NEXTVAL_NEXTTOKE.ival = 0;
	9779	force_next(PERLY_COMMA);
	9780	force_ident("\"", PERLY_DOLLAR);
	9781	NEXTVAL_NEXTTOKE.ival = 0;
	9782	force_next(PERLY_DOLLAR);
	9783	NEXTVAL_NEXTTOKE.ival = 0;
	9784	force_next((2<<24)\|PERLY_PAREN_OPEN);
	9785	NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
	9786	force_next(FUNC);
	9787	}
	9788	/* Convert (?{...}) or ({...}) and friends to 'do {...}' /
	9789	if (PL_lex_inpat && *PL_bufptr == '(') {
	9790	PL_parser->lex_shared->re_eval_start = PL_bufptr;
	9791	PL_bufptr += 2;
	9792	if (*PL_bufptr != '{')
	9793	PL_bufptr++;
	9794	PL_expect = XTERMBLOCK;
	9795	force_next(KW_DO);
	9796	}
	9797
	9798	if (PL_lex_starts++) {
	9799	s = PL_bufptr;
	9800	/* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
	9801	if (!PL_lex_casemods && PL_lex_inpat)
	9802	TOKEN(PERLY_COMMA);
	9803	else
	9804	AopNOASSIGN(OP_CONCAT);
	9805	}
	9806	return yylex();
	9807
	9808	case LEX_INTERPENDMAYBE:
	9809	if (intuit_more(PL_bufptr, PL_bufend)) {
	9810	PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
	9811	break;
	9812	}
	9813	/* FALLTHROUGH */
	9814
	9815	case LEX_INTERPEND:
	9816	if (PL_lex_dojoin) {
	9817	const U8 dojoin_was = PL_lex_dojoin;
	9818	PL_lex_dojoin = FALSE;
	9819	PL_lex_state = LEX_INTERPCONCAT;
	9820	PL_lex_allbrackets--;
	9821	return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN);
	9822	}
	9823	if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
	9824	&& SvEVALED(PL_lex_repl))
	9825	{
	9826	if (PL_bufptr != PL_bufend)
	9827	Perl_croak(aTHX_ "Bad evalled substitution pattern");
	9828	PL_lex_repl = NULL;
	9829	}
	9830	/* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
	9831	re_eval_str. If the here-doc body's length equals the previous
	9832	value of re_eval_start, re_eval_start will now be null. So
	9833	check re_eval_str as well. */
	9834	if (PL_parser->lex_shared->re_eval_start
	9835	\|\| PL_parser->lex_shared->re_eval_str) {
	9836	SV *sv;
	9837	if (*PL_bufptr != ')')
	9838	Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
	9839	PL_bufptr++;
	9840	/* having compiled a (?{..}) expression, return the original
	9841	* text too, as a const */
	9842	if (PL_parser->lex_shared->re_eval_str) {
	9843	sv = PL_parser->lex_shared->re_eval_str;
	9844	PL_parser->lex_shared->re_eval_str = NULL;
	9845	SvCUR_set(sv,
	9846	PL_bufptr - PL_parser->lex_shared->re_eval_start);
	9847	SvPV_shrink_to_cur(sv);
	9848	}
	9849	else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
	9850	PL_bufptr - PL_parser->lex_shared->re_eval_start);
	9851	NEXTVAL_NEXTTOKE.opval =
	9852	newSVOP(OP_CONST, 0,
	9853	sv);
	9854	force_next(THING);
	9855	PL_parser->lex_shared->re_eval_start = NULL;
	9856	PL_expect = XTERM;
	9857	return REPORT(PERLY_COMMA);
	9858	}
	9859
	9860	/* FALLTHROUGH */
	9861	case LEX_INTERPCONCAT:
	9862	#ifdef DEBUGGING
	9863	if (PL_lex_brackets)
	9864	Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
	9865	(long) PL_lex_brackets);
	9866	#endif
	9867	if (PL_bufptr == PL_bufend)
	9868	return REPORT(sublex_done());
	9869
	9870	/* m'foo' still needs to be parsed for possible (?{...}) */
	9871	if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
	9872	SV *sv = newSVsv(PL_linestr);
	9873	sv = tokeq(sv);
	9874	pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
	9875	s = PL_bufend;
	9876	}
	9877	else {
	9878	int save_error_count = PL_error_count;
	9879
	9880	s = scan_const(PL_bufptr);
	9881
	9882	/* Set flag if this was a pattern and there were errors. op.c will
	9883	* refuse to compile a pattern with this flag set. Otherwise, we
	9884	* could get segfaults, etc. */
	9885	if (PL_lex_inpat && PL_error_count > save_error_count) {
	9886	((PMOP*)PL_lex_inpat)->op_pmflags \|= PMf_HAS_ERROR;
	9887	}
	9888	if (*s == '\\')
	9889	PL_lex_state = LEX_INTERPCASEMOD;
	9890	else
	9891	PL_lex_state = LEX_INTERPSTART;
	9892	}
	9893
	9894	if (s != PL_bufptr) {
	9895	NEXTVAL_NEXTTOKE = pl_yylval;
	9896	PL_expect = XTERM;
	9897	force_next(THING);
	9898	if (PL_lex_starts++) {
	9899	/* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
	9900	if (!PL_lex_casemods && PL_lex_inpat)
	9901	TOKEN(PERLY_COMMA);
	9902	else
	9903	AopNOASSIGN(OP_CONCAT);
	9904	}
	9905	else {
	9906	PL_bufptr = s;
	9907	return yylex();
	9908	}
	9909	}
	9910
	9911	return yylex();
	9912	case LEX_FORMLINE:
	9913	if (PL_parser->sub_error_count != PL_error_count) {
	9914	/* There was an error parsing a formline, which tends to
	9915	mess up the parser.
	9916	Unlike interpolated sub-parsing, we can't treat any of
	9917	these as recoverable, so no need to check sub_no_recover.
	9918	*/
	9919	yyquit();
	9920	}
	9921	assert(PL_lex_formbrack);
	9922	s = scan_formline(PL_bufptr);
	9923	if (!PL_lex_formbrack)
	9924	return yyl_rightcurly(aTHX_ s, 1);
	9925	PL_bufptr = s;
	9926	return yylex();
	9927	}
	9928
	9929	/* We really do not want PL_linestr ever becoming a COW. */
	9930	assert (!SvIsCOW(PL_linestr));
	9931	s = PL_bufptr;
	9932	PL_oldoldbufptr = PL_oldbufptr;
	9933	PL_oldbufptr = s;
	9934
	9935	if (PL_in_my == KEY_sigvar) {
	9936	PL_parser->saw_infix_sigil = 0;
	9937	return yyl_sigvar(aTHX_ s);
	9938	}
	9939
	9940	{
	9941	/* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
	9942	On its return, we then need to set it to indicate whether the token
	9943	we just encountered was an infix operator that (if we hadn't been
	9944	expecting an operator) have been a sigil.
	9945	*/
	9946	bool expected_operator = (PL_expect == XOPERATOR);
	9947	int ret = yyl_try(aTHX_ s);
	9948	switch (pl_yylval.ival) {
	9949	case OP_BIT_AND:
	9950	case OP_MODULO:
	9951	case OP_MULTIPLY:
	9952	case OP_NBIT_AND:
	9953	if (expected_operator) {
	9954	PL_parser->saw_infix_sigil = 1;
	9955	break;
	9956	}
	9957	/* FALLTHROUGH */
	9958	default:
	9959	PL_parser->saw_infix_sigil = 0;
	9960	}
	9961	return ret;
	9962	}
	9963	}
	9964
	9965
	9966	/*
	9967	S_pending_ident
	9968
	9969	Looks up an identifier in the pad or in a package
	9970
	9971	PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
	9972	rather than a plain pad var.
	9973
	9974	Returns:
	9975	PRIVATEREF if this is a lexical name.
	9976	BAREWORD if this belongs to a package.
	9977
	9978	Structure:
	9979	if we're in a my declaration
	9980	croak if they tried to say my($foo::bar)
	9981	build the ops for a my() declaration
	9982	if it's an access to a my() variable
	9983	build ops for access to a my() variable
	9984	if in a dq string, and they've said @foo and we can't find @foo
	9985	warn
	9986	build ops for a bareword
	9987	*/
	9988
	9989	static int
	9990	S_pending_ident(pTHX)
	9991	{
	9992	PADOFFSET tmp = 0;
	9993	const char pit = (char)pl_yylval.ival;
	9994	const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
	9995	/* All routes through this function want to know if there is a colon. */
	9996	const char const has_colon = (const char) memchr (PL_tokenbuf, ':', tokenbuf_len);
	9997
	9998	DEBUG_T({ PerlIO_printf(Perl_debug_log,
	9999	"### Pending identifier '%s'\n", PL_tokenbuf); });
	10000	assert(tokenbuf_len >= 2);
	10001
	10002	/* if we're in a my(), we can't allow dynamics here.
	10003	$foo'bar has already been turned into $foo::bar, so
	10004	just check for colons.
	10005
	10006	if it's a legal name, the OP is a PADANY.
	10007	*/
	10008	if (PL_in_my) {
	10009	if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
	10010	if (has_colon)
	10011	/* diag_listed_as: No package name allowed for variable %s
	10012	in "our" */
	10013	yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
	10014	"%s %s in \"our\"",
	10015	*PL_tokenbuf=='&' ? "subroutine" : "variable",
	10016	PL_tokenbuf), UTF ? SVf_UTF8 : 0);
	10017	tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
	10018	}
	10019	else {
	10020	OP *o;
	10021	if (has_colon) {
	10022	/* "my" variable %s can't be in a package */
	10023	/* PL_no_myglob is constant */
	10024	GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
	10025	yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
	10026	PL_in_my == KEY_my ? "my" :
	10027	PL_in_my == KEY_field ? "field" : "state",
	10028	*PL_tokenbuf == '&' ? "subroutine" : "variable",
	10029	PL_tokenbuf),
	10030	UTF ? SVf_UTF8 : 0);
	10031	GCC_DIAG_RESTORE_STMT;
	10032	}
	10033
	10034	if (PL_in_my == KEY_sigvar) {
	10035	/* A signature 'padop' needs in addition, an op_first to
	10036	* point to a child sigdefelem, and an extra field to hold
	10037	* the signature index. We can achieve both by using an
	10038	* UNOP_AUX and (ab)using the op_aux field to hold the
	10039	* index. If we ever need more fields, use a real malloced
	10040	* aux strut instead.
	10041	*/
	10042	o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
	10043	INT2PTR(UNOP_AUX_item *,
	10044	(PL_parser->sig_elems)));
	10045	o->op_private \|= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
	10046	: PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
	10047	: OPpARGELEM_HV);
	10048	}
	10049	else
	10050	o = newOP(OP_PADANY, 0);
	10051	o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
	10052	UTF ? SVf_UTF8 : 0);
	10053	if (PL_in_my == KEY_sigvar)
	10054	PL_in_my = 0;
	10055
	10056	pl_yylval.opval = o;
	10057	return PRIVATEREF;
	10058	}
	10059	}
	10060
	10061	/*
	10062	build the ops for accesses to a my() variable.
	10063	*/
	10064
	10065	if (!has_colon) {
	10066	if (!PL_in_my)
	10067	tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
	10068	0);
	10069	if (tmp != NOT_IN_PAD) {
	10070	/* might be an "our" variable" */
	10071	if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
	10072	/* build ops for a bareword */
	10073	HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
	10074	HEK * const stashname = HvNAME_HEK(stash);
	10075	SV * const sym = newSVhek(stashname);
	10076	sv_catpvs(sym, "::");
	10077	sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
	10078	pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
	10079	pl_yylval.opval->op_private = OPpCONST_ENTERED;
	10080	if (pit != '&')
	10081	gv_fetchsv(sym,
	10082	GV_ADDMULTI,
	10083	((PL_tokenbuf[0] == '$') ? SVt_PV
	10084	: (PL_tokenbuf[0] == '@') ? SVt_PVAV
	10085	: SVt_PVHV));
	10086	return BAREWORD;
	10087	}
	10088
	10089	pl_yylval.opval = newOP(OP_PADANY, 0);
	10090	pl_yylval.opval->op_targ = tmp;
	10091	return PRIVATEREF;
	10092	}
	10093	}
	10094
	10095	/*
	10096	Whine if they've said @foo or @foo{key} in a doublequoted string,
	10097	and @foo (or %foo) isn't a variable we can find in the symbol
	10098	table.
	10099	*/
	10100	if (ckWARN(WARN_AMBIGUOUS)
	10101	&& pit == '@'
	10102	&& PL_lex_state != LEX_NORMAL
	10103	&& !PL_lex_brackets)
	10104	{
	10105	GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
	10106	( UTF ? SVf_UTF8 : 0 ) \| GV_ADDMG,
	10107	SVt_PVAV);
	10108	if ((!gv \|\| ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
	10109	)
	10110	{
	10111	/* Downgraded from fatal to warning 20000522 mjd */
	10112	Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
	10113	"Possible unintended interpolation of %" UTF8f
	10114	" in string",
	10115	UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
	10116	}
	10117	}
	10118
	10119	/* build ops for a bareword */
	10120	pl_yylval.opval = newSVOP(OP_CONST, 0,
	10121	newSVpvn_flags(PL_tokenbuf + 1,
	10122	tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
	10123	UTF ? SVf_UTF8 : 0 ));
	10124	pl_yylval.opval->op_private = OPpCONST_ENTERED;
	10125	if (pit != '&')
	10126	gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
	10127	(PL_in_eval ? GV_ADDMULTI : GV_ADD)
	10128	\| ( UTF ? SVf_UTF8 : 0 ),
	10129	((PL_tokenbuf[0] == '$') ? SVt_PV
	10130	: (PL_tokenbuf[0] == '@') ? SVt_PVAV
	10131	: SVt_PVHV));
	10132	return BAREWORD;
	10133	}
	10134
	10135	STATIC void
	10136	S_checkcomma(pTHX_ const char s, const char name, const char *what)
	10137	{
	10138	PERL_ARGS_ASSERT_CHECKCOMMA;
	10139
	10140	if (s == ' ' && s[1] == '(') { / XXX gotta be a better way */
	10141	if (ckWARN(WARN_SYNTAX)) {
	10142	int level = 1;
	10143	const char *w;
	10144	for (w = s+2; *w && level; w++) {
	10145	if (*w == '(')
	10146	++level;
	10147	else if (*w == ')')
	10148	--level;
	10149	}
	10150	while (isSPACE(*w))
	10151	++w;
	10152	/* the list of chars below is for end of statements or
	10153	* block / parens, boolean operators (&&, \|\|, //) and branch
	10154	* constructs (or, and, if, until, unless, while, err, for).
	10155	* Not a very solid hack... */
	10156	if (!w \|\| !memCHRs(";&/\|})]oaiuwef!=", w))
	10157	Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
	10158	"%s (...) interpreted as function",name);
	10159	}
	10160	}
	10161	while (s < PL_bufend && isSPACE(*s))
	10162	s++;
	10163	if (*s == '(')
	10164	s++;
	10165	while (s < PL_bufend && isSPACE(*s))
	10166	s++;
	10167	if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
	10168	const char * const w = s;
	10169	s += UTF ? UTF8SKIP(s) : 1;
	10170	while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
	10171	s += UTF ? UTF8SKIP(s) : 1;
	10172	while (s < PL_bufend && isSPACE(*s))
	10173	s++;
	10174	if (*s == ',') {
	10175	GV* gv;
	10176	if (keyword(w, s - w, 0))
	10177	return;
	10178
	10179	gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
	10180	if (gv && GvCVu(gv))
	10181	return;
	10182	if (s - w <= 254) {
	10183	PADOFFSET off;
	10184	char tmpbuf[256];
	10185	Copy(w, tmpbuf+1, s - w, char);
	10186	*tmpbuf = '&';
	10187	off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
	10188	if (off != NOT_IN_PAD) return;
	10189	}
	10190	Perl_croak(aTHX_ "No comma allowed after %s", what);
	10191	}
	10192	}
	10193	}
	10194
	10195	/* S_new_constant(): do any overload::constant lookup.
	10196
	10197	Either returns sv, or mortalizes/frees sv and returns a new SV*.
	10198	Best used as sv=new_constant(..., sv, ...).
	10199	If s, pv are NULL, calls subroutine with one argument,
	10200	and <type> is used with error messages only.
	10201	<type> is assumed to be well formed UTF-8.
	10202
	10203	If error_msg is not NULL, *error_msg will be set to any error encountered.
	10204	Otherwise yyerror() will be used to output it */
	10205
	10206	STATIC SV *
	10207	S_new_constant(pTHX_ const char s, STRLEN len, const char key, STRLEN keylen,
	10208	SV sv, SV pv, const char *type, STRLEN typelen,
	10209	const char ** error_msg)
	10210	{
	10211	dSP;
	10212	HV * table = GvHV(PL_hintgv); /* ^H */
	10213	SV *res;
	10214	SV *errsv = NULL;
	10215	SV **cvp;
	10216	SV cv, typesv;
	10217	const char why1 = "", why2 = "", *why3 = "";
	10218	const char * optional_colon = ":"; /* Only some messages have a colon */
	10219	char *msg;
	10220
	10221	PERL_ARGS_ASSERT_NEW_CONSTANT;
	10222	/* We assume that this is true: */
	10223	assert(type \|\| s);
	10224
	10225	sv_2mortal(sv); /* Parent created it permanently */
	10226
	10227	if ( ! table
	10228	\|\| ! (PL_hints & HINT_LOCALIZE_HH))
	10229	{
	10230	why1 = "unknown";
	10231	optional_colon = "";
	10232	goto report;
	10233	}
	10234
	10235	cvp = hv_fetch(table, key, keylen, FALSE);
	10236	if (!cvp \|\| !SvOK(*cvp)) {
	10237	why1 = "$^H{";
	10238	why2 = key;
	10239	why3 = "} is not defined";
	10240	goto report;
	10241	}
	10242
	10243	cv = *cvp;
	10244	if (!pv && s)
	10245	pv = newSVpvn_flags(s, len, SVs_TEMP);
	10246	if (type && pv)
	10247	typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
	10248	else
	10249	typesv = &PL_sv_undef;
	10250
	10251	PUSHSTACKi(PERLSI_OVERLOAD);
	10252	ENTER ;
	10253	SAVETMPS;
	10254
	10255	PUSHMARK(SP) ;
	10256	EXTEND(sp, 3);
	10257	if (pv)
	10258	PUSHs(pv);
	10259	PUSHs(sv);
	10260	if (pv)
	10261	PUSHs(typesv);
	10262	PUTBACK;
	10263	call_sv(cv, G_SCALAR \| ( PL_in_eval ? 0 : G_EVAL));
	10264
	10265	SPAGAIN ;
	10266
	10267	/* Check the eval first */
	10268	if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
	10269	STRLEN errlen;
	10270	const char * errstr;
	10271	sv_catpvs(errsv, "Propagated");
	10272	errstr = SvPV_const(errsv, errlen);
	10273	yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
	10274	(void)POPs;
	10275	res = SvREFCNT_inc_simple_NN(sv);
	10276	}
	10277	else {
	10278	res = POPs;
	10279	SvREFCNT_inc_simple_void_NN(res);
	10280	}
	10281
	10282	PUTBACK ;
	10283	FREETMPS ;
	10284	LEAVE ;
	10285	POPSTACK;
	10286
	10287	if (SvOK(res)) {
	10288	return res;
	10289	}
	10290
	10291	sv = res;
	10292	(void)sv_2mortal(sv);
	10293
	10294	why1 = "Call to &{$^H{";
	10295	why2 = key;
	10296	why3 = "}} did not return a defined value";
	10297
	10298	report:
	10299
	10300	msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
	10301	(int)(type ? typelen : len),
	10302	(type ? type: s),
	10303	optional_colon,
	10304	why1, why2, why3);
	10305	if (error_msg) {
	10306	*error_msg = msg;
	10307	}
	10308	else {
	10309	yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
	10310	}
	10311	return SvREFCNT_inc_simple_NN(sv);
	10312	}
	10313
	10314	PERL_STATIC_INLINE void
	10315	S_parse_ident(pTHX_ char s, char d, char * const e, int allow_package,
	10316	bool is_utf8, bool check_dollar, bool tick_warn)
	10317	{
	10318	int saw_tick = 0;
	10319	const char olds = s;
	10320	PERL_ARGS_ASSERT_PARSE_IDENT;
	10321
	10322	while (*s < PL_bufend) {
	10323	if (*d >= e)
	10324	Perl_croak(aTHX_ "%s", ident_too_long);
	10325	if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
	10326	/* The UTF-8 case must come first, otherwise things
	10327	* like c\N{COMBINING TILDE} would start failing, as the
	10328	* isWORDCHAR_A case below would gobble the 'c' up.
	10329	*/
	10330
	10331	char t = s + UTF8SKIP(*s);
	10332	while (isIDCONT_utf8_safe((const U8) t, (const U8) PL_bufend)) {
	10333	t += UTF8SKIP(t);
	10334	}
	10335	if (d + (t - s) > e)
	10336	Perl_croak(aTHX_ "%s", ident_too_long);
	10337	Copy(s, d, t - *s, char);
	10338	d += t - s;
	10339	*s = t;
	10340	}
	10341	else if ( isWORDCHAR_A(**s) ) {
	10342	do {
	10343	(d)++ = (s)++;
	10344	} while (isWORDCHAR_A(*s) && d < e);
	10345	}
	10346	else if ( allow_package
	10347	&& **s == '\''
	10348	&& isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
	10349	{
	10350	(d)++ = ':';
	10351	(d)++ = ':';
	10352	(*s)++;
	10353	saw_tick++;
	10354	}
	10355	else if (allow_package && *s == ':' && (s)[1] == ':'
	10356	/* Disallow things like Foo::$bar. For the curious, this is
	10357	* the code path that triggers the "Bad name after" warning
	10358	* when looking for barewords.
	10359	*/
	10360	&& !(check_dollar && (*s)[2] == '$')) {
	10361	(d)++ = (s)++;
	10362	(d)++ = (s)++;
	10363	}
	10364	else
	10365	break;
	10366	}
	10367	if (UNLIKELY(saw_tick && tick_warn && ckWARN2_d(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR))) {
	10368	if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
	10369	char *this_d;
	10370	char *d2;
	10371	Newx(this_d, s - olds + saw_tick + 2, char); / +2 for $# */
	10372	d2 = this_d;
	10373	SAVEFREEPV(this_d);
	10374
	10375	Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR),
	10376	"Old package separator used in string");
	10377	if (olds[-1] == '#')
	10378	*d2++ = olds[-2];
	10379	*d2++ = olds[-1];
	10380	while (olds < *s) {
	10381	if (*olds == '\'') {
	10382	*d2++ = '\\';
	10383	d2++ = olds++;
	10384	}
	10385	else
	10386	d2++ = olds++;
	10387	}
	10388	Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
	10389	"\t(Did you mean \"%" UTF8f "\" instead?)\n",
	10390	UTF8fARG(is_utf8, d2-this_d, this_d));
	10391	}
	10392	else {
	10393	Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR),
	10394	"Old package separator \"'\" deprecated");
	10395	}
	10396	}
	10397	return;
	10398	}
	10399
	10400	/* Returns a NUL terminated string, with the length of the string written to
	10401	*slp
	10402
	10403	scan_word6() may be removed once ' in names is removed.
	10404	*/
	10405	char *
	10406	Perl_scan_word6(pTHX_ char s, char dest, STRLEN destlen, int allow_package, STRLEN *slp, bool warn_tick)
	10407	{
	10408	char *d = dest;
	10409	char * const e = d + destlen - 3; /* two-character token, ending NUL */
	10410	bool is_utf8 = cBOOL(UTF);
	10411
	10412	PERL_ARGS_ASSERT_SCAN_WORD6;
	10413
	10414	parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, warn_tick);
	10415	*d = '\0';
	10416	*slp = d - dest;
	10417	return s;
	10418	}
	10419
	10420	char *
	10421	Perl_scan_word(pTHX_ char s, char dest, STRLEN destlen, int allow_package, STRLEN *slp)
	10422	{
	10423	PERL_ARGS_ASSERT_SCAN_WORD;
	10424	return scan_word6(s, dest, destlen, allow_package, slp, FALSE);
	10425	}
	10426
	10427	/* scan s and extract an identifier ($var) from it if possible
	10428	* into dest.
	10429	* XXX: This function has subtle implications on parsing, and
	10430	* changing how it behaves can cause a variable to change from
	10431	* being a run time rv2sv call or a compile time binding to a
	10432	* specific variable name.
	10433	*/
	10434	STATIC char *
	10435	S_scan_ident(pTHX_ char s, char dest, STRLEN destlen, I32 ck_uni)
	10436	{
	10437	I32 herelines = PL_parser->herelines;
	10438	SSize_t bracket = -1;
	10439	char funny = *s++;
	10440	char *d = dest;
	10441	char * const e = d + destlen - 3; /* two-character token, ending NUL */
	10442	bool is_utf8 = cBOOL(UTF);
	10443	line_t orig_copline = 0, tmp_copline = 0;
	10444
	10445	PERL_ARGS_ASSERT_SCAN_IDENT;
	10446
	10447	if (isSPACE(s) \|\| !s)
	10448	s = skipspace(s);
	10449	if (isDIGIT(s)) { / handle $0 and $1 $2 and $10 and etc */
	10450	bool is_zero= *s == '0' ? TRUE : FALSE;
	10451	char *digit_start= d;
	10452	d++ = s++;
	10453	while (s < PL_bufend && isDIGIT(*s)) {
	10454	if (d >= e)
	10455	Perl_croak(aTHX_ "%s", ident_too_long);
	10456	d++ = s++;
	10457	}
	10458	if (is_zero && d - digit_start > 1)
	10459	Perl_croak(aTHX_ ident_var_zero_multi_digit);
	10460	}
	10461	else { /* See if it is a "normal" identifier */
	10462	parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
	10463	}
	10464	*d = '\0';
	10465	d = dest;
	10466	if (*d) {
	10467	/* Either a digit variable, or parse_ident() found an identifier
	10468	(anything valid as a bareword), so job done and return. */
	10469	if (PL_lex_state != LEX_NORMAL)
	10470	PL_lex_state = LEX_INTERPENDMAYBE;
	10471	return s;
	10472	}
	10473
	10474	/* Here, it is not a run-of-the-mill identifier name */
	10475
	10476	if (*s == '$' && s[1]
	10477	&& ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
	10478	\|\| isDIGIT_A((U8)s[1])
	10479	\|\| s[1] == '$'
	10480	\|\| s[1] == '{'
	10481	\|\| memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
	10482	{
	10483	/* Dereferencing a value in a scalar variable.
	10484	The alternatives are different syntaxes for a scalar variable.
	10485	Using ' as a leading package separator isn't allowed. :: is. */
	10486	return s;
	10487	}
	10488	/* Handle the opening { of @{...}, &{...}, {...}, %{...}, ${...} /
	10489	if (*s == '{') {
	10490	bracket = s - SvPVX(PL_linestr);
	10491	s++;
	10492	orig_copline = CopLINE(PL_curcop);
	10493	if (s < PL_bufend && isSPACE(*s)) {
	10494	s = skipspace(s);
	10495	}
	10496	}
	10497
	10498
	10499	/* Extract the first character of the variable name from 's' and
	10500	* copy it, null terminated into 'd'. Note that this does not
	10501	* involve checking for just IDFIRST characters, as it allows the
	10502	* '^' for ${^FOO} type variable names, and it allows all the
	10503	* characters that are legal in a single character variable name.
	10504	*
	10505	* The legal ones are any of:
	10506	* a) all ASCII characters except:
	10507	* 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
	10508	* 2) '{'
	10509	* The final case currently doesn't get this far in the program, so we
	10510	* don't test for it. If that were to change, it would be ok to allow it.
	10511	* b) When not under Unicode rules, any upper Latin1 character
	10512	* c) Otherwise, when unicode rules are used, all XIDS characters.
	10513	*
	10514	* Because all ASCII characters have the same representation whether
	10515	* encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
	10516	* '{' without knowing if is UTF-8 or not. */
	10517
	10518	if ((s <= PL_bufend - ((is_utf8)
	10519	? UTF8SKIP(s)
	10520	: 1))
	10521	&& (
	10522	isGRAPH_A(*s)
	10523	\|\|
	10524	( is_utf8
	10525	? isIDFIRST_utf8_safe(s, PL_bufend)
	10526	: (isGRAPH_L1(*s)
	10527	&& LIKELY((U8) *s != LATIN1_TO_NATIVE(0xAD))
	10528	)
	10529	)
	10530	)
	10531	){
	10532	if (is_utf8) {
	10533	const STRLEN skip = UTF8SKIP(s);
	10534	STRLEN i;
	10535	d[skip] = '\0';
	10536	for ( i = 0; i < skip; i++ )
	10537	d[i] = *s++;
	10538	}
	10539	else {
	10540	d = s++;
	10541	d[1] = '\0';
	10542	}
	10543	}
	10544
	10545	/* special case to handle ${10}, ${11} the same way we handle ${1} etc */
	10546	if (isDIGIT(*d)) {
	10547	bool is_zero= *d == '0' ? TRUE : FALSE;
	10548	char *digit_start= d;
	10549	while (s < PL_bufend && isDIGIT(*s)) {
	10550	d++;
	10551	if (d >= e)
	10552	Perl_croak(aTHX_ "%s", ident_too_long);
	10553	d= s++;
	10554	}
	10555	if (is_zero && d - digit_start >= 1) /* d points at the last digit */
	10556	Perl_croak(aTHX_ ident_var_zero_multi_digit);
	10557	d[1] = '\0';
	10558	}
	10559
	10560	/* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
	10561	else if (d == '^' && s && isCONTROLVAR(*s)) {
	10562	d = toCTRL(s);
	10563	s++;
	10564	}
	10565	/* Warn about ambiguous code after unary operators if {...} notation isn't
	10566	used. There's no difference in ambiguity; it's merely a heuristic
	10567	about when not to warn. */
	10568	else if (ck_uni && bracket == -1)
	10569	check_uni();
	10570
	10571	if (bracket != -1) {
	10572	bool skip;
	10573	char *s2;
	10574	/* If we were processing {...} notation then... */
	10575	if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
	10576	\|\| (!isPRINT(d) / isCNTRL(d), plus all non-ASCII */
	10577	&& isWORDCHAR(*s))
	10578	) {
	10579	/* note we have to check for a normal identifier first,
	10580	* as it handles utf8 symbols, and only after that has
	10581	* been ruled out can we look at the caret words */
	10582	if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
	10583	/* if it starts as a valid identifier, assume that it is one.
	10584	(the later check for } being at the expected point will trap
	10585	cases where this doesn't pan out.) */
	10586	d += is_utf8 ? UTF8SKIP(d) : 1;
	10587	parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
	10588	*d = '\0';
	10589	}
	10590	else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
	10591	d++;
	10592	while (isWORDCHAR(*s) && d < e) {
	10593	d++ = s++;
	10594	}
	10595	if (d >= e)
	10596	Perl_croak(aTHX_ "%s", ident_too_long);
	10597	*d = '\0';
	10598	}
	10599	tmp_copline = CopLINE(PL_curcop);
	10600	if (s < PL_bufend && isSPACE(*s)) {
	10601	s = skipspace(s);
	10602	}
	10603	if ((s == '[' \|\| (s == '{' && strNE(dest, "sub")))) {
	10604	/* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */
	10605	if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
	10606	const char * const brack =
	10607	(const char *)
	10608	((*s == '[') ? "[...]" : "{...}");
	10609	orig_copline = CopLINE(PL_curcop);
	10610	CopLINE_set(PL_curcop, tmp_copline);
	10611	/* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
	10612	Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
	10613	"Ambiguous use of %c{%s%s} resolved to %c%s%s",
	10614	funny, dest, brack, funny, dest, brack);
	10615	CopLINE_set(PL_curcop, orig_copline);
	10616	}
	10617	bracket++;
	10618	PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR \| XFAKEBRACK);
	10619	PL_lex_allbrackets++;
	10620	return s;
	10621	}
	10622	}
	10623
	10624	if ( !tmp_copline )
	10625	tmp_copline = CopLINE(PL_curcop);
	10626	if ((skip = s < PL_bufend && isSPACE(*s))) {
	10627	/* Avoid incrementing line numbers or resetting PL_linestart,
	10628	in case we have to back up. */
	10629	STRLEN s_off = s - SvPVX(PL_linestr);
	10630	s2 = peekspace(s);
	10631	s = SvPVX(PL_linestr) + s_off;
	10632	}
	10633	else
	10634	s2 = s;
	10635
	10636	/* Expect to find a closing } after consuming any trailing whitespace.
	10637	*/
	10638	if (*s2 == '}') {
	10639	/* Now increment line numbers if applicable. */
	10640	if (skip)
	10641	s = skipspace(s);
	10642	s++;
	10643	if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
	10644	PL_lex_state = LEX_INTERPEND;
	10645	PL_expect = XREF;
	10646	}
	10647	if (PL_lex_state == LEX_NORMAL \|\| PL_lex_brackets) {
	10648	if (ckWARN(WARN_AMBIGUOUS)
	10649	&& (keyword(dest, d - dest, 0)
	10650	\|\| get_cvn_flags(dest, d - dest, is_utf8
	10651	? SVf_UTF8
	10652	: 0)))
	10653	{
	10654	SV *tmp = newSVpvn_flags( dest, d - dest,
	10655	SVs_TEMP \| (is_utf8 ? SVf_UTF8 : 0) );
	10656	if (funny == '#')
	10657	funny = '@';
	10658	orig_copline = CopLINE(PL_curcop);
	10659	CopLINE_set(PL_curcop, tmp_copline);
	10660	Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
	10661	"Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
	10662	funny, SVfARG(tmp), funny, SVfARG(tmp));
	10663	CopLINE_set(PL_curcop, orig_copline);
	10664	}
	10665	}
	10666	}
	10667	else {
	10668	/* Didn't find the closing } at the point we expected, so restore
	10669	state such that the next thing to process is the opening { and */
	10670	s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
	10671	CopLINE_set(PL_curcop, orig_copline);
	10672	PL_parser->herelines = herelines;
	10673	*dest = '\0';
	10674	PL_parser->sub_no_recover = TRUE;
	10675	}
	10676	}
	10677	else if ( PL_lex_state == LEX_INTERPNORMAL
	10678	&& !PL_lex_brackets
	10679	&& !intuit_more(s, PL_bufend))
	10680	PL_lex_state = LEX_INTERPEND;
	10681	return s;
	10682	}
	10683
	10684	static bool
	10685	S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
	10686
	10687	/* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
	10688	* found in the parse starting at 's', based on the subset that are valid
	10689	* in this context input to this routine in 'valid_flags'. Advances s.
	10690	* Returns TRUE if the input should be treated as a valid flag, so the next
	10691	* char may be as well; otherwise FALSE. 'charset' should point to a NUL
	10692	* upon first call on the current regex. This routine will set it to any
	10693	* charset modifier found. The caller shouldn't change it. This way,
	10694	* another charset modifier encountered in the parse can be detected as an
	10695	* error, as we have decided to allow only one */
	10696
	10697	const char c = **s;
	10698	STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
	10699
	10700	if ( charlen != 1 \|\| ! strchr(valid_flags, c) ) {
	10701	if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
	10702	yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.s\"", (int)charlen, s),
	10703	UTF ? SVf_UTF8 : 0);
	10704	(*s) += charlen;
	10705	/* Pretend that it worked, so will continue processing before
	10706	* dieing */
	10707	return TRUE;
	10708	}
	10709	return FALSE;
	10710	}
	10711
	10712	switch (c) {
	10713
	10714	CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
	10715	case GLOBAL_PAT_MOD: *pmfl \|= PMf_GLOBAL; break;
	10716	case CONTINUE_PAT_MOD: *pmfl \|= PMf_CONTINUE; break;
	10717	case ONCE_PAT_MOD: *pmfl \|= PMf_KEEP; break;
	10718	case KEEPCOPY_PAT_MOD: *pmfl \|= RXf_PMf_KEEPCOPY; break;
	10719	case NONDESTRUCT_PAT_MOD: *pmfl \|= PMf_NONDESTRUCT; break;
	10720	case LOCALE_PAT_MOD:
	10721	if (*charset) {
	10722	goto multiple_charsets;
	10723	}
	10724	set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
	10725	*charset = c;
	10726	break;
	10727	case UNICODE_PAT_MOD:
	10728	if (*charset) {
	10729	goto multiple_charsets;
	10730	}
	10731	set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
	10732	*charset = c;
	10733	break;
	10734	case ASCII_RESTRICT_PAT_MOD:
	10735	if (! *charset) {
	10736	set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
	10737	}
	10738	else {
	10739
	10740	/* Error if previous modifier wasn't an 'a', but if it was, see
	10741	* if, and accept, a second occurrence (only) */
	10742	if (*charset != 'a'
	10743	\|\| get_regex_charset(*pmfl)
	10744	!= REGEX_ASCII_RESTRICTED_CHARSET)
	10745	{
	10746	goto multiple_charsets;
	10747	}
	10748	set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
	10749	}
	10750	*charset = c;
	10751	break;
	10752	case DEPENDS_PAT_MOD:
	10753	if (*charset) {
	10754	goto multiple_charsets;
	10755	}
	10756	set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
	10757	*charset = c;
	10758	break;
	10759	}
	10760
	10761	(*s)++;
	10762	return TRUE;
	10763
	10764	multiple_charsets:
	10765	if (*charset != c) {
	10766	yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
	10767	}
	10768	else if (c == 'a') {
	10769	/* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
	10770	yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
	10771	}
	10772	else {
	10773	yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
	10774	}
	10775
	10776	/* Pretend that it worked, so will continue processing before dieing */
	10777	(*s)++;
	10778	return TRUE;
	10779	}
	10780
	10781	STATIC char *
	10782	S_scan_pat(pTHX_ char *start, I32 type)
	10783	{
	10784	PMOP *pm;
	10785	char *s;
	10786	const char * const valid_flags =
	10787	(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
	10788	char charset = '\0'; /* character set modifier */
	10789	unsigned int x_mod_count = 0;
	10790
	10791	PERL_ARGS_ASSERT_SCAN_PAT;
	10792
	10793	s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
	10794	if (!s)
	10795	Perl_croak(aTHX_ "Search pattern not terminated");
	10796
	10797	pm = (PMOP*)newPMOP(type, 0);
	10798	if (PL_multi_open == '?') {
	10799	/* This is the only point in the code that sets PMf_ONCE: */
	10800	pm->op_pmflags \|= PMf_ONCE;
	10801
	10802	/* Hence it's safe to do this bit of PMOP book-keeping here, which
	10803	allows us to restrict the list needed by reset to just the ??
	10804	matches. */
	10805	assert(type != OP_TRANS);
	10806	if (PL_curstash) {
	10807	MAGIC mg = mg_find((const SV )PL_curstash, PERL_MAGIC_symtab);
	10808	U32 elements;
	10809	if (!mg) {
	10810	mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
	10811	0);
	10812	}
	10813	elements = mg->mg_len / sizeof(PMOP**);
	10814	Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
	10815	((PMOP**)mg->mg_ptr) [elements++] = pm;
	10816	mg->mg_len = elements * sizeof(PMOP**);
	10817	PmopSTASH_set(pm,PL_curstash);
	10818	}
	10819	}
	10820
	10821	/* if qr/...(?{..}).../, then need to parse the pattern within a new
	10822	* anon CV. False positives like qr/[(?{]/ are harmless */
	10823
	10824	if (type == OP_QR) {
	10825	STRLEN len;
	10826	char e, p = SvPV(PL_lex_stuff, len);
	10827	e = p + len;
	10828	for (; p < e; p++) {
	10829	if (p[0] == '(' && (
	10830	(p[1] == '?' && (p[2] == '{' \|\|
	10831	(p[2] == '?' && p[3] == '{'))) \|\|
	10832	(p[1] == '*' && (p[2] == '{' \|\|
	10833	(p[2] == '*' && p[3] == '{')))
	10834	)){
	10835	pm->op_pmflags \|= PMf_HAS_CV;
	10836	break;
	10837	}
	10838	}
	10839	pm->op_pmflags \|= PMf_IS_QR;
	10840	}
	10841
	10842	while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
	10843	&s, &charset, &x_mod_count))
	10844	{};
	10845	/* issue a warning if /c is specified,but /g is not */
	10846	if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
	10847	{
	10848	Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
	10849	"Use of /c modifier is meaningless without /g" );
	10850	}
	10851
	10852	PL_lex_op = (OP*)pm;
	10853	pl_yylval.ival = OP_MATCH;
	10854	return s;
	10855	}
	10856
	10857	STATIC char *
	10858	S_scan_subst(pTHX_ char *start)
	10859	{
	10860	char *s;
	10861	PMOP *pm;
	10862	I32 first_start;
	10863	line_t first_line;
	10864	line_t linediff = 0;
	10865	I32 es = 0;
	10866	char charset = '\0'; /* character set modifier */
	10867	unsigned int x_mod_count = 0;
	10868	char *t;
	10869
	10870	PERL_ARGS_ASSERT_SCAN_SUBST;
	10871
	10872	pl_yylval.ival = OP_NULL;
	10873
	10874	s = scan_str(start, TRUE, FALSE, FALSE, &t);
	10875
	10876	if (!s)
	10877	Perl_croak(aTHX_ "Substitution pattern not terminated");
	10878
	10879	s = t;
	10880
	10881	first_start = PL_multi_start;
	10882	first_line = CopLINE(PL_curcop);
	10883	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
	10884	if (!s) {
	10885	SvREFCNT_dec_NN(PL_lex_stuff);
	10886	PL_lex_stuff = NULL;
	10887	Perl_croak(aTHX_ "Substitution replacement not terminated");
	10888	}
	10889	PL_multi_start = first_start; /* so whole substitution is taken together */
	10890
	10891	pm = (PMOP*)newPMOP(OP_SUBST, 0);
	10892
	10893
	10894	while (*s) {
	10895	if (*s == EXEC_PAT_MOD) {
	10896	s++;
	10897	es++;
	10898	}
	10899	else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
	10900	&s, &charset, &x_mod_count))
	10901	{
	10902	break;
	10903	}
	10904	}
	10905
	10906	if ((pm->op_pmflags & PMf_CONTINUE)) {
	10907	Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
	10908	}
	10909
	10910	if (es) {
	10911	SV * const repl = newSVpvs("");
	10912
	10913	PL_multi_end = 0;
	10914	pm->op_pmflags \|= PMf_EVAL;
	10915	for (; es > 1; es--) {
	10916	sv_catpvs(repl, "eval ");
	10917	}
	10918	sv_catpvs(repl, "do {");
	10919	sv_catsv(repl, PL_parser->lex_sub_repl);
	10920	sv_catpvs(repl, "}");
	10921	SvREFCNT_dec(PL_parser->lex_sub_repl);
	10922	PL_parser->lex_sub_repl = repl;
	10923	}
	10924
	10925
	10926	linediff = CopLINE(PL_curcop) - first_line;
	10927	if (linediff)
	10928	CopLINE_set(PL_curcop, first_line);
	10929
	10930	if (linediff \|\| es) {
	10931	/* the IVX field indicates that the replacement string is a s///e;
	10932	* the NVX field indicates how many src code lines the replacement
	10933	* spreads over */
	10934	sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
	10935	((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
	10936	((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
	10937	cBOOL(es);
	10938	}
	10939
	10940	PL_lex_op = (OP*)pm;
	10941	pl_yylval.ival = OP_SUBST;
	10942	return s;
	10943	}
	10944
	10945	STATIC char *
	10946	S_scan_trans(pTHX_ char *start)
	10947	{
	10948	char* s;
	10949	OP *o;
	10950	U8 squash;
	10951	U8 del;
	10952	U8 complement;
	10953	bool nondestruct = 0;
	10954	char *t;
	10955
	10956	PERL_ARGS_ASSERT_SCAN_TRANS;
	10957
	10958	pl_yylval.ival = OP_NULL;
	10959
	10960	s = scan_str(start,FALSE,FALSE,FALSE,&t);
	10961	if (!s)
	10962	Perl_croak(aTHX_ "Transliteration pattern not terminated");
	10963
	10964	s = t;
	10965
	10966	s = scan_str(s,FALSE,FALSE,FALSE,NULL);
	10967	if (!s) {
	10968	SvREFCNT_dec_NN(PL_lex_stuff);
	10969	PL_lex_stuff = NULL;
	10970	Perl_croak(aTHX_ "Transliteration replacement not terminated");
	10971	}
	10972
	10973	complement = del = squash = 0;
	10974	while (1) {
	10975	switch (*s) {
	10976	case 'c':
	10977	complement = OPpTRANS_COMPLEMENT;
	10978	break;
	10979	case 'd':
	10980	del = OPpTRANS_DELETE;
	10981	break;
	10982	case 's':
	10983	squash = OPpTRANS_SQUASH;
	10984	break;
	10985	case 'r':
	10986	nondestruct = 1;
	10987	break;
	10988	default:
	10989	goto no_more;
	10990	}
	10991	s++;
	10992	}
	10993	no_more:
	10994
	10995	o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
	10996	o->op_private &= ~OPpTRANS_ALL;
	10997	o->op_private \|= del\|squash\|complement;
	10998
	10999	PL_lex_op = o;
	11000	pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
	11001
	11002
	11003	return s;
	11004	}
	11005
	11006	/* scan_heredoc
	11007	Takes a pointer to the first < in <<FOO.
	11008	Returns a pointer to the byte following <<FOO.
	11009
	11010	This function scans a heredoc, which involves different methods
	11011	depending on whether we are in a string eval, quoted construct, etc.
	11012	This is because PL_linestr could containing a single line of input, or
	11013	a whole string being evalled, or the contents of the current quote-
	11014	like operator.
	11015
	11016	The two basic methods are:
	11017	- Steal lines from the input stream
	11018	- Scan the heredoc in PL_linestr and remove it therefrom
	11019
	11020	In a file scope or filtered eval, the first method is used; in a
	11021	string eval, the second.
	11022
	11023	In a quote-like operator, we have to choose between the two,
	11024	depending on where we can find a newline. We peek into outer lex-
	11025	ing scopes until we find one with a newline in it. If we reach the
	11026	outermost lexing scope and it is a file, we use the stream method.
	11027	Otherwise it is treated as an eval.
	11028	*/
	11029
	11030	STATIC char *
	11031	S_scan_heredoc(pTHX_ char *s)
	11032	{
	11033	I32 op_type = OP_SCALAR;
	11034	I32 len;
	11035	SV *tmpstr;
	11036	char term;
	11037	char *d;
	11038	char *e;
	11039	char *peek;
	11040	char *indent = 0;
	11041	I32 indent_len = 0;
	11042	bool indented = FALSE;
	11043	const bool infile = PL_rsfp \|\| PL_parser->filtered;
	11044	const line_t origline = CopLINE(PL_curcop);
	11045	LEXSHARED *shared = PL_parser->lex_shared;
	11046
	11047	PERL_ARGS_ASSERT_SCAN_HEREDOC;
	11048
	11049	s += 2;
	11050	d = PL_tokenbuf + 1;
	11051	e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
	11052	*PL_tokenbuf = '\n';
	11053	peek = s;
	11054
	11055	if (*peek == '~') {
	11056	indented = TRUE;
	11057	peek++; s++;
	11058	}
	11059
	11060	while (SPACE_OR_TAB(*peek))
	11061	peek++;
	11062
	11063	if (peek == '`' \|\| peek == '\'' \|\| *peek =='"') {
	11064	s = peek;
	11065	term = *s++;
	11066	s = delimcpy(d, e, s, PL_bufend, term, &len);
	11067	if (s == PL_bufend)
	11068	Perl_croak(aTHX_ "Unterminated delimiter for here document");
	11069	d += len;
	11070	s++;
	11071	}
	11072	else {
	11073	if (*s == '\\')
	11074	/* <<\FOO is equivalent to <<'FOO' */
	11075	s++, term = '\'';
	11076	else
	11077	term = '"';
	11078
	11079	if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
	11080	Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
	11081
	11082	peek = s;
	11083
	11084	while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
	11085	peek += UTF ? UTF8SKIP(peek) : 1;
	11086	}
	11087
	11088	len = (peek - s >= e - d) ? (e - d) : (peek - s);
	11089	Copy(s, d, len, char);
	11090	s += len;
	11091	d += len;
	11092	}
	11093
	11094	if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
	11095	Perl_croak(aTHX_ "Delimiter for here document is too long");
	11096
	11097	*d++ = '\n';
	11098	*d = '\0';
	11099	len = d - PL_tokenbuf;
	11100
	11101	#ifndef PERL_STRICT_CR
	11102	d = (char *) memchr(s, '\r', PL_bufend - s);
	11103	if (d) {
	11104	char * const olds = s;
	11105	s = d;
	11106	while (s < PL_bufend) {
	11107	if (*s == '\r') {
	11108	*d++ = '\n';
	11109	if (*++s == '\n')
	11110	s++;
	11111	}
	11112	else if (s == '\n' && s[1] == '\r') { / \015\013 on a mac? */
	11113	d++ = s++;
	11114	s++;
	11115	}
	11116	else
	11117	d++ = s++;
	11118	}
	11119	*d = '\0';
	11120	PL_bufend = d;
	11121	SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
	11122	s = olds;
	11123	}
	11124	#endif
	11125
	11126	tmpstr = newSV_type(SVt_PVIV);
	11127	if (term == '\'') {
	11128	op_type = OP_CONST;
	11129	SvIV_set(tmpstr, -1);
	11130	}
	11131	else if (term == '`') {
	11132	op_type = OP_BACKTICK;
	11133	SvIV_set(tmpstr, '\\');
	11134	}
	11135
	11136	PL_multi_start = origline + 1 + PL_parser->herelines;
	11137	PL_multi_open = PL_multi_close = '<';
	11138
	11139	/* inside a string eval or quote-like operator */
	11140	if (!infile \|\| PL_lex_inwhat) {
	11141	SV *linestr;
	11142	char *bufend;
	11143	char * const olds = s;
	11144	PERL_CONTEXT * const cx = CX_CUR();
	11145	/* These two fields are not set until an inner lexing scope is
	11146	entered. But we need them set here. */
	11147	shared->ls_bufptr = s;
	11148	shared->ls_linestr = PL_linestr;
	11149
	11150	if (PL_lex_inwhat) {
	11151	/* Look for a newline. If the current buffer does not have one,
	11152	peek into the line buffer of the parent lexing scope, going
	11153	up as many levels as necessary to find one with a newline
	11154	after bufptr.
	11155	*/
	11156	while (!(s = (char *)memchr(
	11157	(void *)shared->ls_bufptr, '\n',
	11158	SvEND(shared->ls_linestr)-shared->ls_bufptr
	11159	)))
	11160	{
	11161	shared = shared->ls_prev;
	11162	/* shared is only null if we have gone beyond the outermost
	11163	lexing scope. In a file, we will have broken out of the
	11164	loop in the previous iteration. In an eval, the string buf-
	11165	fer ends with "\n;", so the while condition above will have
	11166	evaluated to false. So shared can never be null. Or so you
	11167	might think. Odd syntax errors like s;@{<<; can gobble up
	11168	the implicit semicolon at the end of a flie, causing the
	11169	file handle to be closed even when we are not in a string
	11170	eval. So shared may be null in that case.
	11171	(Closing '>>}' here to balance the earlier open brace for
	11172	editors that look for matched pairs.) */
	11173	if (UNLIKELY(!shared))
	11174	goto interminable;
	11175	/* A LEXSHARED struct with a null ls_prev pointer is the outer-
	11176	most lexing scope. In a file, shared->ls_linestr at that
	11177	level is just one line, so there is no body to steal. */
	11178	if (infile && !shared->ls_prev) {
	11179	s = olds;
	11180	goto streaming;
	11181	}
	11182	}
	11183	}
	11184	else { /* eval or we've already hit EOF */
	11185	s = (char)memchr((void)s, '\n', PL_bufend - s);
	11186	if (!s)
	11187	goto interminable;
	11188	}
	11189
	11190	linestr = shared->ls_linestr;
	11191	bufend = SvEND(linestr);
	11192	d = s;
	11193	if (indented) {
	11194	char *myolds = s;
	11195
	11196	while (s < bufend - len + 1) {
	11197	if (*s++ == '\n')
	11198	++PL_parser->herelines;
	11199
	11200	if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
	11201	char *backup = s;
	11202	indent_len = 0;
	11203
	11204	/* Only valid if it's preceded by whitespace only */
	11205	while (backup != myolds && --backup >= myolds) {
	11206	if (! SPACE_OR_TAB(*backup)) {
	11207	break;
	11208	}
	11209	indent_len++;
	11210	}
	11211
	11212	/* No whitespace or all! */
	11213	if (backup == s \|\| *backup == '\n') {
	11214	Newx(indent, indent_len + 1, char);
	11215	memcpy(indent, backup + 1, indent_len);
	11216	indent[indent_len] = 0;
	11217	s--; /* before our delimiter */
	11218	PL_parser->herelines--; /* this line doesn't count */
	11219	break;
	11220	}
	11221	}
	11222	}
	11223	}
	11224	else {
	11225	while (s < bufend - len + 1
	11226	&& memNE(s,PL_tokenbuf,len) )
	11227	{
	11228	if (*s++ == '\n')
	11229	++PL_parser->herelines;
	11230	}
	11231	}
	11232
	11233	if (s >= bufend - len + 1) {
	11234	goto interminable;
	11235	}
	11236
	11237	sv_setpvn_fresh(tmpstr,d+1,s-d);
	11238	s += len - 1;
	11239	/* the preceding stmt passes a newline */
	11240	PL_parser->herelines++;
	11241
	11242	/* s now points to the newline after the heredoc terminator.
	11243	d points to the newline before the body of the heredoc.
	11244	*/
	11245
	11246	/* We are going to modify linestr in place here, so set
	11247	aside copies of the string if necessary for re-evals or
	11248	(caller $n)[6]. */
	11249	/* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
	11250	check shared->re_eval_str. */
	11251	if (shared->re_eval_start \|\| shared->re_eval_str) {
	11252	/* Set aside the rest of the regexp */
	11253	if (!shared->re_eval_str)
	11254	shared->re_eval_str =
	11255	newSVpvn(shared->re_eval_start,
	11256	bufend - shared->re_eval_start);
	11257	shared->re_eval_start -= s-d;
	11258	}
	11259
	11260	if (cxstack_ix >= 0
	11261	&& CxTYPE(cx) == CXt_EVAL
	11262	&& CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
	11263	&& cx->blk_eval.cur_text == linestr)
	11264	{
	11265	cx->blk_eval.cur_text = newSVsv(linestr);
	11266	cx->blk_u16 \|= 0x40; /* indicate cur_text is ref counted */
	11267	}
	11268
	11269	/* Copy everything from s onwards back to d. */
	11270	Move(s,d,bufend-s + 1,char);
	11271	SvCUR_set(linestr, SvCUR(linestr) - (s-d));
	11272	/* Setting PL_bufend only applies when we have not dug deeper
	11273	into other scopes, because sublex_done sets PL_bufend to
	11274	SvEND(PL_linestr). */
	11275	if (shared == PL_parser->lex_shared)
	11276	PL_bufend = SvEND(linestr);
	11277	s = olds;
	11278	}
	11279	else {
	11280	SV *linestr_save;
	11281	char *oldbufptr_save;
	11282	char *oldoldbufptr_save;
	11283	streaming:
	11284	sv_grow_fresh(tmpstr, 80);
	11285	SvPVCLEAR_FRESH(tmpstr); /* avoid "uninitialized" warning */
	11286	term = PL_tokenbuf[1];
	11287	len--;
	11288	linestr_save = PL_linestr; /* must restore this afterwards */
	11289	d = s; /* and this */
	11290	oldbufptr_save = PL_oldbufptr;
	11291	oldoldbufptr_save = PL_oldoldbufptr;
	11292	PL_linestr = newSVpvs("");
	11293	PL_bufend = SvPVX(PL_linestr);
	11294
	11295	while (1) {
	11296	PL_bufptr = PL_bufend;
	11297	CopLINE_set(PL_curcop,
	11298	origline + 1 + PL_parser->herelines);
	11299
	11300	if ( !lex_next_chunk(LEX_NO_TERM)
	11301	&& (!SvCUR(tmpstr) \|\| SvEND(tmpstr)[-1] != '\n'))
	11302	{
	11303	/* Simply freeing linestr_save might seem simpler here, as it
	11304	does not matter what PL_linestr points to, since we are
	11305	about to croak; but in a quote-like op, linestr_save
	11306	will have been prospectively freed already, via
	11307	SAVEFREESV(PL_linestr) in sublex_push, so it's easier to
	11308	restore PL_linestr. */
	11309	SvREFCNT_dec_NN(PL_linestr);
	11310	PL_linestr = linestr_save;
	11311	PL_oldbufptr = oldbufptr_save;
	11312	PL_oldoldbufptr = oldoldbufptr_save;
	11313	goto interminable;
	11314	}
	11315
	11316	CopLINE_set(PL_curcop, origline);
	11317
	11318	if (!SvCUR(PL_linestr) \|\| PL_bufend[-1] != '\n') {
	11319	s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
	11320	/* ^That should be enough to avoid this needing to grow: */
	11321	sv_catpvs(PL_linestr, "\n\0");
	11322	assert(s == SvPVX(PL_linestr));
	11323	PL_bufend = SvEND(PL_linestr);
	11324	}
	11325
	11326	s = PL_bufptr;
	11327	PL_parser->herelines++;
	11328	PL_last_lop = PL_last_uni = NULL;
	11329
	11330	#ifndef PERL_STRICT_CR
	11331	if (PL_bufend - PL_linestart >= 2) {
	11332	if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
	11333	\|\| (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
	11334	{
	11335	PL_bufend[-2] = '\n';
	11336	PL_bufend--;
	11337	SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
	11338	}
	11339	else if (PL_bufend[-1] == '\r')
	11340	PL_bufend[-1] = '\n';
	11341	}
	11342	else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
	11343	PL_bufend[-1] = '\n';
	11344	#endif
	11345
	11346	if (indented && (PL_bufend-s) >= len) {
	11347	char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
	11348
	11349	if (found) {
	11350	char *backup = found;
	11351	indent_len = 0;
	11352
	11353	/* Only valid if it's preceded by whitespace only */
	11354	while (backup != s && --backup >= s) {
	11355	if (! SPACE_OR_TAB(*backup)) {
	11356	break;
	11357	}
	11358	indent_len++;
	11359	}
	11360
	11361	/* All whitespace or none! */
	11362	if (backup == found \|\| SPACE_OR_TAB(*backup)) {
	11363	Newx(indent, indent_len + 1, char);
	11364	memcpy(indent, backup, indent_len);
	11365	indent[indent_len] = 0;
	11366	SvREFCNT_dec(PL_linestr);
	11367	PL_linestr = linestr_save;
	11368	PL_linestart = SvPVX(linestr_save);
	11369	PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
	11370	PL_oldbufptr = oldbufptr_save;
	11371	PL_oldoldbufptr = oldoldbufptr_save;
	11372	s = d;
	11373	break;
	11374	}
	11375	}
	11376
	11377	/* Didn't find it */
	11378	sv_catsv(tmpstr,PL_linestr);
	11379	}
	11380	else {
	11381	if (*s == term && PL_bufend-s >= len
	11382	&& memEQ(s,PL_tokenbuf + 1,len))
	11383	{
	11384	SvREFCNT_dec(PL_linestr);
	11385	PL_linestr = linestr_save;
	11386	PL_linestart = SvPVX(linestr_save);
	11387	PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
	11388	PL_oldbufptr = oldbufptr_save;
	11389	PL_oldoldbufptr = oldoldbufptr_save;
	11390	s = d;
	11391	break;
	11392	}
	11393	else {
	11394	sv_catsv(tmpstr,PL_linestr);
	11395	}
	11396	}
	11397	} /* while (1) */
	11398	}
	11399
	11400	PL_multi_end = origline + PL_parser->herelines;
	11401
	11402	if (indented && indent) {
	11403	STRLEN linecount = 1;
	11404	STRLEN herelen = SvCUR(tmpstr);
	11405	char *ss = SvPVX(tmpstr);
	11406	char *se = ss + herelen;
	11407	SV *newstr = newSV(herelen+1);
	11408	SvPOK_on(newstr);
	11409
	11410	/* Trim leading whitespace */
	11411	while (ss < se) {
	11412	/* newline only? Copy and move on */
	11413	if (*ss == '\n') {
	11414	sv_catpvs(newstr,"\n");
	11415	ss++;
	11416	linecount++;
	11417
	11418	/* Found our indentation? Strip it */
	11419	}
	11420	else if (se - ss >= indent_len
	11421	&& memEQ(ss, indent, indent_len))
	11422	{
	11423	STRLEN le = 0;
	11424	ss += indent_len;
	11425
	11426	while ((ss + le) < se && *(ss + le) != '\n')
	11427	le++;
	11428
	11429	sv_catpvn(newstr, ss, le);
	11430	ss += le;
	11431
	11432	/* Line doesn't begin with our indentation? Croak */
	11433	}
	11434	else {
	11435	Safefree(indent);
	11436	Perl_croak(aTHX_
	11437	"Indentation on line %d of here-doc doesn't match delimiter",
	11438	(int)linecount
	11439	);
	11440	}
	11441	} /* while */
	11442
	11443	/* avoid sv_setsv() as we don't want to COW here */
	11444	sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
	11445	Safefree(indent);
	11446	SvREFCNT_dec_NN(newstr);
	11447	}
	11448
	11449	if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
	11450	SvPV_shrink_to_cur(tmpstr);
	11451	}
	11452
	11453	if (!IN_BYTES) {
	11454	if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
	11455	SvUTF8_on(tmpstr);
	11456	}
	11457
	11458	PL_lex_stuff = tmpstr;
	11459	pl_yylval.ival = op_type;
	11460	return s;
	11461
	11462	interminable:
	11463	if (indent)
	11464	Safefree(indent);
	11465	SvREFCNT_dec(tmpstr);
	11466	CopLINE_set(PL_curcop, origline);
	11467	missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
	11468	}
	11469
	11470
	11471	/* scan_inputsymbol
	11472	takes: position of first '<' in input buffer
	11473	returns: position of first char following the matching '>' in
	11474	input buffer
	11475	side-effects: pl_yylval and lex_op are set.
	11476
	11477	This code handles:
	11478
	11479	<> read from ARGV
	11480	<<>> read from ARGV without magic open
	11481	<FH> read from filehandle
	11482	<pkg::FH> read from package qualified filehandle
	11483	<pkg'FH> read from package qualified filehandle
	11484	<$fh> read from filehandle in $fh
	11485	<*.h> filename glob
	11486
	11487	*/
	11488
	11489	STATIC char *
	11490	S_scan_inputsymbol(pTHX_ char *start)
	11491	{
	11492	char s = start; / current position in buffer */
	11493	char *end;
	11494	I32 len;
	11495	bool nomagicopen = FALSE;
	11496	char d = PL_tokenbuf; / start of temp holding space */
	11497	const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
	11498
	11499	PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
	11500
	11501	end = (char *) memchr(s, '\n', PL_bufend - s);
	11502	if (!end)
	11503	end = PL_bufend;
	11504	if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
	11505	nomagicopen = TRUE;
	11506	*d = '\0';
	11507	len = 0;
	11508	s += 3;
	11509	}
	11510	else
	11511	s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
	11512
	11513	/* die if we didn't have space for the contents of the <>,
	11514	or if it didn't end, or if we see a newline
	11515	*/
	11516
	11517	if (len >= (I32)sizeof PL_tokenbuf)
	11518	Perl_croak(aTHX_ "Excessively long <> operator");
	11519	if (s >= end)
	11520	Perl_croak(aTHX_ "Unterminated <> operator");
	11521
	11522	s++;
	11523
	11524	/* check for <$fh>
	11525	Remember, only scalar variables are interpreted as filehandles by
	11526	this code. Anything more complex (e.g., <$fh{$num}>) will be
	11527	treated as a glob() call.
	11528	This code makes use of the fact that except for the $ at the front,
	11529	a scalar variable and a filehandle look the same.
	11530	*/
	11531	if (*d == '$' && d[1]) d++;
	11532
	11533	/* allow <Pkg'VALUE> or <Pkg::VALUE> */
	11534	while (isWORDCHAR_lazy_if_safe(d, e, UTF) \|\| d == '\'' \|\| d == ':') {
	11535	d += UTF ? UTF8SKIP(d) : 1;
	11536	}
	11537
	11538	/* If we've tried to read what we allow filehandles to look like, and
	11539	there's still text left, then it must be a glob() and not a getline.
	11540	Use scan_str to pull out the stuff between the <> and treat it
	11541	as nothing more than a string.
	11542	*/
	11543
	11544	if (d - PL_tokenbuf != len) {
	11545	pl_yylval.ival = OP_GLOB;
	11546	s = scan_str(start,FALSE,FALSE,FALSE,NULL);
	11547	if (!s)
	11548	Perl_croak(aTHX_ "Glob not terminated");
	11549	return s;
	11550	}
	11551	else {
	11552	bool readline_overridden = FALSE;
	11553	GV *gv_readline;
	11554	/* we're in a filehandle read situation */
	11555	d = PL_tokenbuf;
	11556
	11557	/* turn <> into <ARGV> */
	11558	if (!len)
	11559	Copy("ARGV",d,5,char);
	11560
	11561	/* Check whether readline() is overridden */
	11562	if ((gv_readline = gv_override("readline",8)))
	11563	readline_overridden = TRUE;
	11564
	11565	/* if <$fh>, create the ops to turn the variable into a
	11566	filehandle
	11567	*/
	11568	if (*d == '$') {
	11569	/* try to find it in the pad for this block, otherwise find
	11570	add symbol table ops
	11571	*/
	11572	const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
	11573	if (tmp != NOT_IN_PAD) {
	11574	if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
	11575	HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
	11576	HEK * const stashname = HvNAME_HEK(stash);
	11577	SV * const sym = newSVhek_mortal(stashname);
	11578	sv_catpvs(sym, "::");
	11579	sv_catpv(sym, d+1);
	11580	d = SvPVX(sym);
	11581	goto intro_sym;
	11582	}
	11583	else {
	11584	OP * const o = newPADxVOP(OP_PADSV, 0, tmp);
	11585	PL_lex_op = readline_overridden
	11586	? newUNOP(OP_ENTERSUB, OPf_STACKED,
	11587	op_append_elem(OP_LIST, o,
	11588	newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
	11589	: newUNOP(OP_READLINE, 0, o);
	11590	}
	11591	}
	11592	else {
	11593	GV *gv;
	11594	++d;
	11595	intro_sym:
	11596	gv = gv_fetchpv(d,
	11597	GV_ADDMULTI \| ( UTF ? SVf_UTF8 : 0 ),
	11598	SVt_PV);
	11599	PL_lex_op = readline_overridden
	11600	? newUNOP(OP_ENTERSUB, OPf_STACKED,
	11601	op_append_elem(OP_LIST,
	11602	newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
	11603	newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
	11604	: newUNOP(OP_READLINE, 0,
	11605	newUNOP(OP_RV2SV, 0,
	11606	newGVOP(OP_GV, 0, gv)));
	11607	}
	11608	/* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
	11609	pl_yylval.ival = OP_NULL;
	11610	}
	11611
	11612	/* If it's none of the above, it must be a literal filehandle
	11613	(<Foo::BAR> or <FOO>) so build a simple readline OP */
	11614	else {
	11615	GV * const gv = gv_fetchpv(d, GV_ADD \| ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
	11616	PL_lex_op = readline_overridden
	11617	? newUNOP(OP_ENTERSUB, OPf_STACKED,
	11618	op_append_elem(OP_LIST,
	11619	newGVOP(OP_GV, 0, gv),
	11620	newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
	11621	: newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
	11622	pl_yylval.ival = OP_NULL;
	11623
	11624	/* leave the token generation above to avoid confusing the parser */
	11625	if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
	11626	no_bareword_filehandle(d);
	11627	}
	11628	}
	11629	}
	11630
	11631	return s;
	11632	}
	11633
	11634
	11635	/* scan_str
	11636	takes:
	11637	start position in buffer
	11638	keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
	11639	only if they are of the open/close form
	11640	keep_delims preserve the delimiters around the string
	11641	re_reparse compiling a run-time /(?{})/:
	11642	collapse // to /, and skip encoding src
	11643	delimp if non-null, this is set to the position of
	11644	the closing delimiter, or just after it if
	11645	the closing and opening delimiters differ
	11646	(i.e., the opening delimiter of a substitu-
	11647	tion replacement)
	11648	returns: position to continue reading from buffer
	11649	side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
	11650	updates the read buffer.
	11651
	11652	This subroutine pulls a string out of the input. It is called for:
	11653	q single quotes q(literal text)
	11654	' single quotes 'literal text'
	11655	qq double quotes qq(interpolate $here please)
	11656	" double quotes "interpolate $here please"
	11657	qx backticks qx(/bin/ls -l)
	11658	` backticks `/bin/ls -l`
	11659	qw quote words @EXPORT_OK = qw( func() $spam )
	11660	m// regexp match m/this/
	11661	s/// regexp substitute s/this/that/
	11662	tr/// string transliterate tr/this/that/
	11663	y/// string transliterate y/this/that/
	11664	($*@) sub prototypes sub foo ($)
	11665	(stuff) sub attr parameters sub foo : attr(stuff)
	11666	<> readline or globs <FOO>, <>, <$fh>, or <*.c>
	11667
	11668	In most of these cases (all but <>, patterns and transliterate)
	11669	yylex() calls scan_str(). m// makes yylex() call scan_pat() which
	11670	calls scan_str(). s/// makes yylex() call scan_subst() which calls
	11671	scan_str(). tr/// and y/// make yylex() call scan_trans() which
	11672	calls scan_str().
	11673
	11674	It skips whitespace before the string starts, and treats the first
	11675	character as the delimiter. If the delimiter is one of ([{< then
	11676	the corresponding "close" character )]}> is used as the closing
	11677	delimiter. It allows quoting of delimiters, and if the string has
	11678	balanced delimiters ([{<>}]) it allows nesting.
	11679
	11680	On success, the SV with the resulting string is put into lex_stuff or,
	11681	if that is already non-NULL, into lex_repl. The second case occurs only
	11682	when parsing the RHS of the special constructs s/// and tr/// (y///).
	11683	For convenience, the terminating delimiter character is stuffed into
	11684	SvIVX of the SV.
	11685	*/
	11686
	11687	char *
	11688	Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
	11689	char **delimp
	11690	)
	11691	{
	11692	SV sv; / scalar value: string */
	11693	char s = start; / current position in the buffer */
	11694	char to; / current position in the sv's data */
	11695	int brackets = 1; /* bracket nesting level */
	11696	bool d_is_utf8 = FALSE; /* is there any utf8 content? */
	11697	UV open_delim_code; /* code point */
	11698	char open_delim_str[UTF8_MAXBYTES+1];
	11699	STRLEN delim_byte_len; /* each delimiter currently is the same number
	11700	of bytes */
	11701	line_t herelines;
	11702
	11703	/* The only non-UTF character that isn't a stand alone grapheme is
	11704	* white-space, hence can't be a delimiter. */
	11705	const char * non_grapheme_msg = "Use of unassigned code point or"
	11706	" non-standalone grapheme for a delimiter"
	11707	" is not allowed";
	11708	PERL_ARGS_ASSERT_SCAN_STR;
	11709
	11710	/* skip space before the delimiter */
	11711	if (isSPACE(s)) { / skipspace can change the buffer 's' is in, so
	11712	'start' also has to change */
	11713	s = start = skipspace(s);
	11714	}
	11715
	11716	/* mark where we are, in case we need to report errors */
	11717	CLINE;
	11718
	11719	/* after skipping whitespace, the next character is the delimiter */
	11720	if (! UTF \|\| UTF8_IS_INVARIANT(*s)) {
	11721	open_delim_code = (U8) *s;
	11722	open_delim_str[0] = *s;
	11723	delim_byte_len = 1;
	11724	}
	11725	else {
	11726	open_delim_code = utf8_to_uvchr_buf((U8)s, (U8)PL_bufend,
	11727	&delim_byte_len);
	11728	if (UNLIKELY(! is_grapheme((U8 *) start,
	11729	(U8 *) s,
	11730	(U8 *) PL_bufend,
	11731	open_delim_code)))
	11732	{
	11733	yyerror(non_grapheme_msg);
	11734	}
	11735
	11736	Copy(s, open_delim_str, delim_byte_len, char);
	11737	}
	11738	open_delim_str[delim_byte_len] = '\0'; /* Only for safety */
	11739
	11740
	11741	/* mark where we are */
	11742	PL_multi_start = CopLINE(PL_curcop);
	11743	PL_multi_open = open_delim_code;
	11744	herelines = PL_parser->herelines;
	11745
	11746	const char * legal_paired_opening_delims;
	11747	const char * legal_paired_closing_delims;
	11748	const char * deprecated_opening_delims;
	11749	if (FEATURE_MORE_DELIMS_IS_ENABLED) {
	11750	if (UTF) {
	11751	legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS;
	11752	legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS;
	11753
	11754	/* We are deprecating using a closing delimiter as the opening, in
	11755	* case we want in the future to accept them reversed. The string
	11756	* may include ones that are legal, but the code below won't look
	11757	* at this string unless it didn't find a legal opening one */
	11758	deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
	11759	}
	11760	else {
	11761	legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS;
	11762	legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS;
	11763	deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
	11764	}
	11765	}
	11766	else {
	11767	legal_paired_opening_delims = "([{<";
	11768	legal_paired_closing_delims = ")]}>";
	11769	deprecated_opening_delims = (UTF)
	11770	? DEPRECATED_OPENING_UTF8_BRACKETS
	11771	: DEPRECATED_OPENING_NON_UTF8_BRACKETS;
	11772	}
	11773
	11774	const char * legal_paired_opening_delims_end = legal_paired_opening_delims
	11775	+ strlen(legal_paired_opening_delims);
	11776	const char * deprecated_delims_end = deprecated_opening_delims
	11777	+ strlen(deprecated_opening_delims);
	11778
	11779	const char * close_delim_str = open_delim_str;
	11780	UV close_delim_code = open_delim_code;
	11781
	11782	/* If the delimiter has a mirror-image closing one, get it */
	11783	const char *tmps = ninstr(legal_paired_opening_delims,
	11784	legal_paired_opening_delims_end,
	11785	open_delim_str, open_delim_str + delim_byte_len);
	11786	if (tmps) {
	11787	/* Here, there is a paired delimiter, and tmps points to its position
	11788	in the string of the accepted opening paired delimiters. The
	11789	corresponding position in the string of closing ones is the
	11790	beginning of the paired mate. Both contain the same number of
	11791	bytes. */
	11792	close_delim_str = legal_paired_closing_delims
	11793	+ (tmps - legal_paired_opening_delims);
	11794
	11795	/* The list of paired delimiters contains all the ASCII ones that have
	11796	* always been legal, and no other ASCIIs. Don't raise a message if
	11797	* using one of these */
	11798	if (! isASCII(open_delim_code)) {
	11799	Perl_ck_warner_d(aTHX_
	11800	packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS),
	11801	"Use of '%" UTF8f "' is experimental as a string delimiter",
	11802	UTF8fARG(UTF, delim_byte_len, open_delim_str));
	11803	}
	11804
	11805	close_delim_code = (UTF)
	11806	? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL)
	11807	: * (U8 *) close_delim_str;
	11808	}
	11809	else { /* Here, the delimiter isn't paired, hence the close is the same as
	11810	the open; and has already been set up. But make sure it isn't
	11811	deprecated to use this particular delimiter, as we plan
	11812	eventually to make it paired. */
	11813	if (ninstr(deprecated_opening_delims, deprecated_delims_end,
	11814	open_delim_str, open_delim_str + delim_byte_len))
	11815	{
	11816	Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__DELIMITER_WILL_BE_PAIRED),
	11817	"Use of '%" UTF8f "' is deprecated as a string delimiter",
	11818	UTF8fARG(UTF, delim_byte_len, open_delim_str));
	11819	}
	11820
	11821	/* Note that a NUL may be used as a delimiter, and this happens when
	11822	* delimiting an empty string, and no special handling for it is
	11823	* needed, as ninstr() calls are used */
	11824	}
	11825
	11826	PL_multi_close = close_delim_code;
	11827
	11828	if (PL_multi_open == PL_multi_close) {
	11829	keep_bracketed_quoted = FALSE;
	11830	}
	11831
	11832	/* create a new SV to hold the contents. 79 is the SV's initial length.
	11833	What a random number. */
	11834	sv = newSV_type(SVt_PVIV);
	11835	sv_grow_fresh(sv, 79);
	11836	SvIV_set(sv, close_delim_code);
	11837	(void)SvPOK_only(sv); /* validate pointer */
	11838
	11839	/* move past delimiter and try to read a complete string */
	11840	if (keep_delims)
	11841	sv_catpvn(sv, s, delim_byte_len);
	11842	s += delim_byte_len;
	11843	for (;;) {
	11844	/* extend sv if need be */
	11845	SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
	11846	/* set 'to' to the next character in the sv's string */
	11847	to = SvPVX(sv)+SvCUR(sv);
	11848
	11849	/* read until we run out of string, or we find the closing delimiter */
	11850	while (s < PL_bufend) {
	11851	/* embedded newlines increment the line count */
	11852	if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
	11853	COPLINE_INC_WITH_HERELINES;
	11854
	11855	/* backslashes can escape the closing delimiter */
	11856	if ( *s == '\\' && s < PL_bufend - delim_byte_len
	11857
	11858	/* ... but not if the delimiter itself is a backslash */
	11859	&& close_delim_code != '\\')
	11860	{
	11861	/* Here, we have an escaping backslash. If we're supposed to
	11862	* discard those that escape the closing delimiter, just
	11863	* discard this one */
	11864	if ( ! keep_bracketed_quoted
	11865	&& ( memEQ(s + 1, open_delim_str, delim_byte_len)
	11866	\|\| ( PL_multi_open == PL_multi_close
	11867	&& re_reparse && s[1] == '\\')
	11868	\|\| memEQ(s + 1, close_delim_str, delim_byte_len)))
	11869	{
	11870	s++;
	11871	}
	11872	else /* any other escapes are simply copied straight through */
	11873	to++ = s++;
	11874	}
	11875	else if ( s < PL_bufend - (delim_byte_len - 1)
	11876	&& memEQ(s, close_delim_str, delim_byte_len)
	11877	&& --brackets <= 0)
	11878	{
	11879	/* Found unescaped closing delimiter, unnested if we care about
	11880	* that; so are done.
	11881	*
	11882	* In the case of the opening and closing delimiters being
	11883	* different, we have to deal with nesting; the conditional
	11884	* above makes sure we don't get here until the nesting level,
	11885	* 'brackets', is back down to zero. In the other case,
	11886	* nesting isn't an issue, and 'brackets' never can get
	11887	* incremented above 0, so will come here at the first closing
	11888	* delimiter.
	11889	*
	11890	* Only grapheme delimiters are legal. */
	11891	if ( UTF /* All Non-UTF-8's are graphemes */
	11892	&& UNLIKELY(! is_grapheme((U8 *) start,
	11893	(U8 *) s,
	11894	(U8 *) PL_bufend,
	11895	close_delim_code)))
	11896	{
	11897	yyerror(non_grapheme_msg);
	11898	}
	11899
	11900	break;
	11901	}
	11902	/* No nesting if open eq close */
	11903	else if ( PL_multi_open != PL_multi_close
	11904	&& s < PL_bufend - (delim_byte_len - 1)
	11905	&& memEQ(s, open_delim_str, delim_byte_len))
	11906	{
	11907	brackets++;
	11908	}
	11909
	11910	/* Here, still in the middle of the string; copy this character */
	11911	if (! UTF \|\| UTF8_IS_INVARIANT((U8) *s)) {
	11912	to++ = s++;
	11913	}
	11914	else {
	11915	size_t this_char_len = UTF8SKIP(s);
	11916	Copy(s, to, this_char_len, char);
	11917	s += this_char_len;
	11918	to += this_char_len;
	11919
	11920	d_is_utf8 = TRUE;
	11921	}
	11922	} /* End of loop through buffer */
	11923
	11924	/* Here, found end of the string, OR ran out of buffer: terminate the
	11925	* copied string and update the sv's end-of-string */
	11926	*to = '\0';
	11927	SvCUR_set(sv, to - SvPVX_const(sv));
	11928
	11929	/*
	11930	* this next chunk reads more into the buffer if we're not done yet
	11931	*/
	11932
	11933	if (s < PL_bufend)
	11934	break; /* handle case where we are done yet :-) */
	11935
	11936	#ifndef PERL_STRICT_CR
	11937	if (to - SvPVX_const(sv) >= 2) {
	11938	if ( (to[-2] == '\r' && to[-1] == '\n')
	11939	\|\| (to[-2] == '\n' && to[-1] == '\r'))
	11940	{
	11941	to[-2] = '\n';
	11942	to--;
	11943	SvCUR_set(sv, to - SvPVX_const(sv));
	11944	}
	11945	else if (to[-1] == '\r')
	11946	to[-1] = '\n';
	11947	}
	11948	else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
	11949	to[-1] = '\n';
	11950	#endif
	11951
	11952	/* if we're out of file, or a read fails, bail and reset the current
	11953	line marker so we can report where the unterminated string began
	11954	*/
	11955	COPLINE_INC_WITH_HERELINES;
	11956	PL_bufptr = PL_bufend;
	11957	if (!lex_next_chunk(0)) {
	11958	ASSUME(sv);
	11959	SvREFCNT_dec(sv);
	11960	CopLINE_set(PL_curcop, (line_t)PL_multi_start);
	11961	return NULL;
	11962	}
	11963	s = start = PL_bufptr;
	11964	} /* End of infinite loop */
	11965
	11966	/* at this point, we have successfully read the delimited string */
	11967
	11968	if (keep_delims)
	11969	sv_catpvn(sv, s, delim_byte_len);
	11970	s += delim_byte_len;
	11971
	11972	if (d_is_utf8)
	11973	SvUTF8_on(sv);
	11974
	11975	PL_multi_end = CopLINE(PL_curcop);
	11976	CopLINE_set(PL_curcop, PL_multi_start);
	11977	PL_parser->herelines = herelines;
	11978
	11979	/* if we allocated too much space, give some back */
	11980	if (SvCUR(sv) + 5 < SvLEN(sv)) {
	11981	SvLEN_set(sv, SvCUR(sv) + 1);
	11982	SvPV_shrink_to_cur(sv);
	11983	}
	11984
	11985	/* decide whether this is the first or second quoted string we've read
	11986	for this op
	11987	*/
	11988
	11989	if (PL_lex_stuff)
	11990	PL_parser->lex_sub_repl = sv;
	11991	else
	11992	PL_lex_stuff = sv;
	11993	if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-delim_byte_len : s;
	11994	return s;
	11995	}
	11996
	11997	/*
	11998	scan_num
	11999	takes: pointer to position in buffer
	12000	returns: pointer to new position in buffer
	12001	side-effects: builds ops for the constant in pl_yylval.op
	12002
	12003	Read a number in any of the formats that Perl accepts:
	12004
	12005	\d(_?\d)(\.(\d(_?\d))?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
	12006	\.\d(_?\d)[Ee][\+\-]?(\d(_?\d)) .34
	12007	0b[01](_?[01])* binary integers
	12008	0o?[0-7](_?[0-7])* octal integers
	12009	0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
	12010	0x[0-9A-Fa-f](_?[0-9A-Fa-f])(?:\.\d)?p[+-]?[0-9]+ hexadecimal floats
	12011
	12012	Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
	12013	thing it reads.
	12014
	12015	If it reads a number without a decimal point or an exponent, it will
	12016	try converting the number to an integer and see if it can do so
	12017	without loss of precision.
	12018	*/
	12019
	12020	char *
	12021	Perl_scan_num(pTHX_ const char start, YYSTYPE lvalp)
	12022	{
	12023	const char s = start; / current position in buffer */
	12024	char d; / destination in temp buffer */
	12025	char e; / end of temp buffer */
	12026	NV nv; /* number read, as a double */
	12027	SV sv = NULL; / place to put the converted number */
	12028	bool floatit; /* boolean: int or float? */
	12029	const char lastub = NULL; / position of last underbar */
	12030	static const char* const number_too_long = "Number too long";
	12031	bool warned_about_underscore = 0;
	12032	I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
	12033	#define WARN_ABOUT_UNDERSCORE() \
	12034	do { \
	12035	if (!warned_about_underscore) { \
	12036	warned_about_underscore = 1; \
	12037	Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
	12038	"Misplaced _ in number"); \
	12039	} \
	12040	} while(0)
	12041	/* Hexadecimal floating point.
	12042	*
	12043	* In many places (where we have quads and NV is IEEE 754 double)
	12044	* we can fit the mantissa bits of a NV into an unsigned quad.
	12045	* (Note that UVs might not be quads even when we have quads.)
	12046	* This will not work everywhere, though (either no quads, or
	12047	* using long doubles), in which case we have to resort to NV,
	12048	* which will probably mean horrible loss of precision due to
	12049	* multiple fp operations. */
	12050	bool hexfp = FALSE;
	12051	int total_bits = 0;
	12052	int significant_bits = 0;
	12053	#if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
	12054	# define HEXFP_UQUAD
	12055	Uquad_t hexfp_uquad = 0;
	12056	int hexfp_frac_bits = 0;
	12057	#else
	12058	# define HEXFP_NV
	12059	NV hexfp_nv = 0.0;
	12060	#endif
	12061	NV hexfp_mult = 1.0;
	12062	UV high_non_zero = 0; /* highest digit */
	12063	int non_zero_integer_digits = 0;
	12064	bool new_octal = FALSE; /* octal with "0o" prefix */
	12065
	12066	PERL_ARGS_ASSERT_SCAN_NUM;
	12067
	12068	/* We use the first character to decide what type of number this is */
	12069
	12070	switch (*s) {
	12071	default:
	12072	Perl_croak(aTHX_ "panic: scan_num, s=%d", s);
	12073
	12074	/* if it starts with a 0, it could be an octal number, a decimal in
	12075	0.13 disguise, or a hexadecimal number, or a binary number. */
	12076	case '0':
	12077	{
	12078	/* variables:
	12079	u holds the "number so far"
	12080	overflowed was the number more than we can hold?
	12081
	12082	Shift is used when we add a digit. It also serves as an "are
	12083	we in octal/hex/binary?" indicator to disallow hex characters
	12084	when in octal mode.
	12085	*/
	12086	NV n = 0.0;
	12087	UV u = 0;
	12088	bool overflowed = FALSE;
	12089	bool just_zero = TRUE; /* just plain 0 or binary number? */
	12090	bool has_digs = FALSE;
	12091	static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
	12092	static const char* const bases[5] =
	12093	{ "", "binary", "", "octal", "hexadecimal" };
	12094	static const char* const Bases[5] =
	12095	{ "", "Binary", "", "Octal", "Hexadecimal" };
	12096	static const char* const maxima[5] =
	12097	{ "",
	12098	"0b11111111111111111111111111111111",
	12099	"",
	12100	"037777777777",
	12101	"0xffffffff" };
	12102
	12103	/* check for hex */
	12104	if (isALPHA_FOLD_EQ(s[1], 'x')) {
	12105	shift = 4;
	12106	s += 2;
	12107	just_zero = FALSE;
	12108	} else if (isALPHA_FOLD_EQ(s[1], 'b')) {
	12109	shift = 1;
	12110	s += 2;
	12111	just_zero = FALSE;
	12112	}
	12113	/* check for a decimal in disguise */
	12114	else if (s[1] == '.' \|\| isALPHA_FOLD_EQ(s[1], 'e'))
	12115	goto decimal;
	12116	/* so it must be octal */
	12117	else {
	12118	shift = 3;
	12119	s++;
	12120	if (isALPHA_FOLD_EQ(*s, 'o')) {
	12121	s++;
	12122	just_zero = FALSE;
	12123	new_octal = TRUE;
	12124	}
	12125	}
	12126
	12127	if (*s == '_') {
	12128	WARN_ABOUT_UNDERSCORE();
	12129	lastub = s++;
	12130	}
	12131
	12132	/* read the rest of the number */
	12133	for (;;) {
	12134	/* x is used in the overflow test,
	12135	b is the digit we're adding on. */
	12136	UV x, b;
	12137
	12138	switch (*s) {
	12139
	12140	/* if we don't mention it, we're done */
	12141	default:
	12142	goto out;
	12143
	12144	/* _ are ignored -- but warned about if consecutive */
	12145	case '_':
	12146	if (lastub && s == lastub + 1)
	12147	WARN_ABOUT_UNDERSCORE();
	12148	lastub = s++;
	12149	break;
	12150
	12151	/* 8 and 9 are not octal */
	12152	case '8': case '9':
	12153	if (shift == 3)
	12154	yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
	12155	/* FALLTHROUGH */
	12156
	12157	/* octal digits */
	12158	case '2': case '3': case '4':
	12159	case '5': case '6': case '7':
	12160	if (shift == 1)
	12161	yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
	12162	/* FALLTHROUGH */
	12163
	12164	case '0': case '1':
	12165	b = s++ & 15; / ASCII digit -> value of digit */
	12166	goto digit;
	12167
	12168	/* hex digits */
	12169	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	12170	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
	12171	/* make sure they said 0x */
	12172	if (shift != 4)
	12173	goto out;
	12174	b = (*s++ & 7) + 9;
	12175
	12176	/* Prepare to put the digit we have onto the end
	12177	of the number so far. We check for overflows.
	12178	*/
	12179
	12180	digit:
	12181	just_zero = FALSE;
	12182	has_digs = TRUE;
	12183	if (!overflowed) {
	12184	assert(shift >= 0);
	12185	x = u << shift; /* make room for the digit */
	12186
	12187	total_bits += shift;
	12188
	12189	if ((x >> shift) != u
	12190	&& !(PL_hints & HINT_NEW_BINARY)) {
	12191	overflowed = TRUE;
	12192	n = (NV) u;
	12193	Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
	12194	"Integer overflow in %s number",
	12195	bases[shift]);
	12196	} else
	12197	u = x \| b; /* add the digit to the end */
	12198	}
	12199	if (overflowed) {
	12200	n *= nvshift[shift];
	12201	/* If an NV has not enough bits in its
	12202	* mantissa to represent an UV this summing of
	12203	* small low-order numbers is a waste of time
	12204	* (because the NV cannot preserve the
	12205	* low-order bits anyway): we could just
	12206	* remember when did we overflow and in the
	12207	* end just multiply n by the right
	12208	* amount. */
	12209	n += (NV) b;
	12210	}
	12211
	12212	if (high_non_zero == 0 && b > 0)
	12213	high_non_zero = b;
	12214
	12215	if (high_non_zero)
	12216	non_zero_integer_digits++;
	12217
	12218	/* this could be hexfp, but peek ahead
	12219	* to avoid matching ".." */
	12220	if (UNLIKELY(HEXFP_PEEK(s))) {
	12221	goto out;
	12222	}
	12223
	12224	break;
	12225	}
	12226	}
	12227
	12228	/* if we get here, we had success: make a scalar value from
	12229	the number.
	12230	*/
	12231	out:
	12232
	12233	/* final misplaced underbar check */
	12234	if (s[-1] == '_')
	12235	WARN_ABOUT_UNDERSCORE();
	12236
	12237	if (UNLIKELY(HEXFP_PEEK(s))) {
	12238	/* Do sloppy (on the underbars) but quick detection
	12239	* (and value construction) for hexfp, the decimal
	12240	* detection will shortly be more thorough with the
	12241	* underbar checks. */
	12242	const char* h = s;
	12243	significant_bits = non_zero_integer_digits * shift;
	12244	#ifdef HEXFP_UQUAD
	12245	hexfp_uquad = u;
	12246	#else /* HEXFP_NV */
	12247	hexfp_nv = u;
	12248	#endif
	12249	/* Ignore the leading zero bits of
	12250	* the high (first) non-zero digit. */
	12251	if (high_non_zero) {
	12252	if (high_non_zero < 0x8)
	12253	significant_bits--;
	12254	if (high_non_zero < 0x4)
	12255	significant_bits--;
	12256	if (high_non_zero < 0x2)
	12257	significant_bits--;
	12258	}
	12259
	12260	if (*h == '.') {
	12261	#ifdef HEXFP_NV
	12262	NV nv_mult = 1.0;
	12263	#endif
	12264	bool accumulate = TRUE;
	12265	U8 b = 0; /* silence compiler warning */
	12266	int lim = 1 << shift;
	12267	for (h++; ((isXDIGIT(h) && (b = XDIGIT_VALUE(h)) < lim) \|\|
	12268	*h == '_'); h++) {
	12269	if (isXDIGIT(*h)) {
	12270	significant_bits += shift;
	12271	#ifdef HEXFP_UQUAD
	12272	if (accumulate) {
	12273	if (significant_bits < NV_MANT_DIG) {
	12274	/* We are in the long "run" of xdigits,
	12275	* accumulate the full four bits. */
	12276	assert(shift >= 0);
	12277	hexfp_uquad <<= shift;
	12278	hexfp_uquad \|= b;
	12279	hexfp_frac_bits += shift;
	12280	} else if (significant_bits - shift < NV_MANT_DIG) {
	12281	/* We are at a hexdigit either at,
	12282	* or straddling, the edge of mantissa.
	12283	* We will try grabbing as many as
	12284	* possible bits. */
	12285	int tail =
	12286	significant_bits - NV_MANT_DIG;
	12287	if (tail <= 0)
	12288	tail += shift;
	12289	assert(tail >= 0);
	12290	hexfp_uquad <<= tail;
	12291	assert((shift - tail) >= 0);
	12292	hexfp_uquad \|= b >> (shift - tail);
	12293	hexfp_frac_bits += tail;
	12294
	12295	/* Ignore the trailing zero bits
	12296	* of the last non-zero xdigit.
	12297	*
	12298	* The assumption here is that if
	12299	* one has input of e.g. the xdigit
	12300	* eight (0x8), there is only one
	12301	* bit being input, not the full
	12302	* four bits. Conversely, if one
	12303	* specifies a zero xdigit, the
	12304	* assumption is that one really
	12305	* wants all those bits to be zero. */
	12306	if (b) {
	12307	if ((b & 0x1) == 0x0) {
	12308	significant_bits--;
	12309	if ((b & 0x2) == 0x0) {
	12310	significant_bits--;
	12311	if ((b & 0x4) == 0x0) {
	12312	significant_bits--;
	12313	}
	12314	}
	12315	}
	12316	}
	12317
	12318	accumulate = FALSE;
	12319	}
	12320	} else {
	12321	/* Keep skipping the xdigits, and
	12322	* accumulating the significant bits,
	12323	* but do not shift the uquad
	12324	* (which would catastrophically drop
	12325	* high-order bits) or accumulate the
	12326	* xdigits anymore. */
	12327	}
	12328	#else /* HEXFP_NV */
	12329	if (accumulate) {
	12330	nv_mult /= nvshift[shift];
	12331	if (nv_mult > 0.0)
	12332	hexfp_nv += b * nv_mult;
	12333	else
	12334	accumulate = FALSE;
	12335	}
	12336	#endif
	12337	}
	12338	if (significant_bits >= NV_MANT_DIG)
	12339	accumulate = FALSE;
	12340	}
	12341	}
	12342
	12343	if ((total_bits > 0 \|\| significant_bits > 0) &&
	12344	isALPHA_FOLD_EQ(*h, 'p')) {
	12345	bool negexp = FALSE;
	12346	h++;
	12347	if (*h == '+')
	12348	h++;
	12349	else if (*h == '-') {
	12350	negexp = TRUE;
	12351	h++;
	12352	}
	12353	if (isDIGIT(*h)) {
	12354	I32 hexfp_exp = 0;
	12355	while (isDIGIT(h) \|\| h == '_') {
	12356	if (isDIGIT(*h)) {
	12357	hexfp_exp *= 10;
	12358	hexfp_exp += *h - '0';
	12359	#ifdef NV_MIN_EXP
	12360	if (negexp
	12361	&& -hexfp_exp < NV_MIN_EXP - 1) {
	12362	/* NOTE: this means that the exponent
	12363	* underflow warning happens for
	12364	* the IEEE 754 subnormals (denormals),
	12365	* because DBL_MIN_EXP etc are the lowest
	12366	* possible binary (or, rather, DBL_RADIX-base)
	12367	* exponent for normals, not subnormals.
	12368	*
	12369	* This may or may not be a good thing. */
	12370	Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
	12371	"Hexadecimal float: exponent underflow");
	12372	break;
	12373	}
	12374	#endif
	12375	#ifdef NV_MAX_EXP
	12376	if (!negexp
	12377	&& hexfp_exp > NV_MAX_EXP - 1) {
	12378	Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
	12379	"Hexadecimal float: exponent overflow");
	12380	break;
	12381	}
	12382	#endif
	12383	}
	12384	h++;
	12385	}
	12386	if (negexp)
	12387	hexfp_exp = -hexfp_exp;
	12388	#ifdef HEXFP_UQUAD
	12389	hexfp_exp -= hexfp_frac_bits;
	12390	#endif
	12391	hexfp_mult = Perl_pow(2.0, hexfp_exp);
	12392	hexfp = TRUE;
	12393	goto decimal;
	12394	}
	12395	}
	12396	}
	12397
	12398	if (!just_zero && !has_digs) {
	12399	/* 0x, 0o or 0b with no digits, treat it as an error.
	12400	Originally this backed up the parse before the b or
	12401	x, but that has the potential for silent changes in
	12402	behaviour, like for: "0x.3" and "0x+$foo".
	12403	*/
	12404	const char *d = s;
	12405	char *oldbp = PL_bufptr;
	12406	if (d) ++d; / so the user sees the bad non-digit */
	12407	PL_bufptr = (char )d; / so yyerror reports the context */
	12408	yyerror(Perl_form(aTHX_ "No digits found for %s literal",
	12409	bases[shift]));
	12410	PL_bufptr = oldbp;
	12411	}
	12412
	12413	if (overflowed) {
	12414	if (n > 4294967295.0)
	12415	Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
	12416	"%s number > %s non-portable",
	12417	Bases[shift],
	12418	new_octal ? "0o37777777777" : maxima[shift]);
	12419	sv = newSVnv(n);
	12420	}
	12421	else {
	12422	#if UVSIZE > 4
	12423	if (u > 0xffffffff)
	12424	Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
	12425	"%s number > %s non-portable",
	12426	Bases[shift],
	12427	new_octal ? "0o37777777777" : maxima[shift]);
	12428	#endif
	12429	sv = newSVuv(u);
	12430	}
	12431	if (just_zero && (PL_hints & HINT_NEW_INTEGER))
	12432	sv = new_constant(start, s - start, "integer",
	12433	sv, NULL, NULL, 0, NULL);
	12434	else if (PL_hints & HINT_NEW_BINARY)
	12435	sv = new_constant(start, s - start, "binary",
	12436	sv, NULL, NULL, 0, NULL);
	12437	}
	12438	break;
	12439
	12440	/*
	12441	handle decimal numbers.
	12442	we're also sent here when we read a 0 as the first digit
	12443	*/
	12444	case '1': case '2': case '3': case '4': case '5':
	12445	case '6': case '7': case '8': case '9': case '.':
	12446	decimal:
	12447	d = PL_tokenbuf;
	12448	e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
	12449	floatit = FALSE;
	12450	if (hexfp) {
	12451	floatit = TRUE;
	12452	*d++ = '0';
	12453	switch (shift) {
	12454	case 4:
	12455	*d++ = 'x';
	12456	s = start + 2;
	12457	break;
	12458	case 3:
	12459	if (new_octal) {
	12460	*d++ = 'o';
	12461	s = start + 2;
	12462	break;
	12463	}
	12464	s = start + 1;
	12465	break;
	12466	case 1:
	12467	*d++ = 'b';
	12468	s = start + 2;
	12469	break;
	12470	default:
	12471	NOT_REACHED; /* NOTREACHED */
	12472	}
	12473	}
	12474
	12475	/* read next group of digits and _ and copy into d */
	12476	while (isDIGIT(*s)
	12477	\|\| *s == '_'
	12478	\|\| UNLIKELY(hexfp && isXDIGIT(*s)))
	12479	{
	12480	/* skip underscores, checking for misplaced ones
	12481	if -w is on
	12482	*/
	12483	if (*s == '_') {
	12484	if (lastub && s == lastub + 1)
	12485	WARN_ABOUT_UNDERSCORE();
	12486	lastub = s++;
	12487	}
	12488	else {
	12489	/* check for end of fixed-length buffer */
	12490	if (d >= e)
	12491	Perl_croak(aTHX_ "%s", number_too_long);
	12492	/* if we're ok, copy the character */
	12493	d++ = s++;
	12494	}
	12495	}
	12496
	12497	/* final misplaced underbar check */
	12498	if (lastub && s == lastub + 1)
	12499	WARN_ABOUT_UNDERSCORE();
	12500
	12501	/* read a decimal portion if there is one. avoid
	12502	3..5 being interpreted as the number 3. followed
	12503	by .5
	12504	*/
	12505	if (*s == '.' && s[1] != '.') {
	12506	floatit = TRUE;
	12507	d++ = s++;
	12508
	12509	if (*s == '_') {
	12510	WARN_ABOUT_UNDERSCORE();
	12511	lastub = s;
	12512	}
	12513
	12514	/* copy, ignoring underbars, until we run out of digits.
	12515	*/
	12516	for (; isDIGIT(*s)
	12517	\|\| *s == '_'
	12518	\|\| UNLIKELY(hexfp && isXDIGIT(*s));
	12519	s++)
	12520	{
	12521	/* fixed length buffer check */
	12522	if (d >= e)
	12523	Perl_croak(aTHX_ "%s", number_too_long);
	12524	if (*s == '_') {
	12525	if (lastub && s == lastub + 1)
	12526	WARN_ABOUT_UNDERSCORE();
	12527	lastub = s;
	12528	}
	12529	else
	12530	d++ = s;
	12531	}
	12532	/* fractional part ending in underbar? */
	12533	if (s[-1] == '_')
	12534	WARN_ABOUT_UNDERSCORE();
	12535	if (*s == '.' && isDIGIT(s[1])) {
	12536	/* oops, it's really a v-string, but without the "v" */
	12537	s = start;
	12538	goto vstring;
	12539	}
	12540	}
	12541
	12542	/* read exponent part, if present */
	12543	if ((isALPHA_FOLD_EQ(*s, 'e')
	12544	\|\| UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
	12545	&& memCHRs("+-0123456789_", s[1]))
	12546	{
	12547	int exp_digits = 0;
	12548	const char *save_s = s;
	12549	char * save_d = d;
	12550
	12551	/* regardless of whether user said 3E5 or 3e5, use lower 'e',
	12552	ditto for p (hexfloats) */
	12553	if ((isALPHA_FOLD_EQ(*s, 'e'))) {
	12554	/* At least some Mach atof()s don't grok 'E' */
	12555	*d++ = 'e';
	12556	}
	12557	else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
	12558	*d++ = 'p';
	12559	}
	12560
	12561	s++;
	12562
	12563
	12564	/* stray preinitial _ */
	12565	if (*s == '_') {
	12566	WARN_ABOUT_UNDERSCORE();
	12567	lastub = s++;
	12568	}
	12569
	12570	/* allow positive or negative exponent */
	12571	if (s == '+' \|\| s == '-')
	12572	d++ = s++;
	12573
	12574	/* stray initial _ */
	12575	if (*s == '_') {
	12576	WARN_ABOUT_UNDERSCORE();
	12577	lastub = s++;
	12578	}
	12579
	12580	/* read digits of exponent */
	12581	while (isDIGIT(s) \|\| s == '_') {
	12582	if (isDIGIT(*s)) {
	12583	++exp_digits;
	12584	if (d >= e)
	12585	Perl_croak(aTHX_ "%s", number_too_long);
	12586	d++ = s++;
	12587	}
	12588	else {
	12589	if (((lastub && s == lastub + 1)
	12590	\|\| (!isDIGIT(s[1]) && s[1] != '_')))
	12591	WARN_ABOUT_UNDERSCORE();
	12592	lastub = s++;
	12593	}
	12594	}
	12595
	12596	if (!exp_digits) {
	12597	/* no exponent digits, the [eEpP] could be for something else,
	12598	* though in practice we don't get here for p since that's preparsed
	12599	* earlier, and results in only the 0xX being consumed, so behave similarly
	12600	* for decimal floats and consume only the D.DD, leaving the [eE] to the
	12601	* next token.
	12602	*/
	12603	s = save_s;
	12604	d = save_d;
	12605	}
	12606	else {
	12607	floatit = TRUE;
	12608	}
	12609	}
	12610
	12611
	12612	/*
	12613	We try to do an integer conversion first if no characters
	12614	indicating "float" have been found.
	12615	*/
	12616
	12617	if (!floatit) {
	12618	UV uv;
	12619	const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
	12620
	12621	if (flags == IS_NUMBER_IN_UV) {
	12622	if (uv <= IV_MAX)
	12623	sv = newSViv(uv); /* Prefer IVs over UVs. */
	12624	else
	12625	sv = newSVuv(uv);
	12626	} else if (flags == (IS_NUMBER_IN_UV \| IS_NUMBER_NEG)) {
	12627	if (uv <= (UV) IV_MIN)
	12628	sv = newSViv(-(IV)uv);
	12629	else
	12630	floatit = TRUE;
	12631	} else
	12632	floatit = TRUE;
	12633	}
	12634	if (floatit) {
	12635	/* terminate the string */
	12636	*d = '\0';
	12637	if (UNLIKELY(hexfp)) {
	12638	# ifdef NV_MANT_DIG
	12639	if (significant_bits > NV_MANT_DIG)
	12640	Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
	12641	"Hexadecimal float: mantissa overflow");
	12642	# endif
	12643	#ifdef HEXFP_UQUAD
	12644	nv = hexfp_uquad * hexfp_mult;
	12645	#else /* HEXFP_NV */
	12646	nv = hexfp_nv * hexfp_mult;
	12647	#endif
	12648	} else {
	12649	nv = Atof(PL_tokenbuf);
	12650	}
	12651	sv = newSVnv(nv);
	12652	}
	12653
	12654	if ( floatit
	12655	? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
	12656	const char *const key = floatit ? "float" : "integer";
	12657	const STRLEN keylen = floatit ? 5 : 7;
	12658	sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
	12659	key, keylen, sv, NULL, NULL, 0, NULL);
	12660	}
	12661	break;
	12662
	12663	/* if it starts with a v, it could be a v-string */
	12664	case 'v':
	12665	vstring:
	12666	sv = newSV(5); /* preallocate storage space */
	12667	ENTER_with_name("scan_vstring");
	12668	SAVEFREESV(sv);
	12669	s = scan_vstring(s, PL_bufend, sv);
	12670	SvREFCNT_inc_simple_void_NN(sv);
	12671	LEAVE_with_name("scan_vstring");
	12672	break;
	12673	}
	12674
	12675	/* make the op for the constant and return */
	12676
	12677	if (sv)
	12678	lvalp->opval = newSVOP(OP_CONST, 0, sv);
	12679	else
	12680	lvalp->opval = NULL;
	12681
	12682	return (char *)s;
	12683	}
	12684
	12685	STATIC char *
	12686	S_scan_formline(pTHX_ char *s)
	12687	{
	12688	SV * const stuff = newSVpvs("");
	12689	bool needargs = FALSE;
	12690	bool eofmt = FALSE;
	12691
	12692	PERL_ARGS_ASSERT_SCAN_FORMLINE;
	12693
	12694	while (!needargs) {
	12695	char *eol;
	12696	if (*s == '.') {
	12697	char *t = s+1;
	12698	#ifdef PERL_STRICT_CR
	12699	while (SPACE_OR_TAB(*t))
	12700	t++;
	12701	#else
	12702	while (SPACE_OR_TAB(t) \|\| t == '\r')
	12703	t++;
	12704	#endif
	12705	if (*t == '\n' \|\| t == PL_bufend) {
	12706	eofmt = TRUE;
	12707	break;
	12708	}
	12709	}
	12710	eol = (char *) memchr(s,'\n',PL_bufend-s);
	12711	if (! eol) {
	12712	eol = PL_bufend;
	12713	}
	12714	else {
	12715	eol++;
	12716	}
	12717	if (*s != '#') {
	12718	char *t;
	12719	for (t = s; t < eol; t++) {
	12720	if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
	12721	needargs = FALSE;
	12722	goto enough; /* ~~ must be first line in formline */
	12723	}
	12724	if (t == '@' \|\| t == '^')
	12725	needargs = TRUE;
	12726	}
	12727	if (eol > s) {
	12728	sv_catpvn(stuff, s, eol-s);
	12729	#ifndef PERL_STRICT_CR
	12730	if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
	12731	char *end = SvPVX(stuff) + SvCUR(stuff);
	12732	end[-2] = '\n';
	12733	end[-1] = '\0';
	12734	SvCUR_set(stuff, SvCUR(stuff) - 1);
	12735	}
	12736	#endif
	12737	}
	12738	else
	12739	break;
	12740	}
	12741	s = (char*)eol;
	12742	if ((PL_rsfp \|\| PL_parser->filtered)
	12743	&& PL_parser->form_lex_state == LEX_NORMAL) {
	12744	bool got_some;
	12745	PL_bufptr = PL_bufend;
	12746	COPLINE_INC_WITH_HERELINES;
	12747	got_some = lex_next_chunk(0);
	12748	CopLINE_dec(PL_curcop);
	12749	s = PL_bufptr;
	12750	if (!got_some)
	12751	break;
	12752	}
	12753	incline(s, PL_bufend);
	12754	}
	12755	enough:
	12756	if (!SvCUR(stuff) \|\| needargs)
	12757	PL_lex_state = PL_parser->form_lex_state;
	12758	if (SvCUR(stuff)) {
	12759	PL_expect = XSTATE;
	12760	if (needargs) {
	12761	const char *s2 = s;
	12762	while (isSPACE(s2) && s2 != '\n')
	12763	s2++;
	12764	if (*s2 == '{') {
	12765	PL_expect = XTERMBLOCK;
	12766	NEXTVAL_NEXTTOKE.ival = 0;
	12767	force_next(KW_DO);
	12768	}
	12769	NEXTVAL_NEXTTOKE.ival = 0;
	12770	force_next(FORMLBRACK);
	12771	}
	12772	if (!IN_BYTES) {
	12773	if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
	12774	SvUTF8_on(stuff);
	12775	}
	12776	NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
	12777	force_next(THING);
	12778	}
	12779	else {
	12780	SvREFCNT_dec(stuff);
	12781	if (eofmt)
	12782	PL_lex_formbrack = 0;
	12783	}
	12784	return s;
	12785	}
	12786
	12787	/*
	12788	=for apidoc start_subparse
	12789
	12790	Set things up for parsing a subroutine.
	12791
	12792	If C<is_format> is non-zero, the input is to be considered a format sub
	12793	(a specialised sub used to implement perl's C<format> feature); else a
	12794	normal C<sub>.
	12795
	12796	C<flags> are added to the flags for C<PL_compcv>. C<flags> may include the
	12797	C<CVf_IsMETHOD> bit, which causes the new subroutine to be a method.
	12798
	12799	This returns the value of C<PL_savestack_ix> that was in effect upon entry to
	12800	the function;
	12801
	12802	=cut
	12803	*/
	12804
	12805	I32
	12806	Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
	12807	{
	12808	const I32 oldsavestack_ix = PL_savestack_ix;
	12809	CV* const outsidecv = PL_compcv;
	12810	bool is_method = flags & CVf_IsMETHOD;
	12811
	12812	if (is_method)
	12813	croak_kw_unless_class("method");
	12814
	12815	SAVEI32(PL_subline);
	12816	save_item(PL_subname);
	12817	SAVESPTR(PL_compcv);
	12818
	12819	PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
	12820	CvFLAGS(PL_compcv) \|= flags;
	12821
	12822	PL_subline = CopLINE(PL_curcop);
	12823	CvPADLIST(PL_compcv) = pad_new(padnew_SAVE\|padnew_SAVESUB);
	12824	CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
	12825	CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
	12826	if (outsidecv && CvPADLIST(outsidecv))
	12827	CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
	12828	if (is_method)
	12829	class_prepare_method_parse(PL_compcv);
	12830
	12831	return oldsavestack_ix;
	12832	}
	12833
	12834	/* If o represents a builtin attribute, apply it to cv and returns true.
	12835	* Otherwise does nothing and returns false
	12836	*/
	12837
	12838	STATIC bool
	12839	S_apply_builtin_cv_attribute(pTHX_ CV cv, OP o)
	12840	{
	12841	assert(o->op_type == OP_CONST);
	12842	SV *sv = cSVOPo_sv;
	12843	STRLEN len = SvCUR(sv);
	12844
	12845	/* NOTE: any CV attrs applied here need to be part of
	12846	the CVf_BUILTIN_ATTRS define in cv.h! */
	12847
	12848	if(memEQs(SvPVX(sv), len, "lvalue"))
	12849	CvLVALUE_on(cv);
	12850	else if(memEQs(SvPVX(sv), len, "method"))
	12851	CvNOWARN_AMBIGUOUS_on(cv);
	12852	else if(memEQs(SvPVX(sv), len, "const")) {
	12853	Perl_ck_warner_d(aTHX_
	12854	packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
	12855	":const is experimental"
	12856	);
	12857	CvANONCONST_on(cv);
	12858	if (!CvANON(cv))
	12859	yyerror(":const is not permitted on named subroutines");
	12860	}
	12861	else
	12862	return false;
	12863
	12864	return true;
	12865	}
	12866
	12867	/*
	12868	=for apidoc apply_builtin_cv_attributes
	12869
	12870	Given an OP_LIST containing attribute definitions, filter it for known builtin
	12871	attributes to apply to the cv, returning a possibly-smaller list containing
	12872	just the remaining ones.
	12873
	12874	=cut
	12875	*/
	12876
	12877	OP *
	12878	Perl_apply_builtin_cv_attributes(pTHX_ CV cv, OP attrlist)
	12879	{
	12880	PERL_ARGS_ASSERT_APPLY_BUILTIN_CV_ATTRIBUTES;
	12881
	12882	if(!attrlist)
	12883	return attrlist;
	12884
	12885	if(attrlist->op_type != OP_LIST) {
	12886	/* Not in fact a list but just a single attribute */
	12887	if(S_apply_builtin_cv_attribute(aTHX_ cv, attrlist)) {
	12888	op_free(attrlist);
	12889	return NULL;
	12890	}
	12891
	12892	return attrlist;
	12893	}
	12894
	12895	OP *prev = cLISTOPx(attrlist)->op_first;
	12896	assert(prev->op_type == OP_PUSHMARK);
	12897	OP *o = OpSIBLING(prev);
	12898
	12899	OP *next;
	12900	for(; o; o = next) {
	12901	next = OpSIBLING(o);
	12902
	12903	if(S_apply_builtin_cv_attribute(aTHX_ cv, o)) {
	12904	op_sibling_splice(attrlist, prev, 1, NULL);
	12905	op_free(o);
	12906	}
	12907	else {
	12908	prev = o;
	12909	}
	12910	}
	12911
	12912	if(OpHAS_SIBLING(cLISTOPx(attrlist)->op_first))
	12913	return attrlist;
	12914
	12915	/* The list is now entirely empty, we might as well discard it */
	12916	op_free(attrlist);
	12917	return NULL;
	12918	}
	12919
	12920
	12921	/* Do extra initialisation of a CV (typically one just created by
	12922	* start_subparse()) if that CV is for a named sub
	12923	*/
	12924
	12925	void
	12926	Perl_init_named_cv(pTHX_ CV cv, OP nameop)
	12927	{
	12928	PERL_ARGS_ASSERT_INIT_NAMED_CV;
	12929
	12930	if (nameop->op_type == OP_CONST) {
	12931	const char const name = SvPV_nolen_const(((SVOP)nameop)->op_sv);
	12932	if ( strEQ(name, "BEGIN")
	12933	\|\| strEQ(name, "END")
	12934	\|\| strEQ(name, "INIT")
	12935	\|\| strEQ(name, "CHECK")
	12936	\|\| strEQ(name, "UNITCHECK")
	12937	)
	12938	CvSPECIAL_on(cv);
	12939	}
	12940	else
	12941	/* State subs inside anonymous subs need to be
	12942	clonable themselves. */
	12943	if ( CvANON(CvOUTSIDE(cv))
	12944	\|\| CvCLONE(CvOUTSIDE(cv))
	12945	\|\| !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
	12946	CvOUTSIDE(cv)
	12947	))[nameop->op_targ])
	12948	)
	12949	CvCLONE_on(cv);
	12950	}
	12951
	12952
	12953	static int
	12954	S_yywarn(pTHX_ const char *const s, U32 flags)
	12955	{
	12956	PERL_ARGS_ASSERT_YYWARN;
	12957
	12958	PL_in_eval \|= EVAL_WARNONLY;
	12959	yyerror_pv(s, flags);
	12960	return 0;
	12961	}
	12962
	12963	void
	12964	Perl_abort_execution(pTHX_ SV* msg_sv, const char * const name)
	12965	{
	12966	PERL_ARGS_ASSERT_ABORT_EXECUTION;
	12967
	12968	if (msg_sv) {
	12969	if (PL_minus_c)
	12970	Perl_croak(aTHX_ "%" SVf "%s had compilation errors.\n", SVfARG(msg_sv), name);
	12971	else {
	12972	Perl_croak(aTHX_
	12973	"%" SVf "Execution of %s aborted due to compilation errors.\n", SVfARG(msg_sv), name);
	12974	}
	12975	} else {
	12976	if (PL_minus_c)
	12977	Perl_croak(aTHX_ "%s had compilation errors.\n", name);
	12978	else {
	12979	Perl_croak(aTHX_
	12980	"Execution of %s aborted due to compilation errors.\n", name);
	12981	}
	12982	}
	12983
	12984	NOT_REACHED; /* NOTREACHED */
	12985	}
	12986
	12987	void
	12988	Perl_yyquit(pTHX)
	12989	{
	12990	/* Called, after at least one error has been found, to abort the parse now,
	12991	* instead of trying to forge ahead */
	12992
	12993	yyerror_pvn(NULL, 0, 0);
	12994	}
	12995
	12996	int
	12997	Perl_yyerror(pTHX_ const char *const s)
	12998	{
	12999	PERL_ARGS_ASSERT_YYERROR;
	13000	int r = yyerror_pvn(s, strlen(s), 0);
	13001	return r;
	13002	}
	13003
	13004	int
	13005	Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
	13006	{
	13007	PERL_ARGS_ASSERT_YYERROR_PV;
	13008	int r = yyerror_pvn(s, strlen(s), flags);
	13009	return r;
	13010	}
	13011
	13012	int
	13013	Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
	13014	{
	13015	const char *context = NULL;
	13016	int contlen = -1;
	13017	SV *msg;
	13018	SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
	13019	int yychar = PL_parser->yychar;
	13020
	13021	/* Output error message 's' with length 'len'. 'flags' are SV flags that
	13022	* apply. If the number of errors found is large enough, it abandons
	13023	* parsing. If 's' is NULL, there is no message, and it abandons
	13024	* processing unconditionally */
	13025
	13026	if (s != NULL) {
	13027	if (!yychar \|\| (yychar == PERLY_SEMICOLON && !PL_rsfp))
	13028	sv_catpvs(where_sv, "at EOF");
	13029	else if ( PL_oldoldbufptr
	13030	&& PL_bufptr > PL_oldoldbufptr
	13031	&& PL_bufptr - PL_oldoldbufptr < 200
	13032	&& PL_oldoldbufptr != PL_oldbufptr
	13033	&& PL_oldbufptr != PL_bufptr)
	13034	{
	13035	while (isSPACE(*PL_oldoldbufptr))
	13036	PL_oldoldbufptr++;
	13037	context = PL_oldoldbufptr;
	13038	contlen = PL_bufptr - PL_oldoldbufptr;
	13039	}
	13040	else if ( PL_oldbufptr
	13041	&& PL_bufptr > PL_oldbufptr
	13042	&& PL_bufptr - PL_oldbufptr < 200
	13043	&& PL_oldbufptr != PL_bufptr)
	13044	{
	13045	while (isSPACE(*PL_oldbufptr))
	13046	PL_oldbufptr++;
	13047	context = PL_oldbufptr;
	13048	contlen = PL_bufptr - PL_oldbufptr;
	13049	}
	13050	else if (yychar > 255)
	13051	sv_catpvs(where_sv, "next token ???");
	13052	else if (yychar == YYEMPTY) {
	13053	if (PL_lex_state == LEX_NORMAL)
	13054	sv_catpvs(where_sv, "at end of line");
	13055	else if (PL_lex_inpat)
	13056	sv_catpvs(where_sv, "within pattern");
	13057	else
	13058	sv_catpvs(where_sv, "within string");
	13059	}
	13060	else {
	13061	sv_catpvs(where_sv, "next char ");
	13062	if (yychar < 32)
	13063	Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
	13064	else if (isPRINT_LC(yychar)) {
	13065	const char string = yychar;
	13066	sv_catpvn(where_sv, &string, 1);
	13067	}
	13068	else
	13069	Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
	13070	}
	13071	msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) \| SVs_TEMP);
	13072	Perl_sv_catpvf(aTHX_ msg, " at %s line %" LINE_Tf ", ",
	13073	OutCopFILE(PL_curcop),
	13074	(PL_parser->preambling == NOLINE
	13075	? CopLINE(PL_curcop)
	13076	: PL_parser->preambling));
	13077	if (context)
	13078	Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
	13079	UTF8fARG(UTF, contlen, context));
	13080	else
	13081	Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
	13082	if ( PL_multi_start < PL_multi_end
	13083	&& (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
	13084	{
	13085	Perl_sv_catpvf(aTHX_ msg,
	13086	" (Might be a runaway multi-line %c%c string starting on"
	13087	" line %" LINE_Tf ")\n",
	13088	(int)PL_multi_open,(int)PL_multi_close,(line_t)PL_multi_start);
	13089	PL_multi_end = 0;
	13090	}
	13091	if (PL_in_eval & EVAL_WARNONLY) {
	13092	PL_in_eval &= ~EVAL_WARNONLY;
	13093	Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
	13094	}
	13095	else {
	13096	qerror(msg);
	13097	}
	13098	}
	13099	/* if there was no message then this is a yyquit(), which is actualy handled
	13100	* by qerror() with a NULL argument */
	13101	if (s == NULL)
	13102	qerror(NULL);
	13103
	13104	PL_in_my = 0;
	13105	PL_in_my_stash = NULL;
	13106	return 0;
	13107	}
	13108
	13109	STATIC char*
	13110	S_swallow_bom(pTHX_ U8 *s)
	13111	{
	13112	const STRLEN slen = SvCUR(PL_linestr);
	13113
	13114	PERL_ARGS_ASSERT_SWALLOW_BOM;
	13115
	13116	switch (s[0]) {
	13117	case 0xFF:
	13118	if (s[1] == 0xFE) {
	13119	/* UTF-16 little-endian? (or UTF-32LE?) */
	13120	if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
	13121	/* diag_listed_as: Unsupported script encoding %s */
	13122	Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
	13123	#ifndef PERL_NO_UTF16_FILTER
	13124	#ifdef DEBUGGING
	13125	if (DEBUG_p_TEST \|\| DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
	13126	#endif
	13127	s += 2;
	13128	if (PL_bufend > (char*)s) {
	13129	s = add_utf16_textfilter(s, TRUE);
	13130	}
	13131	#else
	13132	/* diag_listed_as: Unsupported script encoding %s */
	13133	Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
	13134	#endif
	13135	}
	13136	break;
	13137	case 0xFE:
	13138	if (s[1] == 0xFF) { /* UTF-16 big-endian? */
	13139	#ifndef PERL_NO_UTF16_FILTER
	13140	#ifdef DEBUGGING
	13141	if (DEBUG_p_TEST \|\| DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
	13142	#endif
	13143	s += 2;
	13144	if (PL_bufend > (char *)s) {
	13145	s = add_utf16_textfilter(s, FALSE);
	13146	}
	13147	#else
	13148	/* diag_listed_as: Unsupported script encoding %s */
	13149	Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
	13150	#endif
	13151	}
	13152	break;
	13153	case BOM_UTF8_FIRST_BYTE: {
	13154	if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
	13155	#ifdef DEBUGGING
	13156	if (DEBUG_p_TEST \|\| DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
	13157	#endif
	13158	s += sizeof(BOM_UTF8) - 1; /* UTF-8 */
	13159	}
	13160	break;
	13161	}
	13162	case 0:
	13163	if (slen > 3) {
	13164	if (s[1] == 0) {
	13165	if (s[2] == 0xFE && s[3] == 0xFF) {
	13166	/* UTF-32 big-endian */
	13167	/* diag_listed_as: Unsupported script encoding %s */
	13168	Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
	13169	}
	13170	}
	13171	else if (s[2] == 0 && s[3] != 0) {
	13172	/* Leading bytes
	13173	* 00 xx 00 xx
	13174	* are a good indicator of UTF-16BE. */
	13175	#ifndef PERL_NO_UTF16_FILTER
	13176	#ifdef DEBUGGING
	13177	if (DEBUG_p_TEST \|\| DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
	13178	#endif
	13179	s = add_utf16_textfilter(s, FALSE);
	13180	#else
	13181	/* diag_listed_as: Unsupported script encoding %s */
	13182	Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
	13183	#endif
	13184	}
	13185	}
	13186	break;
	13187
	13188	default:
	13189	if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
	13190	/* Leading bytes
	13191	* xx 00 xx 00
	13192	* are a good indicator of UTF-16LE. */
	13193	#ifndef PERL_NO_UTF16_FILTER
	13194	#ifdef DEBUGGING
	13195	if (DEBUG_p_TEST \|\| DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
	13196	#endif
	13197	s = add_utf16_textfilter(s, TRUE);
	13198	#else
	13199	/* diag_listed_as: Unsupported script encoding %s */
	13200	Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
	13201	#endif
	13202	}
	13203	}
	13204	return (char*)s;
	13205	}
	13206
	13207
	13208	#ifndef PERL_NO_UTF16_FILTER
	13209	static I32
	13210	S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
	13211	{
	13212	SV *const filter = FILTER_DATA(idx);
	13213	/* We re-use this each time round, throwing the contents away before we
	13214	return. */
	13215	SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
	13216	SV *const utf8_buffer = filter;
	13217	IV status = IoPAGE(filter);
	13218	const bool reverse = cBOOL(IoLINES(filter));
	13219	I32 retval;
	13220
	13221	PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
	13222
	13223	/* As we're automatically added, at the lowest level, and hence only called
	13224	from this file, we can be sure that we're not called in block mode. Hence
	13225	don't bother writing code to deal with block mode. */
	13226	if (maxlen) {
	13227	Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
	13228	}
	13229	if (status < 0) {
	13230	Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
	13231	}
	13232	DEBUG_P(PerlIO_printf(Perl_debug_log,
	13233	"utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
	13234	FPTR2DPTR(void *, S_utf16_textfilter),
	13235	reverse ? 'l' : 'b', idx, maxlen, status,
	13236	(UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
	13237
	13238	while (1) {
	13239	STRLEN chars;
	13240	STRLEN have;
	13241	Size_t newlen;
	13242	U8 *end;
	13243	/* First, look in our buffer of existing UTF-8 data: */
	13244	char nl = (char )memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
	13245
	13246	if (nl) {
	13247	++nl;
	13248	} else if (status == 0) {
	13249	/* EOF */
	13250	IoPAGE(filter) = 0;
	13251	nl = SvEND(utf8_buffer);
	13252	}
	13253	if (nl) {
	13254	STRLEN got = nl - SvPVX(utf8_buffer);
	13255	/* Did we have anything to append? */
	13256	retval = got != 0;
	13257	sv_catpvn(sv, SvPVX(utf8_buffer), got);
	13258	/* Everything else in this code works just fine if SVp_POK isn't
	13259	set. This, however, needs it, and we need it to work, else
	13260	we loop infinitely because the buffer is never consumed. */
	13261	sv_chop(utf8_buffer, nl);
	13262	break;
	13263	}
	13264
	13265	/* OK, not a complete line there, so need to read some more UTF-16.
	13266	Read an extra octect if the buffer currently has an odd number. */
	13267	while (1) {
	13268	if (status <= 0)
	13269	break;
	13270	if (SvCUR(utf16_buffer) >= 2) {
	13271	/* Location of the high octet of the last complete code point.
	13272	Gosh, UTF-16 is a pain. All the benefits of variable length,
	13273	coupled with all the benefits of partial reads and
	13274	endianness. */
	13275	const U8 const last_hi = (U8)SvPVX(utf16_buffer)
	13276	+ ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
	13277
	13278	if (last_hi < 0xd8 \|\| last_hi > 0xdb) {
	13279	break;
	13280	}
	13281
	13282	/* We have the first half of a surrogate. Read more. */
	13283	DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
	13284	}
	13285
	13286	status = FILTER_READ(idx + 1, utf16_buffer,
	13287	160 + (SvCUR(utf16_buffer) & 1));
	13288	DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
	13289	DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
	13290	if (status < 0) {
	13291	/* Error */
	13292	IoPAGE(filter) = status;
	13293	return status;
	13294	}
	13295	}
	13296
	13297	/* 'chars' isn't quite the right name, as code points above 0xFFFF
	13298	* require 4 bytes per char */
	13299	chars = SvCUR(utf16_buffer) >> 1;
	13300	have = SvCUR(utf8_buffer);
	13301
	13302	/* Assume the worst case size as noted by the functions: twice the
	13303	* number of input bytes */
	13304	SvGROW(utf8_buffer, have + chars * 4 + 1);
	13305
	13306	if (reverse) {
	13307	end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
	13308	(U8*)SvPVX_const(utf8_buffer) + have,
	13309	chars * 2, &newlen);
	13310	} else {
	13311	end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
	13312	(U8*)SvPVX_const(utf8_buffer) + have,
	13313	chars * 2, &newlen);
	13314	}
	13315	SvCUR_set(utf8_buffer, have + newlen);
	13316	*end = '\0';
	13317
	13318	/* No need to keep this SV "well-formed" with a '\0' after the end, as
	13319	it's private to us, and utf16_to_utf8{,reversed} take a
	13320	(pointer,length) pair, rather than a NUL-terminated string. */
	13321	if(SvCUR(utf16_buffer) & 1) {
	13322	*SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
	13323	SvCUR_set(utf16_buffer, 1);
	13324	} else {
	13325	SvCUR_set(utf16_buffer, 0);
	13326	}
	13327	}
	13328	DEBUG_P(PerlIO_printf(Perl_debug_log,
	13329	"utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
	13330	status,
	13331	(UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
	13332	DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
	13333	return retval;
	13334	}
	13335
	13336	static U8 *
	13337	S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
	13338	{
	13339	SV *filter = filter_add(S_utf16_textfilter, NULL);
	13340
	13341	PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
	13342
	13343	IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char )s, PL_bufend - (char)s));
	13344	SvPVCLEAR(filter);
	13345	IoLINES(filter) = reversed;
	13346	IoPAGE(filter) = 1; /* Not EOF */
	13347
	13348	/* Sadly, we have to return a valid pointer, come what may, so we have to
	13349	ignore any error return from this. */
	13350	SvCUR_set(PL_linestr, 0);
	13351	if (FILTER_READ(0, PL_linestr, 0)) {
	13352	SvUTF8_on(PL_linestr);
	13353	} else {
	13354	SvUTF8_on(PL_linestr);
	13355	}
	13356	PL_bufend = SvEND(PL_linestr);
	13357	return (U8*)SvPVX(PL_linestr);
	13358	}
	13359	#endif
	13360
	13361	/*
	13362	=for apidoc scan_vstring
	13363
	13364	Returns a pointer to the next character after the parsed
	13365	vstring, as well as updating the passed in sv.
	13366
	13367	Function must be called like
	13368
	13369	sv = sv_2mortal(newSV(5));
	13370	s = scan_vstring(s,e,sv);
	13371
	13372	where s and e are the start and end of the string.
	13373	The sv should already be large enough to store the vstring
	13374	passed in, for performance reasons.
	13375
	13376	This function may croak if fatal warnings are enabled in the
	13377	calling scope, hence the sv_2mortal in the example (to prevent
	13378	a leak). Make sure to do SvREFCNT_inc afterwards if you use
	13379	sv_2mortal.
	13380
	13381	=cut
	13382	*/
	13383
	13384	char *
	13385	Perl_scan_vstring(pTHX_ const char s, const char const e, SV *sv)
	13386	{
	13387	const char *pos = s;
	13388	const char *start = s;
	13389
	13390	PERL_ARGS_ASSERT_SCAN_VSTRING;
	13391
	13392	if (pos == 'v') pos++; / get past 'v' */
	13393	while (pos < e && (isDIGIT(pos) \|\| pos == '_'))
	13394	pos++;
	13395	if ( *pos != '.') {
	13396	/* this may not be a v-string if followed by => */
	13397	const char *next = pos;
	13398	while (next < e && isSPACE(*next))
	13399	++next;
	13400	if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
	13401	/* return string not v-string */
	13402	sv_setpvn(sv,(char *)s,pos-s);
	13403	return (char *)pos;
	13404	}
	13405	}
	13406
	13407	if (!isALPHA(*pos)) {
	13408	U8 tmpbuf[UTF8_MAXBYTES+1];
	13409
	13410	if (*s == 'v')
	13411	s++; /* get past 'v' */
	13412
	13413	SvPVCLEAR(sv);
	13414
	13415	for (;;) {
	13416	/* this is atoi() that tolerates underscores */
	13417	U8 *tmpend;
	13418	UV rev = 0;
	13419	const char *end = pos;
	13420	UV mult = 1;
	13421	while (--end >= s) {
	13422	if (*end != '_') {
	13423	const UV orev = rev;
	13424	rev += (end - '0') mult;
	13425	mult *= 10;
	13426	if (orev > rev)
	13427	/* diag_listed_as: Integer overflow in %s number */
	13428	Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
	13429	"Integer overflow in decimal number");
	13430	}
	13431	}
	13432
	13433	/* Append native character for the rev point */
	13434	tmpend = uvchr_to_utf8(tmpbuf, rev);
	13435	sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
	13436	if (!UVCHR_IS_INVARIANT(rev))
	13437	SvUTF8_on(sv);
	13438	if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
	13439	s = ++pos;
	13440	else {
	13441	s = pos;
	13442	break;
	13443	}
	13444	while (pos < e && (isDIGIT(pos) \|\| pos == '_'))
	13445	pos++;
	13446	}
	13447	SvPOK_on(sv);
	13448	sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
	13449	SvRMAGICAL_on(sv);
	13450	}
	13451	return (char *)s;
	13452	}
	13453
	13454	int
	13455	Perl_keyword_plugin_standard(pTHX_
	13456	char keyword_ptr, STRLEN keyword_len, OP *op_ptr)
	13457	{
	13458	PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
	13459	PERL_UNUSED_CONTEXT;
	13460	PERL_UNUSED_ARG(keyword_ptr);
	13461	PERL_UNUSED_ARG(keyword_len);
	13462	PERL_UNUSED_ARG(op_ptr);
	13463	return KEYWORD_PLUGIN_DECLINE;
	13464	}
	13465
	13466	STRLEN
	13467	Perl_infix_plugin_standard(pTHX_
	13468	char operator_ptr, STRLEN operator_len, struct Perl_custom_infix *def)
	13469	{
	13470	PERL_ARGS_ASSERT_INFIX_PLUGIN_STANDARD;
	13471	PERL_UNUSED_CONTEXT;
	13472	PERL_UNUSED_ARG(operator_ptr);
	13473	PERL_UNUSED_ARG(operator_len);
	13474	PERL_UNUSED_ARG(def);
	13475	return 0;
	13476	}
	13477
	13478	/*
	13479	=for apidoc_section $lexer
	13480	=for apidoc wrap_keyword_plugin
	13481
	13482	Puts a C function into the chain of keyword plugins. This is the
	13483	preferred way to manipulate the L</PL_keyword_plugin> variable.
	13484	C<new_plugin> is a pointer to the C function that is to be added to the
	13485	keyword plugin chain, and C<old_plugin_p> points to the storage location
	13486	where a pointer to the next function in the chain will be stored. The
	13487	value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
	13488	while the value previously stored there is written to C<*old_plugin_p>.
	13489
	13490	L</PL_keyword_plugin> is global to an entire process, and a module wishing
	13491	to hook keyword parsing may find itself invoked more than once per
	13492	process, typically in different threads. To handle that situation, this
	13493	function is idempotent. The location C<*old_plugin_p> must initially
	13494	(once per process) contain a null pointer. A C variable of static
	13495	duration (declared at file scope, typically also marked C<static> to give
	13496	it internal linkage) will be implicitly initialised appropriately, if it
	13497	does not have an explicit initialiser. This function will only actually
	13498	modify the plugin chain if it finds C<*old_plugin_p> to be null. This
	13499	function is also thread safe on the small scale. It uses appropriate
	13500	locking to avoid race conditions in accessing L</PL_keyword_plugin>.
	13501
	13502	When this function is called, the function referenced by C<new_plugin>
	13503	must be ready to be called, except for C<*old_plugin_p> being unfilled.
	13504	In a threading situation, C<new_plugin> may be called immediately, even
	13505	before this function has returned. C<*old_plugin_p> will always be
	13506	appropriately set before C<new_plugin> is called. If C<new_plugin>
	13507	decides not to do anything special with the identifier that it is given
	13508	(which is the usual case for most calls to a keyword plugin), it must
	13509	chain the plugin function referenced by C<*old_plugin_p>.
	13510
	13511	Taken all together, XS code to install a keyword plugin should typically
	13512	look something like this:
	13513
	13514	static Perl_keyword_plugin_t next_keyword_plugin;
	13515	static OP *my_keyword_plugin(pTHX_
	13516	char keyword_ptr, STRLEN keyword_len, OP *op_ptr)
	13517	{
	13518	if (memEQs(keyword_ptr, keyword_len,
	13519	"my_new_keyword")) {
	13520	...
	13521	} else {
	13522	return next_keyword_plugin(aTHX_
	13523	keyword_ptr, keyword_len, op_ptr);
	13524	}
	13525	}
	13526	BOOT:
	13527	wrap_keyword_plugin(my_keyword_plugin,
	13528	&next_keyword_plugin);
	13529
	13530	Direct access to L</PL_keyword_plugin> should be avoided.
	13531
	13532	=cut
	13533	*/
	13534
	13535	void
	13536	Perl_wrap_keyword_plugin(pTHX_
	13537	Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
	13538	{
	13539
	13540	PERL_UNUSED_CONTEXT;
	13541	PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
	13542	if (*old_plugin_p) return;
	13543	KEYWORD_PLUGIN_MUTEX_LOCK;
	13544	if (!*old_plugin_p) {
	13545	*old_plugin_p = PL_keyword_plugin;
	13546	PL_keyword_plugin = new_plugin;
	13547	}
	13548	KEYWORD_PLUGIN_MUTEX_UNLOCK;
	13549	}
	13550
	13551	/*
	13552	=for apidoc wrap_infix_plugin
	13553
	13554	B<NOTE:> This API exists entirely for the purpose of making the CPAN module
	13555	C<XS::Parse::Infix> work. It is not expected that additional modules will make
	13556	use of it; rather, that they should use C<XS::Parse::Infix> to provide parsing
	13557	of new infix operators.
	13558
	13559	Puts a C function into the chain of infix plugins. This is the preferred
	13560	way to manipulate the L</PL_infix_plugin> variable. C<new_plugin> is a
	13561	pointer to the C function that is to be added to the infix plugin chain, and
	13562	C<old_plugin_p> points to a storage location where a pointer to the next
	13563	function in the chain will be stored. The value of C<new_plugin> is written
	13564	into the L</PL_infix_plugin> variable, while the value previously stored there
	13565	is written to C<*old_plugin_p>.
	13566
	13567	Direct access to L</PL_infix_plugin> should be avoided.
	13568
	13569	=cut
	13570	*/
	13571
	13572	void
	13573	Perl_wrap_infix_plugin(pTHX_
	13574	Perl_infix_plugin_t new_plugin, Perl_infix_plugin_t *old_plugin_p)
	13575	{
	13576
	13577	PERL_UNUSED_CONTEXT;
	13578	PERL_ARGS_ASSERT_WRAP_INFIX_PLUGIN;
	13579	if (*old_plugin_p) return;
	13580	/* We use the same mutex as for PL_keyword_plugin as it's so rare either
	13581	* of them is actually updated; no need for a dedicated one each */
	13582	KEYWORD_PLUGIN_MUTEX_LOCK;
	13583	if (!*old_plugin_p) {
	13584	*old_plugin_p = PL_infix_plugin;
	13585	PL_infix_plugin = new_plugin;
	13586	}
	13587	KEYWORD_PLUGIN_MUTEX_UNLOCK;
	13588	}
	13589
	13590	#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
	13591	static void
	13592	S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
	13593	{
	13594	SAVEI32(PL_lex_brackets);
	13595	if (PL_lex_brackets > 100)
	13596	Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
	13597	PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
	13598	SAVEI32(PL_lex_allbrackets);
	13599	PL_lex_allbrackets = 0;
	13600	SAVEI8(PL_lex_fakeeof);
	13601	PL_lex_fakeeof = (U8)fakeeof;
	13602	if(yyparse(gramtype) && !PL_parser->error_count)
	13603	qerror(Perl_mess(aTHX_ "Parse error"));
	13604	}
	13605
	13606	#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
	13607	static OP *
	13608	S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
	13609	{
	13610	OP *o;
	13611	ENTER;
	13612	SAVEVPTR(PL_eval_root);
	13613	PL_eval_root = NULL;
	13614	parse_recdescent(gramtype, fakeeof);
	13615	o = PL_eval_root;
	13616	LEAVE;
	13617	return o;
	13618	}
	13619
	13620	#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
	13621	static OP *
	13622	S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
	13623	{
	13624	OP *exprop;
	13625	if (flags & ~PARSE_OPTIONAL)
	13626	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
	13627	exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
	13628	if (!exprop && !(flags & PARSE_OPTIONAL)) {
	13629	if (!PL_parser->error_count)
	13630	qerror(Perl_mess(aTHX_ "Parse error"));
	13631	exprop = newOP(OP_NULL, 0);
	13632	}
	13633	return exprop;
	13634	}
	13635
	13636	/*
	13637	=for apidoc parse_arithexpr
	13638
	13639	Parse a Perl arithmetic expression. This may contain operators of precedence
	13640	down to the bit shift operators. The expression must be followed (and thus
	13641	terminated) either by a comparison or lower-precedence operator or by
	13642	something that would normally terminate an expression such as semicolon.
	13643	If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
	13644	otherwise it is mandatory. It is up to the caller to ensure that the
	13645	dynamic parser state (L</PL_parser> et al) is correctly set to reflect
	13646	the source of the code to be parsed and the lexical context for the
	13647	expression.
	13648
	13649	The op tree representing the expression is returned. If an optional
	13650	expression is absent, a null pointer is returned, otherwise the pointer
	13651	will be non-null.
	13652
	13653	If an error occurs in parsing or compilation, in most cases a valid op
	13654	tree is returned anyway. The error is reflected in the parser state,
	13655	normally resulting in a single exception at the top level of parsing
	13656	which covers all the compilation errors that occurred. Some compilation
	13657	errors, however, will throw an exception immediately.
	13658
	13659	=for apidoc Amnh\|\|PARSE_OPTIONAL
	13660
	13661	=cut
	13662
	13663	*/
	13664
	13665	OP *
	13666	Perl_parse_arithexpr(pTHX_ U32 flags)
	13667	{
	13668	return parse_expr(LEX_FAKEEOF_COMPARE, flags);
	13669	}
	13670
	13671	/*
	13672	=for apidoc parse_termexpr
	13673
	13674	Parse a Perl term expression. This may contain operators of precedence
	13675	down to the assignment operators. The expression must be followed (and thus
	13676	terminated) either by a comma or lower-precedence operator or by
	13677	something that would normally terminate an expression such as semicolon.
	13678	If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
	13679	otherwise it is mandatory. It is up to the caller to ensure that the
	13680	dynamic parser state (L</PL_parser> et al) is correctly set to reflect
	13681	the source of the code to be parsed and the lexical context for the
	13682	expression.
	13683
	13684	The op tree representing the expression is returned. If an optional
	13685	expression is absent, a null pointer is returned, otherwise the pointer
	13686	will be non-null.
	13687
	13688	If an error occurs in parsing or compilation, in most cases a valid op
	13689	tree is returned anyway. The error is reflected in the parser state,
	13690	normally resulting in a single exception at the top level of parsing
	13691	which covers all the compilation errors that occurred. Some compilation
	13692	errors, however, will throw an exception immediately.
	13693
	13694	=cut
	13695	*/
	13696
	13697	OP *
	13698	Perl_parse_termexpr(pTHX_ U32 flags)
	13699	{
	13700	return parse_expr(LEX_FAKEEOF_COMMA, flags);
	13701	}
	13702
	13703	/*
	13704	=for apidoc parse_listexpr
	13705
	13706	Parse a Perl list expression. This may contain operators of precedence
	13707	down to the comma operator. The expression must be followed (and thus
	13708	terminated) either by a low-precedence logic operator such as C<or> or by
	13709	something that would normally terminate an expression such as semicolon.
	13710	If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
	13711	otherwise it is mandatory. It is up to the caller to ensure that the
	13712	dynamic parser state (L</PL_parser> et al) is correctly set to reflect
	13713	the source of the code to be parsed and the lexical context for the
	13714	expression.
	13715
	13716	The op tree representing the expression is returned. If an optional
	13717	expression is absent, a null pointer is returned, otherwise the pointer
	13718	will be non-null.
	13719
	13720	If an error occurs in parsing or compilation, in most cases a valid op
	13721	tree is returned anyway. The error is reflected in the parser state,
	13722	normally resulting in a single exception at the top level of parsing
	13723	which covers all the compilation errors that occurred. Some compilation
	13724	errors, however, will throw an exception immediately.
	13725
	13726	=cut
	13727	*/
	13728
	13729	OP *
	13730	Perl_parse_listexpr(pTHX_ U32 flags)
	13731	{
	13732	return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
	13733	}
	13734
	13735	/*
	13736	=for apidoc parse_fullexpr
	13737
	13738	Parse a single complete Perl expression. This allows the full
	13739	expression grammar, including the lowest-precedence operators such
	13740	as C<or>. The expression must be followed (and thus terminated) by a
	13741	token that an expression would normally be terminated by: end-of-file,
	13742	closing bracketing punctuation, semicolon, or one of the keywords that
	13743	signals a postfix expression-statement modifier. If C<flags> has the
	13744	C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
	13745	mandatory. It is up to the caller to ensure that the dynamic parser
	13746	state (L</PL_parser> et al) is correctly set to reflect the source of
	13747	the code to be parsed and the lexical context for the expression.
	13748
	13749	The op tree representing the expression is returned. If an optional
	13750	expression is absent, a null pointer is returned, otherwise the pointer
	13751	will be non-null.
	13752
	13753	If an error occurs in parsing or compilation, in most cases a valid op
	13754	tree is returned anyway. The error is reflected in the parser state,
	13755	normally resulting in a single exception at the top level of parsing
	13756	which covers all the compilation errors that occurred. Some compilation
	13757	errors, however, will throw an exception immediately.
	13758
	13759	=cut
	13760	*/
	13761
	13762	OP *
	13763	Perl_parse_fullexpr(pTHX_ U32 flags)
	13764	{
	13765	return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
	13766	}
	13767
	13768	/*
	13769	=for apidoc parse_block
	13770
	13771	Parse a single complete Perl code block. This consists of an opening
	13772	brace, a sequence of statements, and a closing brace. The block
	13773	constitutes a lexical scope, so C<my> variables and various compile-time
	13774	effects can be contained within it. It is up to the caller to ensure
	13775	that the dynamic parser state (L</PL_parser> et al) is correctly set to
	13776	reflect the source of the code to be parsed and the lexical context for
	13777	the statement.
	13778
	13779	The op tree representing the code block is returned. This is always a
	13780	real op, never a null pointer. It will normally be a C<lineseq> list,
	13781	including C<nextstate> or equivalent ops. No ops to construct any kind
	13782	of runtime scope are included by virtue of it being a block.
	13783
	13784	If an error occurs in parsing or compilation, in most cases a valid op
	13785	tree (most likely null) is returned anyway. The error is reflected in
	13786	the parser state, normally resulting in a single exception at the top
	13787	level of parsing which covers all the compilation errors that occurred.
	13788	Some compilation errors, however, will throw an exception immediately.
	13789
	13790	The C<flags> parameter is reserved for future use, and must always
	13791	be zero.
	13792
	13793	=cut
	13794	*/
	13795
	13796	OP *
	13797	Perl_parse_block(pTHX_ U32 flags)
	13798	{
	13799	if (flags)
	13800	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
	13801	return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
	13802	}
	13803
	13804	/*
	13805	=for apidoc parse_barestmt
	13806
	13807	Parse a single unadorned Perl statement. This may be a normal imperative
	13808	statement or a declaration that has compile-time effect. It does not
	13809	include any label or other affixture. It is up to the caller to ensure
	13810	that the dynamic parser state (L</PL_parser> et al) is correctly set to
	13811	reflect the source of the code to be parsed and the lexical context for
	13812	the statement.
	13813
	13814	The op tree representing the statement is returned. This may be a
	13815	null pointer if the statement is null, for example if it was actually
	13816	a subroutine definition (which has compile-time side effects). If not
	13817	null, it will be ops directly implementing the statement, suitable to
	13818	pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
	13819	equivalent op (except for those embedded in a scope contained entirely
	13820	within the statement).
	13821
	13822	If an error occurs in parsing or compilation, in most cases a valid op
	13823	tree (most likely null) is returned anyway. The error is reflected in
	13824	the parser state, normally resulting in a single exception at the top
	13825	level of parsing which covers all the compilation errors that occurred.
	13826	Some compilation errors, however, will throw an exception immediately.
	13827
	13828	The C<flags> parameter is reserved for future use, and must always
	13829	be zero.
	13830
	13831	=cut
	13832	*/
	13833
	13834	OP *
	13835	Perl_parse_barestmt(pTHX_ U32 flags)
	13836	{
	13837	if (flags)
	13838	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
	13839	return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
	13840	}
	13841
	13842	/*
	13843	=for apidoc parse_label
	13844
	13845	Parse a single label, possibly optional, of the type that may prefix a
	13846	Perl statement. It is up to the caller to ensure that the dynamic parser
	13847	state (L</PL_parser> et al) is correctly set to reflect the source of
	13848	the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
	13849	label is optional, otherwise it is mandatory.
	13850
	13851	The name of the label is returned in the form of a fresh scalar. If an
	13852	optional label is absent, a null pointer is returned.
	13853
	13854	If an error occurs in parsing, which can only occur if the label is
	13855	mandatory, a valid label is returned anyway. The error is reflected in
	13856	the parser state, normally resulting in a single exception at the top
	13857	level of parsing which covers all the compilation errors that occurred.
	13858
	13859	=cut
	13860	*/
	13861
	13862	SV *
	13863	Perl_parse_label(pTHX_ U32 flags)
	13864	{
	13865	if (flags & ~PARSE_OPTIONAL)
	13866	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
	13867	if (PL_nexttoke) {
	13868	PL_parser->yychar = yylex();
	13869	if (PL_parser->yychar == LABEL) {
	13870	SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
	13871	PL_parser->yychar = YYEMPTY;
	13872	cSVOPx(pl_yylval.opval)->op_sv = NULL;
	13873	op_free(pl_yylval.opval);
	13874	return labelsv;
	13875	} else {
	13876	yyunlex();
	13877	goto no_label;
	13878	}
	13879	} else {
	13880	char s, t;
	13881	STRLEN wlen, bufptr_pos;
	13882	lex_read_space(0);
	13883	t = s = PL_bufptr;
	13884	if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
	13885	goto no_label;
	13886	t = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen, FALSE);
	13887	if (word_takes_any_delimiter(s, wlen))
	13888	goto no_label;
	13889	bufptr_pos = s - SvPVX(PL_linestr);
	13890	PL_bufptr = t;
	13891	lex_read_space(LEX_KEEP_PREVIOUS);
	13892	t = PL_bufptr;
	13893	s = SvPVX(PL_linestr) + bufptr_pos;
	13894	if (t[0] == ':' && t[1] != ':') {
	13895	PL_oldoldbufptr = PL_oldbufptr;
	13896	PL_oldbufptr = s;
	13897	PL_bufptr = t+1;
	13898	return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
	13899	} else {
	13900	PL_bufptr = s;
	13901	no_label:
	13902	if (flags & PARSE_OPTIONAL) {
	13903	return NULL;
	13904	} else {
	13905	qerror(Perl_mess(aTHX_ "Parse error"));
	13906	return newSVpvs("x");
	13907	}
	13908	}
	13909	}
	13910	}
	13911
	13912	/*
	13913	=for apidoc parse_fullstmt
	13914
	13915	Parse a single complete Perl statement. This may be a normal imperative
	13916	statement or a declaration that has compile-time effect, and may include
	13917	optional labels. It is up to the caller to ensure that the dynamic
	13918	parser state (L</PL_parser> et al) is correctly set to reflect the source
	13919	of the code to be parsed and the lexical context for the statement.
	13920
	13921	The op tree representing the statement is returned. This may be a
	13922	null pointer if the statement is null, for example if it was actually
	13923	a subroutine definition (which has compile-time side effects). If not
	13924	null, it will be the result of a L</newSTATEOP> call, normally including
	13925	a C<nextstate> or equivalent op.
	13926
	13927	If an error occurs in parsing or compilation, in most cases a valid op
	13928	tree (most likely null) is returned anyway. The error is reflected in
	13929	the parser state, normally resulting in a single exception at the top
	13930	level of parsing which covers all the compilation errors that occurred.
	13931	Some compilation errors, however, will throw an exception immediately.
	13932
	13933	The C<flags> parameter is reserved for future use, and must always
	13934	be zero.
	13935
	13936	=cut
	13937	*/
	13938
	13939	OP *
	13940	Perl_parse_fullstmt(pTHX_ U32 flags)
	13941	{
	13942	if (flags)
	13943	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
	13944	return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
	13945	}
	13946
	13947	/*
	13948	=for apidoc parse_stmtseq
	13949
	13950	Parse a sequence of zero or more Perl statements. These may be normal
	13951	imperative statements, including optional labels, or declarations
	13952	that have compile-time effect, or any mixture thereof. The statement
	13953	sequence ends when a closing brace or end-of-file is encountered in a
	13954	place where a new statement could have validly started. It is up to
	13955	the caller to ensure that the dynamic parser state (L</PL_parser> et al)
	13956	is correctly set to reflect the source of the code to be parsed and the
	13957	lexical context for the statements.
	13958
	13959	The op tree representing the statement sequence is returned. This may
	13960	be a null pointer if the statements were all null, for example if there
	13961	were no statements or if there were only subroutine definitions (which
	13962	have compile-time side effects). If not null, it will be a C<lineseq>
	13963	list, normally including C<nextstate> or equivalent ops.
	13964
	13965	If an error occurs in parsing or compilation, in most cases a valid op
	13966	tree is returned anyway. The error is reflected in the parser state,
	13967	normally resulting in a single exception at the top level of parsing
	13968	which covers all the compilation errors that occurred. Some compilation
	13969	errors, however, will throw an exception immediately.
	13970
	13971	The C<flags> parameter is reserved for future use, and must always
	13972	be zero.
	13973
	13974	=cut
	13975	*/
	13976
	13977	OP *
	13978	Perl_parse_stmtseq(pTHX_ U32 flags)
	13979	{
	13980	OP *stmtseqop;
	13981	I32 c;
	13982	if (flags)
	13983	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
	13984	stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
	13985	c = lex_peek_unichar(0);
	13986	if (c != -1 && c != /{/'}')
	13987	qerror(Perl_mess(aTHX_ "Parse error"));
	13988	return stmtseqop;
	13989	}
	13990
	13991	/*
	13992	=for apidoc parse_subsignature
	13993
	13994	Parse a subroutine signature declaration. This is the contents of the
	13995	parentheses following a named or anonymous subroutine declaration when the
	13996	C<signatures> feature is enabled. Note that this function neither expects
	13997	nor consumes the opening and closing parentheses around the signature; it
	13998	is the caller's job to handle these.
	13999
	14000	This function must only be called during parsing of a subroutine; after
	14001	L</start_subparse> has been called. It might allocate lexical variables on
	14002	the pad for the current subroutine.
	14003
	14004	The op tree to unpack the arguments from the stack at runtime is returned.
	14005	This op tree should appear at the beginning of the compiled function. The
	14006	caller may wish to use L</op_append_list> to build their function body
	14007	after it, or splice it together with the body before calling L</newATTRSUB>.
	14008
	14009	The C<flags> parameter is reserved for future use, and must always
	14010	be zero.
	14011
	14012	=cut
	14013	*/
	14014
	14015	OP *
	14016	Perl_parse_subsignature(pTHX_ U32 flags)
	14017	{
	14018	if (flags)
	14019	Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
	14020	return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
	14021	}
	14022
	14023	/*
	14024	* ex: set ts=8 sts=4 sw=4 et:
	14025	*/