toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmnU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "invlist_inline.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g, h)  \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* XXX temporary backwards compatibility */
  49 #define PL_lex_brackets         (PL_parser->lex_brackets)
  50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  53 #define PL_lex_casemods         (PL_parser->lex_casemods)
  54 #define PL_lex_casestack        (PL_parser->lex_casestack)
  55 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  56 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  57 #define PL_lex_inpat            (PL_parser->lex_inpat)
  58 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  59 #define PL_lex_op               (PL_parser->lex_op)
  60 #define PL_lex_repl             (PL_parser->lex_repl)
  61 #define PL_lex_starts           (PL_parser->lex_starts)
  62 #define PL_lex_stuff            (PL_parser->lex_stuff)
  63 #define PL_multi_start          (PL_parser->multi_start)
  64 #define PL_multi_open           (PL_parser->multi_open)
  65 #define PL_multi_close          (PL_parser->multi_close)
  66 #define PL_preambled            (PL_parser->preambled)
  67 #define PL_linestr              (PL_parser->linestr)
  68 #define PL_expect               (PL_parser->expect)
  69 #define PL_copline              (PL_parser->copline)
  70 #define PL_bufptr               (PL_parser->bufptr)
  71 #define PL_oldbufptr            (PL_parser->oldbufptr)
  72 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  73 #define PL_linestart            (PL_parser->linestart)
  74 #define PL_bufend               (PL_parser->bufend)
  75 #define PL_last_uni             (PL_parser->last_uni)
  76 #define PL_last_lop             (PL_parser->last_lop)
  77 #define PL_last_lop_op          (PL_parser->last_lop_op)
  78 #define PL_lex_state            (PL_parser->lex_state)
  79 #define PL_rsfp                 (PL_parser->rsfp)
  80 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  81 #define PL_in_my                (PL_parser->in_my)
  82 #define PL_in_my_stash          (PL_parser->in_my_stash)
  83 #define PL_tokenbuf             (PL_parser->tokenbuf)
  84 #define PL_multi_end            (PL_parser->multi_end)
  85 #define PL_error_count          (PL_parser->error_count)
  86
  87 #  define PL_nexttoke           (PL_parser->nexttoke)
  88 #  define PL_nexttype           (PL_parser->nexttype)
  89 #  define PL_nextval            (PL_parser->nextval)
  90
  91
  92 #define SvEVALED(sv) \
  93     (SvTYPE(sv) >= SVt_PVNV \
  94     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
  95
  96 static const char ident_too_long[] = "Identifier too long";
  97 static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'";
  98
  99 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 100
 101 #define XENUMMASK  0x3f
 102 #define XFAKEEOF   0x40
 103 #define XFAKEBRACK 0x80
 104
 105 #ifdef USE_UTF8_SCRIPTS
 106 #   define UTF cBOOL(!IN_BYTES)
 107 #else
 108 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 109 #endif
 110
 111 /* The maximum number of characters preceding the unrecognized one to display */
 112 #define UNRECOGNIZED_PRECEDE_COUNT 10
 113
 114 /* In variables named $^X, these are the legal values for X.
 115  * 1999-02-27 mjd-perl-patch@plover.com */
 116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
 117
 118 #define SPACE_OR_TAB(c) isBLANK_A(c)
 119
 120 #define HEXFP_PEEK(s)     \
 121     (((s[0] == '.') && \
 122       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
 123      isALPHA_FOLD_EQ(s[0], 'p'))
 124
 125 /* LEX_* are values for PL_lex_state, the state of the lexer.
 126  * They are arranged oddly so that the guard on the switch statement
 127  * can get by with a single comparison (if the compiler is smart enough).
 128  *
 129  * These values refer to the various states within a sublex parse,
 130  * i.e. within a double quotish string
 131  */
 132
 133 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 134
 135 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 136 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 137 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 138 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 139 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 140
 141                                    /* at end of code, eg "$x" followed by:  */
 142 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 143 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 144
 145 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 146                                         string or after \E, $foo, etc       */
 147 #define LEX_INTERPCONST          2 /* NOT USED */
 148 #define LEX_FORMLINE             1 /* expecting a format line               */
 149
 150 /* returned to yyl_try() to request it to retry the parse loop, expected to only
 151    be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
 152    can also return it.
 153
 154    yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
 155    other token values are 258 or higher (see perly.h), so -1 should be
 156    a safe value here.
 157 */
 158 #define YYL_RETRY (-1)
 159
 160 #ifdef DEBUGGING
 161 static const char* const lex_state_names[] = {
 162     "KNOWNEXT",
 163     "FORMLINE",
 164     "INTERPCONST",
 165     "INTERPCONCAT",
 166     "INTERPENDMAYBE",
 167     "INTERPEND",
 168     "INTERPSTART",
 169     "INTERPPUSH",
 170     "INTERPCASEMOD",
 171     "INTERPNORMAL",
 172     "NORMAL"
 173 };
 174 #endif
 175
 176 #include "keywords.h"
 177
 178 /* CLINE is a macro that ensures PL_copline has a sane value */
 179
 180 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 181
 182 /*
 183  * Convenience functions to return different tokens and prime the
 184  * lexer for the next token.  They all take an argument.
 185  *
 186  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 187  * OPERATOR     : generic operator
 188  * AOPERATOR    : assignment operator
 189  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 190  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 191  * PREREF       : *EXPR where EXPR is not a simple identifier
 192  * TERM         : expression term
 193  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 194  * LOOPX        : loop exiting command (goto, last, dump, etc)
 195  * FTST         : file test operator
 196  * FUN0         : zero-argument function
 197  * FUN0OP       : zero-argument function, with its op created in this file
 198  * FUN1         : not used, except for not, which isn't a UNIOP
 199  * BOop         : bitwise or or xor
 200  * BAop         : bitwise and
 201  * BCop         : bitwise complement
 202  * SHop         : shift operator
 203  * PWop         : power operator
 204  * PMop         : pattern-matching operator
 205  * Aop          : addition-level operator
 206  * AopNOASSIGN  : addition-level operator that is never part of .=
 207  * Mop          : multiplication-level operator
 208  * ChEop        : chaining equality-testing operator
 209  * NCEop        : non-chaining comparison operator at equality precedence
 210  * ChRop        : chaining relational operator <= != gt
 211  * NCRop        : non-chaining relational operator isa
 212  *
 213  * Also see LOP and lop() below.
 214  */
 215
 216 #ifdef DEBUGGING /* Serve -DT. */
 217 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 218 #else
 219 #   define REPORT(retval) (retval)
 220 #endif
 221
 222 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 223 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 224 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 225 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 226 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 227 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 228 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 229 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 230 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
 231                          pl_yylval.ival=f, \
 232                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
 233                          REPORT((int)LOOPEX))
 234 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 235 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 236 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 237 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 238 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
 239 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
 240 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
 241                        REPORT(PERLY_TILDE)
 242 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
 243 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 244 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 245 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 246 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
 247 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 248 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
 249 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
 250 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
 251 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
 252
 253 /* This bit of chicanery makes a unary function followed by
 254  * a parenthesis into a function with one argument, highest precedence.
 255  * The UNIDOR macro is for unary functions that can be followed by the //
 256  * operator (such as C<shift // 0>).
 257  */
 258 #define UNI3(f,x,have_x) { \
 259         pl_yylval.ival = f; \
 260         if (have_x) PL_expect = x; \
 261         PL_bufptr = s; \
 262         PL_last_uni = PL_oldbufptr; \
 263         PL_last_lop_op = (f) < 0 ? -(f) : (f); \
 264         if (*s == '(') \
 265             return REPORT( (int)FUNC1 ); \
 266         s = skipspace(s); \
 267         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 268         }
 269 #define UNI(f)    UNI3(f,XTERM,1)
 270 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 271 #define UNIPROTO(f,optional) { \
 272         if (optional) PL_last_uni = PL_oldbufptr; \
 273         OPERATOR(f); \
 274         }
 275
 276 #define UNIBRACK(f) UNI3(f,0,0)
 277
 278 /* return has special case parsing.
 279  *
 280  * List operators have low precedence. Functions have high precedence.
 281  * Every built in, *except return*, if written with () around its arguments, is
 282  * parsed as a function. Hence every other list built in:
 283  *
 284  * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9
 285  * 429
 286  * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5
 287  * 639
 288  * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()'
 289  * Useless use of a constant (2) in void context at -e line 1.
 290  * Useless use of a constant (4) in void context at -e line 1.
 291  *
 292  * $
 293  *
 294  * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a
 295  * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string.
 296  *
 297  * Whereas return:
 298  *
 299  * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()'
 300  * 2
 301  * 4
 302  * 9
 303  * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()'
 304  * Useless use of a constant (2) in void context at -e line 1.
 305  * Useless use of a constant (4) in void context at -e line 1.
 306  * 9
 307  * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()'
 308  * Useless use of a constant (2) in void context at -e line 1.
 309  * Useless use of a constant (4) in void context at -e line 1.
 310  * 9
 311  * $
 312  *
 313  * and:
 314  * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()'
 315  * 2
 316  * 4
 317  * 6
 318  *
 319  * This last example is what we expect, but it's clearly inconsistent with how
 320  * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently
 321  * followed.
 322  *
 323  *
 324  * Perl 3 attempted to be consistent:
 325  *
 326  *   The rules are more consistent about where parens are needed and
 327  *   where they are not.  In particular, unary operators and list operators now
 328  *   behave like functions if they're called like functions.
 329  *
 330  * However, the behaviour for return was reverted to the "old" parsing with
 331  * patches 9-12:
 332  *
 333  *   The construct
 334  *   return (1,2,3);
 335  *   did not do what was expected, since return was swallowing the
 336  *   parens in order to consider itself a function.  The solution,
 337  *   since return never wants any trailing expression such as
 338  *   return (1,2,3) + 2;
 339  *   is to simply make return an exception to the paren-makes-a-function
 340  *   rule, and treat it the way it always was, so that it doesn't
 341  *   strip the parens.
 342  *
 343  * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with
 344  * LOP(OP_RETURN, XTERM);
 345  *
 346  * and constructs such as
 347  *
 348  *     return (Internals::V())[2]
 349  *
 350  * turn into syntax errors
 351  */
 352
 353 #define OLDLOP(f) \
 354         do { \
 355             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 356                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 357             pl_yylval.ival = (f); \
 358             PL_expect = XTERM; \
 359             PL_bufptr = s; \
 360             return (int)LSTOP; \
 361         } while(0)
 362
 363 #define COPLINE_INC_WITH_HERELINES                  \
 364     STMT_START {                                     \
 365         CopLINE_inc(PL_curcop);                       \
 366         if (PL_parser->herelines)                      \
 367             CopLINE(PL_curcop) += PL_parser->herelines, \
 368             PL_parser->herelines = 0;                    \
 369     } STMT_END
 370 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 371  * is no sublex_push to follow. */
 372 #define COPLINE_SET_FROM_MULTI_END            \
 373     STMT_START {                               \
 374         CopLINE_set(PL_curcop, PL_multi_end);   \
 375         if (PL_multi_end != PL_multi_start)      \
 376             PL_parser->herelines = 0;             \
 377     } STMT_END
 378
 379
 380 /* A file-local structure for passing around information about subroutines and
 381  * related definable words */
 382 struct code {
 383     SV *sv;
 384     CV *cv;
 385     GV *gv, **gvp;
 386     OP *rv2cv_op;
 387     PADOFFSET off;
 388     bool lex;
 389 };
 390
 391 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
 392
 393 #ifdef DEBUGGING
 394
 395 /* how to interpret the pl_yylval associated with the token */
 396 enum token_type {
 397     TOKENTYPE_NONE,
 398     TOKENTYPE_IVAL,
 399     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 400     TOKENTYPE_PVAL,
 401     TOKENTYPE_OPVAL
 402 };
 403
 404 #define DEBUG_TOKEN(Type, Name)                                         \
 405     { Name, TOKENTYPE_##Type, #Name }
 406
 407 static struct debug_tokens {
 408     const int token;
 409     enum token_type type;
 410     const char *name;
 411 } const debug_tokens[] =
 412 {
 413     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 414     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 415     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 416     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 417     { ANON_SIGSUB,      TOKENTYPE_IVAL,         "ANON_SIGSUB" },
 418     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 419     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 420     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 421     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 422     { CATCH,            TOKENTYPE_IVAL,         "CATCH" },
 423     { CHEQOP,           TOKENTYPE_OPNUM,        "CHEQOP" },
 424     { CHRELOP,          TOKENTYPE_OPNUM,        "CHRELOP" },
 425     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 426     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 427     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 428     { DO,               TOKENTYPE_NONE,         "DO" },
 429     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 430     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 431     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 432     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 433     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 434     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 435     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 436     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 437     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 438     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 439     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 440     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 441     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 442     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 443     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 444     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 445     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 446     { IF,               TOKENTYPE_IVAL,         "IF" },
 447     { LABEL,            TOKENTYPE_OPVAL,        "LABEL" },
 448     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 449     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 450     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 451     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 452     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 453     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 454     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 455     { MY,               TOKENTYPE_IVAL,         "MY" },
 456     { NCEQOP,           TOKENTYPE_OPNUM,        "NCEQOP" },
 457     { NCRELOP,          TOKENTYPE_OPNUM,        "NCRELOP" },
 458     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 459     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 460     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 461     { OROR,             TOKENTYPE_NONE,         "OROR" },
 462     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 463     DEBUG_TOKEN (IVAL, PERLY_AMPERSAND),
 464     DEBUG_TOKEN (IVAL, PERLY_BRACE_CLOSE),
 465     DEBUG_TOKEN (IVAL, PERLY_BRACE_OPEN),
 466     DEBUG_TOKEN (IVAL, PERLY_BRACKET_CLOSE),
 467     DEBUG_TOKEN (IVAL, PERLY_BRACKET_OPEN),
 468     DEBUG_TOKEN (IVAL, PERLY_COLON),
 469     DEBUG_TOKEN (IVAL, PERLY_COMMA),
 470     DEBUG_TOKEN (IVAL, PERLY_DOT),
 471     DEBUG_TOKEN (IVAL, PERLY_EQUAL_SIGN),
 472     DEBUG_TOKEN (IVAL, PERLY_EXCLAMATION_MARK),
 473     DEBUG_TOKEN (IVAL, PERLY_MINUS),
 474     DEBUG_TOKEN (IVAL, PERLY_PAREN_OPEN),
 475     DEBUG_TOKEN (IVAL, PERLY_PERCENT_SIGN),
 476     DEBUG_TOKEN (IVAL, PERLY_PLUS),
 477     DEBUG_TOKEN (IVAL, PERLY_QUESTION_MARK),
 478     DEBUG_TOKEN (IVAL, PERLY_SEMICOLON),
 479     DEBUG_TOKEN (IVAL, PERLY_SLASH),
 480     DEBUG_TOKEN (IVAL, PERLY_SNAIL),
 481     DEBUG_TOKEN (IVAL, PERLY_STAR),
 482     DEBUG_TOKEN (IVAL, PERLY_TILDE),
 483     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 484     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 485     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 486     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 487     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 488     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 489     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 490     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 491     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 492     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 493     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 494     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 495     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 496     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 497     { SIGSUB,           TOKENTYPE_NONE,         "SIGSUB" },
 498     { SUB,              TOKENTYPE_NONE,         "SUB" },
 499     { SUBLEXEND,        TOKENTYPE_NONE,         "SUBLEXEND" },
 500     { SUBLEXSTART,      TOKENTYPE_NONE,         "SUBLEXSTART" },
 501     { THING,            TOKENTYPE_OPVAL,        "THING" },
 502     { TRY,              TOKENTYPE_IVAL,         "TRY" },
 503     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 504     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 505     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 506     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 507     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 508     { USE,              TOKENTYPE_IVAL,         "USE" },
 509     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 510     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 511     { BAREWORD,         TOKENTYPE_OPVAL,        "BAREWORD" },
 512     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 513     { 0,                TOKENTYPE_NONE,         NULL }
 514 };
 515
 516 #undef DEBUG_TOKEN
 517
 518 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 519
 520 STATIC int
 521 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 522 {
 523     PERL_ARGS_ASSERT_TOKEREPORT;
 524
 525     if (DEBUG_T_TEST) {
 526         const char *name = NULL;
 527         enum token_type type = TOKENTYPE_NONE;
 528         const struct debug_tokens *p;
 529         SV* const report = newSVpvs("<== ");
 530
 531         for (p = debug_tokens; p->token; p++) {
 532             if (p->token == (int)rv) {
 533                 name = p->name;
 534                 type = p->type;
 535                 break;
 536             }
 537         }
 538         if (name)
 539             Perl_sv_catpv(aTHX_ report, name);
 540         else if (isGRAPH(rv))
 541         {
 542             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 543             if ((char)rv == 'p')
 544                 sv_catpvs(report, " (pending identifier)");
 545         }
 546         else if (!rv)
 547             sv_catpvs(report, "EOF");
 548         else
 549             Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
 550         switch (type) {
 551         case TOKENTYPE_NONE:
 552             break;
 553         case TOKENTYPE_IVAL:
 554             Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
 555             break;
 556         case TOKENTYPE_OPNUM:
 557             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 558                                     PL_op_name[lvalp->ival]);
 559             break;
 560         case TOKENTYPE_PVAL:
 561             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 562             break;
 563         case TOKENTYPE_OPVAL:
 564             if (lvalp->opval) {
 565                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 566                                     PL_op_name[lvalp->opval->op_type]);
 567                 if (lvalp->opval->op_type == OP_CONST) {
 568                     Perl_sv_catpvf(aTHX_ report, " %s",
 569                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 570                 }
 571
 572             }
 573             else
 574                 sv_catpvs(report, "(opval=null)");
 575             break;
 576         }
 577         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 578     };
 579     return (int)rv;
 580 }
 581
 582
 583 /* print the buffer with suitable escapes */
 584
 585 STATIC void
 586 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 587 {
 588     SV* const tmp = newSVpvs("");
 589
 590     PERL_ARGS_ASSERT_PRINTBUF;
 591
 592     GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
 593     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 594     GCC_DIAG_RESTORE_STMT;
 595     SvREFCNT_dec(tmp);
 596 }
 597
 598 #endif
 599
 600 /*
 601  * S_ao
 602  *
 603  * This subroutine looks for an '=' next to the operator that has just been
 604  * parsed and turns it into an ASSIGNOP if it finds one.
 605  */
 606
 607 STATIC int
 608 S_ao(pTHX_ int toketype)
 609 {
 610     if (*PL_bufptr == '=') {
 611         PL_bufptr++;
 612         if (toketype == ANDAND)
 613             pl_yylval.ival = OP_ANDASSIGN;
 614         else if (toketype == OROR)
 615             pl_yylval.ival = OP_ORASSIGN;
 616         else if (toketype == DORDOR)
 617             pl_yylval.ival = OP_DORASSIGN;
 618         toketype = ASSIGNOP;
 619     }
 620     return REPORT(toketype);
 621 }
 622
 623 /*
 624  * S_no_op
 625  * When Perl expects an operator and finds something else, no_op
 626  * prints the warning.  It always prints "<something> found where
 627  * operator expected.  It prints "Missing semicolon on previous line?"
 628  * if the surprise occurs at the start of the line.  "do you need to
 629  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 630  * where the compiler doesn't know if foo is a method call or a function.
 631  * It prints "Missing operator before end of line" if there's nothing
 632  * after the missing operator, or "... before <...>" if there is something
 633  * after the missing operator.
 634  *
 635  * PL_bufptr is expected to point to the start of the thing that was found,
 636  * and s after the next token or partial token.
 637  */
 638
 639 STATIC void
 640 S_no_op(pTHX_ const char *const what, char *s)
 641 {
 642     char * const oldbp = PL_bufptr;
 643     const bool is_first = (PL_oldbufptr == PL_linestart);
 644
 645     PERL_ARGS_ASSERT_NO_OP;
 646
 647     if (!s)
 648         s = oldbp;
 649     else
 650         PL_bufptr = s;
 651     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 652     if (ckWARN_d(WARN_SYNTAX)) {
 653         if (is_first)
 654             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 655                     "\t(Missing semicolon on previous line?)\n");
 656         else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
 657                                                            PL_bufend,
 658                                                            UTF))
 659         {
 660             const char *t;
 661             for (t = PL_oldoldbufptr;
 662                  (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
 663                  t += UTF ? UTF8SKIP(t) : 1)
 664             {
 665                 NOOP;
 666             }
 667             if (t < PL_bufptr && isSPACE(*t))
 668                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 669                         "\t(Do you need to predeclare %" UTF8f "?)\n",
 670                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 671         }
 672         else {
 673             assert(s >= oldbp);
 674             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 675                     "\t(Missing operator before %" UTF8f "?)\n",
 676                      UTF8fARG(UTF, s - oldbp, oldbp));
 677         }
 678     }
 679     PL_bufptr = oldbp;
 680 }
 681
 682 /*
 683  * S_missingterm
 684  * Complain about missing quote/regexp/heredoc terminator.
 685  * If it's called with NULL then it cauterizes the line buffer.
 686  * If we're in a delimited string and the delimiter is a control
 687  * character, it's reformatted into a two-char sequence like ^C.
 688  * This is fatal.
 689  */
 690
 691 STATIC void
 692 S_missingterm(pTHX_ char *s, STRLEN len)
 693 {
 694     char tmpbuf[UTF8_MAXBYTES + 1];
 695     char q;
 696     bool uni = FALSE;
 697     SV *sv;
 698     if (s) {
 699         char * const nl = (char *) my_memrchr(s, '\n', len);
 700         if (nl) {
 701             *nl = '\0';
 702             len = nl - s;
 703         }
 704         uni = UTF;
 705     }
 706     else if (PL_multi_close < 32) {
 707         *tmpbuf = '^';
 708         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 709         tmpbuf[2] = '\0';
 710         s = tmpbuf;
 711         len = 2;
 712     }
 713     else {
 714         if (LIKELY(PL_multi_close < 256)) {
 715             *tmpbuf = (char)PL_multi_close;
 716             tmpbuf[1] = '\0';
 717             len = 1;
 718         }
 719         else {
 720             char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
 721             *end = '\0';
 722             len = end - tmpbuf;
 723             uni = TRUE;
 724         }
 725         s = tmpbuf;
 726     }
 727     q = memchr(s, '"', len) ? '\'' : '"';
 728     sv = newSVpvn_flags(s, len, SVs_TEMP);
 729     if (uni)
 730         SvUTF8_on(sv);
 731     Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
 732                      " anywhere before EOF", q, SVfARG(sv), q);
 733 }
 734
 735 #include "feature.h"
 736
 737 /*
 738  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 739  * utf16-to-utf8-reversed.
 740  */
 741
 742 #ifdef PERL_CR_FILTER
 743 static void
 744 strip_return(SV *sv)
 745 {
 746     const char *s = SvPVX_const(sv);
 747     const char * const e = s + SvCUR(sv);
 748
 749     PERL_ARGS_ASSERT_STRIP_RETURN;
 750
 751     /* outer loop optimized to do nothing if there are no CR-LFs */
 752     while (s < e) {
 753         if (*s++ == '\r' && *s == '\n') {
 754             /* hit a CR-LF, need to copy the rest */
 755             char *d = s - 1;
 756             *d++ = *s++;
 757             while (s < e) {
 758                 if (*s == '\r' && s[1] == '\n')
 759                     s++;
 760                 *d++ = *s++;
 761             }
 762             SvCUR(sv) -= s - d;
 763             return;
 764         }
 765     }
 766 }
 767
 768 STATIC I32
 769 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 770 {
 771     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 772     if (count > 0 && !maxlen)
 773         strip_return(sv);
 774     return count;
 775 }
 776 #endif
 777
 778 /*
 779 =for apidoc lex_start
 780
 781 Creates and initialises a new lexer/parser state object, supplying
 782 a context in which to lex and parse from a new source of Perl code.
 783 A pointer to the new state object is placed in L</PL_parser>.  An entry
 784 is made on the save stack so that upon unwinding, the new state object
 785 will be destroyed and the former value of L</PL_parser> will be restored.
 786 Nothing else need be done to clean up the parsing context.
 787
 788 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
 789 non-null, provides a string (in SV form) containing code to be parsed.
 790 A copy of the string is made, so subsequent modification of C<line>
 791 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
 792 from which code will be read to be parsed.  If both are non-null, the
 793 code in C<line> comes first and must consist of complete lines of input,
 794 and C<rsfp> supplies the remainder of the source.
 795
 796 The C<flags> parameter is reserved for future use.  Currently it is only
 797 used by perl internally, so extensions should always pass zero.
 798
 799 =cut
 800 */
 801
 802 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 803    can share filters with the current parser.
 804    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 805    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 806    destruction. This is used to handle the case of defaulting to reading the
 807    script from the standard input because no filename was given on the command
 808    line (without getting confused by situation where STDIN has been closed, so
 809    the script handle is opened on fd 0)  */
 810
 811 void
 812 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 813 {
 814     const char *s = NULL;
 815     yy_parser *parser, *oparser;
 816
 817     if (flags && flags & ~LEX_START_FLAGS)
 818         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 819
 820     /* create and initialise a parser */
 821
 822     Newxz(parser, 1, yy_parser);
 823     parser->old_parser = oparser = PL_parser;
 824     PL_parser = parser;
 825
 826     parser->stack = NULL;
 827     parser->stack_max1 = NULL;
 828     parser->ps = NULL;
 829
 830     /* on scope exit, free this parser and restore any outer one */
 831     SAVEPARSER(parser);
 832     parser->saved_curcop = PL_curcop;
 833
 834     /* initialise lexer state */
 835
 836     parser->nexttoke = 0;
 837     parser->error_count = oparser ? oparser->error_count : 0;
 838     parser->copline = parser->preambling = NOLINE;
 839     parser->lex_state = LEX_NORMAL;
 840     parser->expect = XSTATE;
 841     parser->rsfp = rsfp;
 842     parser->recheck_utf8_validity = TRUE;
 843     parser->rsfp_filters =
 844       !(flags & LEX_START_SAME_FILTER) || !oparser
 845         ? NULL
 846         : MUTABLE_AV(SvREFCNT_inc(
 847             oparser->rsfp_filters
 848              ? oparser->rsfp_filters
 849              : (oparser->rsfp_filters = newAV())
 850           ));
 851
 852     Newx(parser->lex_brackstack, 120, char);
 853     Newx(parser->lex_casestack, 12, char);
 854     *parser->lex_casestack = '\0';
 855     Newxz(parser->lex_shared, 1, LEXSHARED);
 856
 857     if (line) {
 858         STRLEN len;
 859         const U8* first_bad_char_loc;
 860
 861         s = SvPV_const(line, len);
 862
 863         if (   SvUTF8(line)
 864             && UNLIKELY(! is_utf8_string_loc((U8 *) s,
 865                                              SvCUR(line),
 866                                              &first_bad_char_loc)))
 867         {
 868             _force_out_malformed_utf8_message(first_bad_char_loc,
 869                                               (U8 *) s + SvCUR(line),
 870                                               0,
 871                                               1 /* 1 means die */ );
 872             NOT_REACHED; /* NOTREACHED */
 873         }
 874
 875         parser->linestr = flags & LEX_START_COPIED
 876                             ? SvREFCNT_inc_simple_NN(line)
 877                             : newSVpvn_flags(s, len, SvUTF8(line));
 878         if (!rsfp)
 879             sv_catpvs(parser->linestr, "\n;");
 880     } else {
 881         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 882     }
 883
 884     parser->oldoldbufptr =
 885         parser->oldbufptr =
 886         parser->bufptr =
 887         parser->linestart = SvPVX(parser->linestr);
 888     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 889     parser->last_lop = parser->last_uni = NULL;
 890
 891     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 892                                                         |LEX_DONT_CLOSE_RSFP));
 893     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 894                                                         |LEX_DONT_CLOSE_RSFP));
 895
 896     parser->in_pod = parser->filtered = 0;
 897 }
 898
 899
 900 /* delete a parser object */
 901
 902 void
 903 Perl_parser_free(pTHX_  const yy_parser *parser)
 904 {
 905     PERL_ARGS_ASSERT_PARSER_FREE;
 906
 907     PL_curcop = parser->saved_curcop;
 908     SvREFCNT_dec(parser->linestr);
 909
 910     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 911         PerlIO_clearerr(parser->rsfp);
 912     else if (parser->rsfp && (!parser->old_parser
 913           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 914         PerlIO_close(parser->rsfp);
 915     SvREFCNT_dec(parser->rsfp_filters);
 916     SvREFCNT_dec(parser->lex_stuff);
 917     SvREFCNT_dec(parser->lex_sub_repl);
 918
 919     Safefree(parser->lex_brackstack);
 920     Safefree(parser->lex_casestack);
 921     Safefree(parser->lex_shared);
 922     PL_parser = parser->old_parser;
 923     Safefree(parser);
 924 }
 925
 926 void
 927 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 928 {
 929     I32 nexttoke = parser->nexttoke;
 930     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 931     while (nexttoke--) {
 932         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 933          && parser->nextval[nexttoke].opval
 934          && parser->nextval[nexttoke].opval->op_slabbed
 935          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 936             op_free(parser->nextval[nexttoke].opval);
 937             parser->nextval[nexttoke].opval = NULL;
 938         }
 939     }
 940 }
 941
 942
 943 /*
 944 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
 945
 946 Buffer scalar containing the chunk currently under consideration of the
 947 text currently being lexed.  This is always a plain string scalar (for
 948 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 949 normal scalar means; instead refer to the buffer directly by the pointer
 950 variables described below.
 951
 952 The lexer maintains various C<char*> pointers to things in the
 953 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 954 reallocated, all of these pointers must be updated.  Don't attempt to
 955 do this manually, but rather use L</lex_grow_linestr> if you need to
 956 reallocate the buffer.
 957
 958 The content of the text chunk in the buffer is commonly exactly one
 959 complete line of input, up to and including a newline terminator,
 960 but there are situations where it is otherwise.  The octets of the
 961 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 962 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 963 flag on this scalar, which may disagree with it.
 964
 965 For direct examination of the buffer, the variable
 966 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 967 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 968 of these pointers is usually preferable to examination of the scalar
 969 through normal scalar means.
 970
 971 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
 972
 973 Direct pointer to the end of the chunk of text currently being lexed, the
 974 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 975 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 976 always located at the end of the buffer, and does not count as part of
 977 the buffer's contents.
 978
 979 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
 980
 981 Points to the current position of lexing inside the lexer buffer.
 982 Characters around this point may be freely examined, within
 983 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 984 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 985 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 986
 987 Lexing code (whether in the Perl core or not) moves this pointer past
 988 the characters that it consumes.  It is also expected to perform some
 989 bookkeeping whenever a newline character is consumed.  This movement
 990 can be more conveniently performed by the function L</lex_read_to>,
 991 which handles newlines appropriately.
 992
 993 Interpretation of the buffer's octets can be abstracted out by
 994 using the slightly higher-level functions L</lex_peek_unichar> and
 995 L</lex_read_unichar>.
 996
 997 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
 998
 999 Points to the start of the current line inside the lexer buffer.
1000 This is useful for indicating at which column an error occurred, and
1001 not much else.  This must be updated by any lexing code that consumes
1002 a newline; the function L</lex_read_to> handles this detail.
1003
1004 =cut
1005 */
1006
1007 /*
1008 =for apidoc lex_bufutf8
1009
1010 Indicates whether the octets in the lexer buffer
1011 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
1012 of Unicode characters.  If not, they should be interpreted as Latin-1
1013 characters.  This is analogous to the C<SvUTF8> flag for scalars.
1014
1015 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
1016 contains valid UTF-8.  Lexing code must be robust in the face of invalid
1017 encoding.
1018
1019 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
1020 is significant, but not the whole story regarding the input character
1021 encoding.  Normally, when a file is being read, the scalar contains octets
1022 and its C<SvUTF8> flag is off, but the octets should be interpreted as
1023 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
1024 however, the scalar may have the C<SvUTF8> flag on, and in this case its
1025 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
1026 is in effect.  This logic may change in the future; use this function
1027 instead of implementing the logic yourself.
1028
1029 =cut
1030 */
1031
1032 bool
1033 Perl_lex_bufutf8(pTHX)
1034 {
1035     return UTF;
1036 }
1037
1038 /*
1039 =for apidoc lex_grow_linestr
1040
1041 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
1042 at least C<len> octets (including terminating C<NUL>).  Returns a
1043 pointer to the reallocated buffer.  This is necessary before making
1044 any direct modification of the buffer that would increase its length.
1045 L</lex_stuff_pvn> provides a more convenient way to insert text into
1046 the buffer.
1047
1048 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
1049 this function updates all of the lexer's variables that point directly
1050 into the buffer.
1051
1052 =cut
1053 */
1054
1055 char *
1056 Perl_lex_grow_linestr(pTHX_ STRLEN len)
1057 {
1058     SV *linestr;
1059     char *buf;
1060     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1061     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
1062     bool current;
1063
1064     linestr = PL_parser->linestr;
1065     buf = SvPVX(linestr);
1066     if (len <= SvLEN(linestr))
1067         return buf;
1068
1069     /* Is the lex_shared linestr SV the same as the current linestr SV?
1070      * Only in this case does re_eval_start need adjusting, since it
1071      * points within lex_shared->ls_linestr's buffer */
1072     current = (   !PL_parser->lex_shared->ls_linestr
1073                || linestr == PL_parser->lex_shared->ls_linestr);
1074
1075     bufend_pos = PL_parser->bufend - buf;
1076     bufptr_pos = PL_parser->bufptr - buf;
1077     oldbufptr_pos = PL_parser->oldbufptr - buf;
1078     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1079     linestart_pos = PL_parser->linestart - buf;
1080     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1081     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1082     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
1083                             PL_parser->lex_shared->re_eval_start - buf : 0;
1084
1085     buf = sv_grow(linestr, len);
1086
1087     PL_parser->bufend = buf + bufend_pos;
1088     PL_parser->bufptr = buf + bufptr_pos;
1089     PL_parser->oldbufptr = buf + oldbufptr_pos;
1090     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1091     PL_parser->linestart = buf + linestart_pos;
1092     if (PL_parser->last_uni)
1093         PL_parser->last_uni = buf + last_uni_pos;
1094     if (PL_parser->last_lop)
1095         PL_parser->last_lop = buf + last_lop_pos;
1096     if (current && PL_parser->lex_shared->re_eval_start)
1097         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
1098     return buf;
1099 }
1100
1101 /*
1102 =for apidoc lex_stuff_pvn
1103
1104 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1105 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1106 reallocating the buffer if necessary.  This means that lexing code that
1107 runs later will see the characters as if they had appeared in the input.
1108 It is not recommended to do this as part of normal parsing, and most
1109 uses of this facility run the risk of the inserted characters being
1110 interpreted in an unintended manner.
1111
1112 The string to be inserted is represented by C<len> octets starting
1113 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1114 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1115 The characters are recoded for the lexer buffer, according to how the
1116 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1117 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1118 function is more convenient.
1119
1120 =for apidoc Amnh||LEX_STUFF_UTF8
1121
1122 =cut
1123 */
1124
1125 void
1126 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1127 {
1128     char *bufptr;
1129     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1130     if (flags & ~(LEX_STUFF_UTF8))
1131         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1132     if (UTF) {
1133         if (flags & LEX_STUFF_UTF8) {
1134             goto plain_copy;
1135         } else {
1136             STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1137                                                        (U8 *) pv + len);
1138             const char *p, *e = pv+len;;
1139             if (!highhalf)
1140                 goto plain_copy;
1141             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1142             bufptr = PL_parser->bufptr;
1143             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1144             SvCUR_set(PL_parser->linestr,
1145                 SvCUR(PL_parser->linestr) + len+highhalf);
1146             PL_parser->bufend += len+highhalf;
1147             for (p = pv; p != e; p++) {
1148                 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1149             }
1150         }
1151     } else {
1152         if (flags & LEX_STUFF_UTF8) {
1153             STRLEN highhalf = 0;
1154             const char *p, *e = pv+len;
1155             for (p = pv; p != e; p++) {
1156                 U8 c = (U8)*p;
1157                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1158                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1159                                 "non-Latin-1 character into Latin-1 input");
1160                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1161                     p++;
1162                     highhalf++;
1163                 } else assert(UTF8_IS_INVARIANT(c));
1164             }
1165             if (!highhalf)
1166                 goto plain_copy;
1167             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1168             bufptr = PL_parser->bufptr;
1169             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1170             SvCUR_set(PL_parser->linestr,
1171                 SvCUR(PL_parser->linestr) + len-highhalf);
1172             PL_parser->bufend += len-highhalf;
1173             p = pv;
1174             while (p < e) {
1175                 if (UTF8_IS_INVARIANT(*p)) {
1176                     *bufptr++ = *p;
1177                     p++;
1178                 }
1179                 else {
1180                     assert(p < e -1 );
1181                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1182                     p += 2;
1183                 }
1184             }
1185         } else {
1186           plain_copy:
1187             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1188             bufptr = PL_parser->bufptr;
1189             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1190             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1191             PL_parser->bufend += len;
1192             Copy(pv, bufptr, len, char);
1193         }
1194     }
1195 }
1196
1197 /*
1198 =for apidoc lex_stuff_pv
1199
1200 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1201 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1202 reallocating the buffer if necessary.  This means that lexing code that
1203 runs later will see the characters as if they had appeared in the input.
1204 It is not recommended to do this as part of normal parsing, and most
1205 uses of this facility run the risk of the inserted characters being
1206 interpreted in an unintended manner.
1207
1208 The string to be inserted is represented by octets starting at C<pv>
1209 and continuing to the first nul.  These octets are interpreted as either
1210 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1211 in C<flags>.  The characters are recoded for the lexer buffer, according
1212 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1213 If it is not convenient to nul-terminate a string to be inserted, the
1214 L</lex_stuff_pvn> function is more appropriate.
1215
1216 =cut
1217 */
1218
1219 void
1220 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1221 {
1222     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1223     lex_stuff_pvn(pv, strlen(pv), flags);
1224 }
1225
1226 /*
1227 =for apidoc lex_stuff_sv
1228
1229 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1230 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1231 reallocating the buffer if necessary.  This means that lexing code that
1232 runs later will see the characters as if they had appeared in the input.
1233 It is not recommended to do this as part of normal parsing, and most
1234 uses of this facility run the risk of the inserted characters being
1235 interpreted in an unintended manner.
1236
1237 The string to be inserted is the string value of C<sv>.  The characters
1238 are recoded for the lexer buffer, according to how the buffer is currently
1239 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1240 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1241 need to construct a scalar.
1242
1243 =cut
1244 */
1245
1246 void
1247 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1248 {
1249     char *pv;
1250     STRLEN len;
1251     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1252     if (flags)
1253         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1254     pv = SvPV(sv, len);
1255     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1256 }
1257
1258 /*
1259 =for apidoc lex_unstuff
1260
1261 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1262 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1263 This hides the discarded text from any lexing code that runs later,
1264 as if the text had never appeared.
1265
1266 This is not the normal way to consume lexed text.  For that, use
1267 L</lex_read_to>.
1268
1269 =cut
1270 */
1271
1272 void
1273 Perl_lex_unstuff(pTHX_ char *ptr)
1274 {
1275     char *buf, *bufend;
1276     STRLEN unstuff_len;
1277     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1278     buf = PL_parser->bufptr;
1279     if (ptr < buf)
1280         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1281     if (ptr == buf)
1282         return;
1283     bufend = PL_parser->bufend;
1284     if (ptr > bufend)
1285         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1286     unstuff_len = ptr - buf;
1287     Move(ptr, buf, bufend+1-ptr, char);
1288     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1289     PL_parser->bufend = bufend - unstuff_len;
1290 }
1291
1292 /*
1293 =for apidoc lex_read_to
1294
1295 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1296 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1297 performing the correct bookkeeping whenever a newline character is passed.
1298 This is the normal way to consume lexed text.
1299
1300 Interpretation of the buffer's octets can be abstracted out by
1301 using the slightly higher-level functions L</lex_peek_unichar> and
1302 L</lex_read_unichar>.
1303
1304 =cut
1305 */
1306
1307 void
1308 Perl_lex_read_to(pTHX_ char *ptr)
1309 {
1310     char *s;
1311     PERL_ARGS_ASSERT_LEX_READ_TO;
1312     s = PL_parser->bufptr;
1313     if (ptr < s || ptr > PL_parser->bufend)
1314         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1315     for (; s != ptr; s++)
1316         if (*s == '\n') {
1317             COPLINE_INC_WITH_HERELINES;
1318             PL_parser->linestart = s+1;
1319         }
1320     PL_parser->bufptr = ptr;
1321 }
1322
1323 /*
1324 =for apidoc lex_discard_to
1325
1326 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1327 up to C<ptr>.  The remaining content of the buffer will be moved, and
1328 all pointers into the buffer updated appropriately.  C<ptr> must not
1329 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1330 it is not permitted to discard text that has yet to be lexed.
1331
1332 Normally it is not necessarily to do this directly, because it suffices to
1333 use the implicit discarding behaviour of L</lex_next_chunk> and things
1334 based on it.  However, if a token stretches across multiple lines,
1335 and the lexing code has kept multiple lines of text in the buffer for
1336 that purpose, then after completion of the token it would be wise to
1337 explicitly discard the now-unneeded earlier lines, to avoid future
1338 multi-line tokens growing the buffer without bound.
1339
1340 =cut
1341 */
1342
1343 void
1344 Perl_lex_discard_to(pTHX_ char *ptr)
1345 {
1346     char *buf;
1347     STRLEN discard_len;
1348     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1349     buf = SvPVX(PL_parser->linestr);
1350     if (ptr < buf)
1351         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1352     if (ptr == buf)
1353         return;
1354     if (ptr > PL_parser->bufptr)
1355         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1356     discard_len = ptr - buf;
1357     if (PL_parser->oldbufptr < ptr)
1358         PL_parser->oldbufptr = ptr;
1359     if (PL_parser->oldoldbufptr < ptr)
1360         PL_parser->oldoldbufptr = ptr;
1361     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1362         PL_parser->last_uni = NULL;
1363     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1364         PL_parser->last_lop = NULL;
1365     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1366     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1367     PL_parser->bufend -= discard_len;
1368     PL_parser->bufptr -= discard_len;
1369     PL_parser->oldbufptr -= discard_len;
1370     PL_parser->oldoldbufptr -= discard_len;
1371     if (PL_parser->last_uni)
1372         PL_parser->last_uni -= discard_len;
1373     if (PL_parser->last_lop)
1374         PL_parser->last_lop -= discard_len;
1375 }
1376
1377 void
1378 Perl_notify_parser_that_changed_to_utf8(pTHX)
1379 {
1380     /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1381      * off to on.  At compile time, this has the effect of entering a 'use
1382      * utf8' section.  This means that any input was not previously checked for
1383      * UTF-8 (because it was off), but now we do need to check it, or our
1384      * assumptions about the input being sane could be wrong, and we could
1385      * segfault.  This routine just sets a flag so that the next time we look
1386      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1387      * proper phase, there may not be a parser object, but if there is, setting
1388      * the flag is harmless */
1389
1390     if (PL_parser) {
1391         PL_parser->recheck_utf8_validity = TRUE;
1392     }
1393 }
1394
1395 /*
1396 =for apidoc lex_next_chunk
1397
1398 Reads in the next chunk of text to be lexed, appending it to
1399 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1400 looked to the end of the current chunk and wants to know more.  It is
1401 usual, but not necessary, for lexing to have consumed the entirety of
1402 the current chunk at this time.
1403
1404 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1405 chunk (i.e., the current chunk has been entirely consumed), normally the
1406 current chunk will be discarded at the same time that the new chunk is
1407 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1408 will not be discarded.  If the current chunk has not been entirely
1409 consumed, then it will not be discarded regardless of the flag.
1410
1411 Returns true if some new text was added to the buffer, or false if the
1412 buffer has reached the end of the input text.
1413
1414 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1415
1416 =cut
1417 */
1418
1419 #define LEX_FAKE_EOF 0x80000000
1420 #define LEX_NO_TERM  0x40000000 /* here-doc */
1421
1422 bool
1423 Perl_lex_next_chunk(pTHX_ U32 flags)
1424 {
1425     SV *linestr;
1426     char *buf;
1427     STRLEN old_bufend_pos, new_bufend_pos;
1428     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1429     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1430     bool got_some_for_debugger = 0;
1431     bool got_some;
1432
1433     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1434         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1435     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1436         return FALSE;
1437     linestr = PL_parser->linestr;
1438     buf = SvPVX(linestr);
1439     if (!(flags & LEX_KEEP_PREVIOUS)
1440           && PL_parser->bufptr == PL_parser->bufend)
1441     {
1442         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1443         linestart_pos = 0;
1444         if (PL_parser->last_uni != PL_parser->bufend)
1445             PL_parser->last_uni = NULL;
1446         if (PL_parser->last_lop != PL_parser->bufend)
1447             PL_parser->last_lop = NULL;
1448         last_uni_pos = last_lop_pos = 0;
1449         *buf = 0;
1450         SvCUR_set(linestr, 0);
1451     } else {
1452         old_bufend_pos = PL_parser->bufend - buf;
1453         bufptr_pos = PL_parser->bufptr - buf;
1454         oldbufptr_pos = PL_parser->oldbufptr - buf;
1455         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1456         linestart_pos = PL_parser->linestart - buf;
1457         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1458         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1459     }
1460     if (flags & LEX_FAKE_EOF) {
1461         goto eof;
1462     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1463         got_some = 0;
1464     } else if (filter_gets(linestr, old_bufend_pos)) {
1465         got_some = 1;
1466         got_some_for_debugger = 1;
1467     } else if (flags & LEX_NO_TERM) {
1468         got_some = 0;
1469     } else {
1470         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1471             SvPVCLEAR(linestr);
1472         eof:
1473         /* End of real input.  Close filehandle (unless it was STDIN),
1474          * then add implicit termination.
1475          */
1476         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1477             PerlIO_clearerr(PL_parser->rsfp);
1478         else if (PL_parser->rsfp)
1479             (void)PerlIO_close(PL_parser->rsfp);
1480         PL_parser->rsfp = NULL;
1481         PL_parser->in_pod = PL_parser->filtered = 0;
1482         if (!PL_in_eval && PL_minus_p) {
1483             sv_catpvs(linestr,
1484                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1485             PL_minus_n = PL_minus_p = 0;
1486         } else if (!PL_in_eval && PL_minus_n) {
1487             sv_catpvs(linestr, /*{*/";}");
1488             PL_minus_n = 0;
1489         } else
1490             sv_catpvs(linestr, ";");
1491         got_some = 1;
1492     }
1493     buf = SvPVX(linestr);
1494     new_bufend_pos = SvCUR(linestr);
1495     PL_parser->bufend = buf + new_bufend_pos;
1496     PL_parser->bufptr = buf + bufptr_pos;
1497
1498     if (UTF) {
1499         const U8* first_bad_char_loc;
1500         if (UNLIKELY(! is_utf8_string_loc(
1501                             (U8 *) PL_parser->bufptr,
1502                                    PL_parser->bufend - PL_parser->bufptr,
1503                                    &first_bad_char_loc)))
1504         {
1505             _force_out_malformed_utf8_message(first_bad_char_loc,
1506                                               (U8 *) PL_parser->bufend,
1507                                               0,
1508                                               1 /* 1 means die */ );
1509             NOT_REACHED; /* NOTREACHED */
1510         }
1511     }
1512
1513     PL_parser->oldbufptr = buf + oldbufptr_pos;
1514     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1515     PL_parser->linestart = buf + linestart_pos;
1516     if (PL_parser->last_uni)
1517         PL_parser->last_uni = buf + last_uni_pos;
1518     if (PL_parser->last_lop)
1519         PL_parser->last_lop = buf + last_lop_pos;
1520     if (PL_parser->preambling != NOLINE) {
1521         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1522         PL_parser->preambling = NOLINE;
1523     }
1524     if (   got_some_for_debugger
1525         && PERLDB_LINE_OR_SAVESRC
1526         && PL_curstash != PL_debstash)
1527     {
1528         /* debugger active and we're not compiling the debugger code,
1529          * so store the line into the debugger's array of lines
1530          */
1531         update_debugger_info(NULL, buf+old_bufend_pos,
1532             new_bufend_pos-old_bufend_pos);
1533     }
1534     return got_some;
1535 }
1536
1537 /*
1538 =for apidoc lex_peek_unichar
1539
1540 Looks ahead one (Unicode) character in the text currently being lexed.
1541 Returns the codepoint (unsigned integer value) of the next character,
1542 or -1 if lexing has reached the end of the input text.  To consume the
1543 peeked character, use L</lex_read_unichar>.
1544
1545 If the next character is in (or extends into) the next chunk of input
1546 text, the next chunk will be read in.  Normally the current chunk will be
1547 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1548 bit set, then the current chunk will not be discarded.
1549
1550 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1551 is encountered, an exception is generated.
1552
1553 =cut
1554 */
1555
1556 I32
1557 Perl_lex_peek_unichar(pTHX_ U32 flags)
1558 {
1559     char *s, *bufend;
1560     if (flags & ~(LEX_KEEP_PREVIOUS))
1561         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1562     s = PL_parser->bufptr;
1563     bufend = PL_parser->bufend;
1564     if (UTF) {
1565         U8 head;
1566         I32 unichar;
1567         STRLEN len, retlen;
1568         if (s == bufend) {
1569             if (!lex_next_chunk(flags))
1570                 return -1;
1571             s = PL_parser->bufptr;
1572             bufend = PL_parser->bufend;
1573         }
1574         head = (U8)*s;
1575         if (UTF8_IS_INVARIANT(head))
1576             return head;
1577         if (UTF8_IS_START(head)) {
1578             len = UTF8SKIP(&head);
1579             while ((STRLEN)(bufend-s) < len) {
1580                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1581                     break;
1582                 s = PL_parser->bufptr;
1583                 bufend = PL_parser->bufend;
1584             }
1585         }
1586         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1587         if (retlen == (STRLEN)-1) {
1588             _force_out_malformed_utf8_message((U8 *) s,
1589                                               (U8 *) bufend,
1590                                               0,
1591                                               1 /* 1 means die */ );
1592             NOT_REACHED; /* NOTREACHED */
1593         }
1594         return unichar;
1595     } else {
1596         if (s == bufend) {
1597             if (!lex_next_chunk(flags))
1598                 return -1;
1599             s = PL_parser->bufptr;
1600         }
1601         return (U8)*s;
1602     }
1603 }
1604
1605 /*
1606 =for apidoc lex_read_unichar
1607
1608 Reads the next (Unicode) character in the text currently being lexed.
1609 Returns the codepoint (unsigned integer value) of the character read,
1610 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1611 if lexing has reached the end of the input text.  To non-destructively
1612 examine the next character, use L</lex_peek_unichar> instead.
1613
1614 If the next character is in (or extends into) the next chunk of input
1615 text, the next chunk will be read in.  Normally the current chunk will be
1616 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1617 bit set, then the current chunk will not be discarded.
1618
1619 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1620 is encountered, an exception is generated.
1621
1622 =cut
1623 */
1624
1625 I32
1626 Perl_lex_read_unichar(pTHX_ U32 flags)
1627 {
1628     I32 c;
1629     if (flags & ~(LEX_KEEP_PREVIOUS))
1630         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1631     c = lex_peek_unichar(flags);
1632     if (c != -1) {
1633         if (c == '\n')
1634             COPLINE_INC_WITH_HERELINES;
1635         if (UTF)
1636             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1637         else
1638             ++(PL_parser->bufptr);
1639     }
1640     return c;
1641 }
1642
1643 /*
1644 =for apidoc lex_read_space
1645
1646 Reads optional spaces, in Perl style, in the text currently being
1647 lexed.  The spaces may include ordinary whitespace characters and
1648 Perl-style comments.  C<#line> directives are processed if encountered.
1649 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1650 at a non-space character (or the end of the input text).
1651
1652 If spaces extend into the next chunk of input text, the next chunk will
1653 be read in.  Normally the current chunk will be discarded at the same
1654 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1655 chunk will not be discarded.
1656
1657 =cut
1658 */
1659
1660 #define LEX_NO_INCLINE    0x40000000
1661 #define LEX_NO_NEXT_CHUNK 0x80000000
1662
1663 void
1664 Perl_lex_read_space(pTHX_ U32 flags)
1665 {
1666     char *s, *bufend;
1667     const bool can_incline = !(flags & LEX_NO_INCLINE);
1668     bool need_incline = 0;
1669     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1670         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1671     s = PL_parser->bufptr;
1672     bufend = PL_parser->bufend;
1673     while (1) {
1674         char c = *s;
1675         if (c == '#') {
1676             do {
1677                 c = *++s;
1678             } while (!(c == '\n' || (c == 0 && s == bufend)));
1679         } else if (c == '\n') {
1680             s++;
1681             if (can_incline) {
1682                 PL_parser->linestart = s;
1683                 if (s == bufend)
1684                     need_incline = 1;
1685                 else
1686                     incline(s, bufend);
1687             }
1688         } else if (isSPACE(c)) {
1689             s++;
1690         } else if (c == 0 && s == bufend) {
1691             bool got_more;
1692             line_t l;
1693             if (flags & LEX_NO_NEXT_CHUNK)
1694                 break;
1695             PL_parser->bufptr = s;
1696             l = CopLINE(PL_curcop);
1697             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1698             got_more = lex_next_chunk(flags);
1699             CopLINE_set(PL_curcop, l);
1700             s = PL_parser->bufptr;
1701             bufend = PL_parser->bufend;
1702             if (!got_more)
1703                 break;
1704             if (can_incline && need_incline && PL_parser->rsfp) {
1705                 incline(s, bufend);
1706                 need_incline = 0;
1707             }
1708         } else if (!c) {
1709             s++;
1710         } else {
1711             break;
1712         }
1713     }
1714     PL_parser->bufptr = s;
1715 }
1716
1717 /*
1718
1719 =for apidoc validate_proto
1720
1721 This function performs syntax checking on a prototype, C<proto>.
1722 If C<warn> is true, any illegal characters or mismatched brackets
1723 will trigger illegalproto warnings, declaring that they were
1724 detected in the prototype for C<name>.
1725
1726 The return value is C<true> if this is a valid prototype, and
1727 C<false> if it is not, regardless of whether C<warn> was C<true> or
1728 C<false>.
1729
1730 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1731
1732 =cut
1733
1734  */
1735
1736 bool
1737 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1738 {
1739     STRLEN len, origlen;
1740     char *p;
1741     bool bad_proto = FALSE;
1742     bool in_brackets = FALSE;
1743     bool after_slash = FALSE;
1744     char greedy_proto = ' ';
1745     bool proto_after_greedy_proto = FALSE;
1746     bool must_be_last = FALSE;
1747     bool underscore = FALSE;
1748     bool bad_proto_after_underscore = FALSE;
1749
1750     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1751
1752     if (!proto)
1753         return TRUE;
1754
1755     p = SvPV(proto, len);
1756     origlen = len;
1757     for (; len--; p++) {
1758         if (!isSPACE(*p)) {
1759             if (must_be_last)
1760                 proto_after_greedy_proto = TRUE;
1761             if (underscore) {
1762                 if (!memCHRs(";@%", *p))
1763                     bad_proto_after_underscore = TRUE;
1764                 underscore = FALSE;
1765             }
1766             if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1767                 bad_proto = TRUE;
1768             }
1769             else {
1770                 if (*p == '[')
1771                     in_brackets = TRUE;
1772                 else if (*p == ']')
1773                     in_brackets = FALSE;
1774                 else if ((*p == '@' || *p == '%')
1775                          && !after_slash
1776                          && !in_brackets )
1777                 {
1778                     must_be_last = TRUE;
1779                     greedy_proto = *p;
1780                 }
1781                 else if (*p == '_')
1782                     underscore = TRUE;
1783             }
1784             if (*p == '\\')
1785                 after_slash = TRUE;
1786             else
1787                 after_slash = FALSE;
1788         }
1789     }
1790
1791     if (warn) {
1792         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1793         p -= origlen;
1794         p = SvUTF8(proto)
1795             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1796                              origlen, UNI_DISPLAY_ISPRINT)
1797             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1798
1799         if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1800             SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1801             sv_catpvs(name2, "::");
1802             sv_catsv(name2, (SV *)name);
1803             name = name2;
1804         }
1805
1806         if (proto_after_greedy_proto)
1807             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1808                         "Prototype after '%c' for %" SVf " : %s",
1809                         greedy_proto, SVfARG(name), p);
1810         if (in_brackets)
1811             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1812                         "Missing ']' in prototype for %" SVf " : %s",
1813                         SVfARG(name), p);
1814         if (bad_proto)
1815             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1816                         "Illegal character in prototype for %" SVf " : %s",
1817                         SVfARG(name), p);
1818         if (bad_proto_after_underscore)
1819             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1820                         "Illegal character after '_' in prototype for %" SVf " : %s",
1821                         SVfARG(name), p);
1822     }
1823
1824     return (! (proto_after_greedy_proto || bad_proto) );
1825 }
1826
1827 /*
1828  * S_incline
1829  * This subroutine has nothing to do with tilting, whether at windmills
1830  * or pinball tables.  Its name is short for "increment line".  It
1831  * increments the current line number in CopLINE(PL_curcop) and checks
1832  * to see whether the line starts with a comment of the form
1833  *    # line 500 "foo.pm"
1834  * If so, it sets the current line number and file to the values in the comment.
1835  */
1836
1837 STATIC void
1838 S_incline(pTHX_ const char *s, const char *end)
1839 {
1840     const char *t;
1841     const char *n;
1842     const char *e;
1843     line_t line_num;
1844     UV uv;
1845
1846     PERL_ARGS_ASSERT_INCLINE;
1847
1848     assert(end >= s);
1849
1850     COPLINE_INC_WITH_HERELINES;
1851     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1852      && s+1 == PL_bufend && *s == ';') {
1853         /* fake newline in string eval */
1854         CopLINE_dec(PL_curcop);
1855         return;
1856     }
1857     if (*s++ != '#')
1858         return;
1859     while (SPACE_OR_TAB(*s))
1860         s++;
1861     if (memBEGINs(s, (STRLEN) (end - s), "line"))
1862         s += sizeof("line") - 1;
1863     else
1864         return;
1865     if (SPACE_OR_TAB(*s))
1866         s++;
1867     else
1868         return;
1869     while (SPACE_OR_TAB(*s))
1870         s++;
1871     if (!isDIGIT(*s))
1872         return;
1873
1874     n = s;
1875     while (isDIGIT(*s))
1876         s++;
1877     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1878         return;
1879     while (SPACE_OR_TAB(*s))
1880         s++;
1881     if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1882         s++;
1883         e = t + 1;
1884     }
1885     else {
1886         t = s;
1887         while (*t && !isSPACE(*t))
1888             t++;
1889         e = t;
1890     }
1891     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1892         e++;
1893     if (*e != '\n' && *e != '\0')
1894         return;         /* false alarm */
1895
1896     if (!grok_atoUV(n, &uv, &e))
1897         return;
1898     line_num = ((line_t)uv) - 1;
1899
1900     if (t - s > 0) {
1901         const STRLEN len = t - s;
1902
1903         if (!PL_rsfp && !PL_parser->filtered) {
1904             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1905              * to *{"::_<newfilename"} */
1906             /* However, the long form of evals is only turned on by the
1907                debugger - usually they're "(eval %lu)" */
1908             GV * const cfgv = CopFILEGV(PL_curcop);
1909             if (cfgv) {
1910                 char smallbuf[128];
1911                 STRLEN tmplen2 = len;
1912                 char *tmpbuf2;
1913                 GV *gv2;
1914
1915                 if (tmplen2 + 2 <= sizeof smallbuf)
1916                     tmpbuf2 = smallbuf;
1917                 else
1918                     Newx(tmpbuf2, tmplen2 + 2, char);
1919
1920                 tmpbuf2[0] = '_';
1921                 tmpbuf2[1] = '<';
1922
1923                 memcpy(tmpbuf2 + 2, s, tmplen2);
1924                 tmplen2 += 2;
1925
1926                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1927                 if (!isGV(gv2)) {
1928                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1929                     /* adjust ${"::_<newfilename"} to store the new file name */
1930                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1931                     /* The line number may differ. If that is the case,
1932                        alias the saved lines that are in the array.
1933                        Otherwise alias the whole array. */
1934                     if (CopLINE(PL_curcop) == line_num) {
1935                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1936                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1937                     }
1938                     else if (GvAV(cfgv)) {
1939                         AV * const av = GvAV(cfgv);
1940                         const line_t start = CopLINE(PL_curcop)+1;
1941                         SSize_t items = AvFILLp(av) - start;
1942                         if (items > 0) {
1943                             AV * const av2 = GvAVn(gv2);
1944                             SV **svp = AvARRAY(av) + start;
1945                             Size_t l = line_num+1;
1946                             while (items-- && l < SSize_t_MAX && l == (line_t)l)
1947                                 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1948                         }
1949                     }
1950                 }
1951
1952                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1953             }
1954         }
1955         CopFILE_free(PL_curcop);
1956         CopFILE_setn(PL_curcop, s, len);
1957     }
1958     CopLINE_set(PL_curcop, line_num);
1959 }
1960
1961 STATIC void
1962 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1963 {
1964     AV *av = CopFILEAVx(PL_curcop);
1965     if (av) {
1966         SV * sv;
1967         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1968         else {
1969             sv = *av_fetch(av, 0, 1);
1970             SvUPGRADE(sv, SVt_PVMG);
1971         }
1972         if (!SvPOK(sv)) SvPVCLEAR(sv);
1973         if (orig_sv)
1974             sv_catsv(sv, orig_sv);
1975         else
1976             sv_catpvn(sv, buf, len);
1977         if (!SvIOK(sv)) {
1978             (void)SvIOK_on(sv);
1979             SvIV_set(sv, 0);
1980         }
1981         if (PL_parser->preambling == NOLINE)
1982             av_store(av, CopLINE(PL_curcop), sv);
1983     }
1984 }
1985
1986 /*
1987  * skipspace
1988  * Called to gobble the appropriate amount and type of whitespace.
1989  * Skips comments as well.
1990  * Returns the next character after the whitespace that is skipped.
1991  *
1992  * peekspace
1993  * Same thing, but look ahead without incrementing line numbers or
1994  * adjusting PL_linestart.
1995  */
1996
1997 #define skipspace(s) skipspace_flags(s, 0)
1998 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1999
2000 char *
2001 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
2002 {
2003     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
2004     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2005         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
2006             s++;
2007     } else {
2008         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
2009         PL_bufptr = s;
2010         lex_read_space(flags | LEX_KEEP_PREVIOUS |
2011                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
2012                     LEX_NO_NEXT_CHUNK : 0));
2013         s = PL_bufptr;
2014         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
2015         if (PL_linestart > PL_bufptr)
2016             PL_bufptr = PL_linestart;
2017         return s;
2018     }
2019     return s;
2020 }
2021
2022 /*
2023  * S_check_uni
2024  * Check the unary operators to ensure there's no ambiguity in how they're
2025  * used.  An ambiguous piece of code would be:
2026  *     rand + 5
2027  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
2028  * the +5 is its argument.
2029  */
2030
2031 STATIC void
2032 S_check_uni(pTHX)
2033 {
2034     const char *s;
2035
2036     if (PL_oldoldbufptr != PL_last_uni)
2037         return;
2038     while (isSPACE(*PL_last_uni))
2039         PL_last_uni++;
2040     s = PL_last_uni;
2041     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
2042         s += UTF ? UTF8SKIP(s) : 1;
2043     if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
2044         return;
2045
2046     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2047                      "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
2048                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
2049 }
2050
2051 /*
2052  * LOP : macro to build a list operator.  Its behaviour has been replaced
2053  * with a subroutine, S_lop() for which LOP is just another name.
2054  */
2055
2056 #define LOP(f,x) return lop(f,x,s)
2057
2058 /*
2059  * S_lop
2060  * Build a list operator (or something that might be one).  The rules:
2061  *  - if we have a next token, then it's a list operator (no parens) for
2062  *    which the next token has already been parsed; e.g.,
2063  *       sort foo @args
2064  *       sort foo (@args)
2065  *  - if the next thing is an opening paren, then it's a function
2066  *  - else it's a list operator
2067  */
2068
2069 STATIC I32
2070 S_lop(pTHX_ I32 f, U8 x, char *s)
2071 {
2072     PERL_ARGS_ASSERT_LOP;
2073
2074     pl_yylval.ival = f;
2075     CLINE;
2076     PL_bufptr = s;
2077     PL_last_lop = PL_oldbufptr;
2078     PL_last_lop_op = (OPCODE)f;
2079     if (PL_nexttoke)
2080         goto lstop;
2081     PL_expect = x;
2082     if (*s == '(')
2083         return REPORT(FUNC);
2084     s = skipspace(s);
2085     if (*s == '(')
2086         return REPORT(FUNC);
2087     else {
2088         lstop:
2089         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2090             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2091         return REPORT(LSTOP);
2092     }
2093 }
2094
2095 /*
2096  * S_force_next
2097  * When the lexer realizes it knows the next token (for instance,
2098  * it is reordering tokens for the parser) then it can call S_force_next
2099  * to know what token to return the next time the lexer is called.  Caller
2100  * will need to set PL_nextval[] and possibly PL_expect to ensure
2101  * the lexer handles the token correctly.
2102  */
2103
2104 STATIC void
2105 S_force_next(pTHX_ I32 type)
2106 {
2107 #ifdef DEBUGGING
2108     if (DEBUG_T_TEST) {
2109         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2110         tokereport(type, &NEXTVAL_NEXTTOKE);
2111     }
2112 #endif
2113     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2114     PL_nexttype[PL_nexttoke] = type;
2115     PL_nexttoke++;
2116 }
2117
2118 /*
2119  * S_postderef
2120  *
2121  * This subroutine handles postfix deref syntax after the arrow has already
2122  * been emitted.  @* $* etc. are emitted as two separate tokens right here.
2123  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2124  * only the first, leaving yylex to find the next.
2125  */
2126
2127 static int
2128 S_postderef(pTHX_ int const funny, char const next)
2129 {
2130     assert(funny == DOLSHARP
2131         || memCHRs("$@%&*", funny)
2132         || funny == PERLY_DOLLAR
2133         || funny == PERLY_SNAIL
2134         || funny == PERLY_PERCENT_SIGN
2135         || funny == PERLY_AMPERSAND
2136         || funny == PERLY_STAR
2137     );
2138     if (next == '*') {
2139         PL_expect = XOPERATOR;
2140         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2141             assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny);
2142             PL_lex_state = LEX_INTERPEND;
2143             if (PERLY_SNAIL == funny)
2144                 force_next(POSTJOIN);
2145         }
2146         force_next(PERLY_STAR);
2147         PL_bufptr+=2;
2148     }
2149     else {
2150         if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
2151          && !PL_lex_brackets)
2152             PL_lex_dojoin = 2;
2153         PL_expect = XOPERATOR;
2154         PL_bufptr++;
2155     }
2156     return funny;
2157 }
2158
2159 void
2160 Perl_yyunlex(pTHX)
2161 {
2162     int yyc = PL_parser->yychar;
2163     if (yyc != YYEMPTY) {
2164         if (yyc) {
2165             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2166             if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) {
2167                 PL_lex_allbrackets--;
2168                 PL_lex_brackets--;
2169                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2170             } else if (yyc == PERLY_PAREN_OPEN) {
2171                 PL_lex_allbrackets--;
2172                 yyc |= (2<<24);
2173             }
2174             force_next(yyc);
2175         }
2176         PL_parser->yychar = YYEMPTY;
2177     }
2178 }
2179
2180 STATIC SV *
2181 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2182 {
2183     SV * const sv = newSVpvn_utf8(start, len,
2184                     ! IN_BYTES
2185                   &&  UTF
2186                   &&  len != 0
2187                   &&  is_utf8_non_invariant_string((const U8*)start, len));
2188     return sv;
2189 }
2190
2191 /*
2192  * S_force_word
2193  * When the lexer knows the next thing is a word (for instance, it has
2194  * just seen -> and it knows that the next char is a word char, then
2195  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2196  * lookahead.
2197  *
2198  * Arguments:
2199  *   char *start : buffer position (must be within PL_linestr)
2200  *   int token   : PL_next* will be this type of bare word
2201  *                 (e.g., METHOD,BAREWORD)
2202  *   int check_keyword : if true, Perl checks to make sure the word isn't
2203  *       a keyword (do this if the word is a label, e.g. goto FOO)
2204  *   int allow_pack : if true, : characters will also be allowed (require,
2205  *       use, etc. do this)
2206  */
2207
2208 STATIC char *
2209 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2210 {
2211     char *s;
2212     STRLEN len;
2213
2214     PERL_ARGS_ASSERT_FORCE_WORD;
2215
2216     start = skipspace(start);
2217     s = start;
2218     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2219         || (allow_pack && *s == ':' && s[1] == ':') )
2220     {
2221         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2222         if (check_keyword) {
2223           char *s2 = PL_tokenbuf;
2224           STRLEN len2 = len;
2225           if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2226             s2 += sizeof("CORE::") - 1;
2227             len2 -= sizeof("CORE::") - 1;
2228           }
2229           if (keyword(s2, len2, 0))
2230             return start;
2231         }
2232         if (token == METHOD) {
2233             s = skipspace(s);
2234             if (*s == '(')
2235                 PL_expect = XTERM;
2236             else {
2237                 PL_expect = XOPERATOR;
2238             }
2239         }
2240         NEXTVAL_NEXTTOKE.opval
2241             = newSVOP(OP_CONST,0,
2242                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2243         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2244         force_next(token);
2245     }
2246     return s;
2247 }
2248
2249 /*
2250  * S_force_ident
2251  * Called when the lexer wants $foo *foo &foo etc, but the program
2252  * text only contains the "foo" portion.  The first argument is a pointer
2253  * to the "foo", and the second argument is the type symbol to prefix.
2254  * Forces the next token to be a "BAREWORD".
2255  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2256  */
2257
2258 STATIC void
2259 S_force_ident(pTHX_ const char *s, int kind)
2260 {
2261     PERL_ARGS_ASSERT_FORCE_IDENT;
2262
2263     if (s[0]) {
2264         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2265         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2266                                                                 UTF ? SVf_UTF8 : 0));
2267         NEXTVAL_NEXTTOKE.opval = o;
2268         force_next(BAREWORD);
2269         if (kind) {
2270             o->op_private = OPpCONST_ENTERED;
2271             /* XXX see note in pp_entereval() for why we forgo typo
2272                warnings if the symbol must be introduced in an eval.
2273                GSAR 96-10-12 */
2274             gv_fetchpvn_flags(s, len,
2275                               (PL_in_eval ? GV_ADDMULTI
2276                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2277                               kind == PERLY_DOLLAR ? SVt_PV :
2278                               kind == PERLY_SNAIL ? SVt_PVAV :
2279                               kind == PERLY_PERCENT_SIGN ? SVt_PVHV :
2280                               SVt_PVGV
2281                               );
2282         }
2283     }
2284 }
2285
2286 static void
2287 S_force_ident_maybe_lex(pTHX_ char pit)
2288 {
2289     NEXTVAL_NEXTTOKE.ival = pit;
2290     force_next('p');
2291 }
2292
2293 NV
2294 Perl_str_to_version(pTHX_ SV *sv)
2295 {
2296     NV retval = 0.0;
2297     NV nshift = 1.0;
2298     STRLEN len;
2299     const char *start = SvPV_const(sv,len);
2300     const char * const end = start + len;
2301     const bool utf = cBOOL(SvUTF8(sv));
2302
2303     PERL_ARGS_ASSERT_STR_TO_VERSION;
2304
2305     while (start < end) {
2306         STRLEN skip;
2307         UV n;
2308         if (utf)
2309             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2310         else {
2311             n = *(U8*)start;
2312             skip = 1;
2313         }
2314         retval += ((NV)n)/nshift;
2315         start += skip;
2316         nshift *= 1000;
2317     }
2318     return retval;
2319 }
2320
2321 /*
2322  * S_force_version
2323  * Forces the next token to be a version number.
2324  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2325  * and if "guessing" is TRUE, then no new token is created (and the caller
2326  * must use an alternative parsing method).
2327  */
2328
2329 STATIC char *
2330 S_force_version(pTHX_ char *s, int guessing)
2331 {
2332     OP *version = NULL;
2333     char *d;
2334
2335     PERL_ARGS_ASSERT_FORCE_VERSION;
2336
2337     s = skipspace(s);
2338
2339     d = s;
2340     if (*d == 'v')
2341         d++;
2342     if (isDIGIT(*d)) {
2343         while (isDIGIT(*d) || *d == '_' || *d == '.')
2344             d++;
2345         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2346             SV *ver;
2347             s = scan_num(s, &pl_yylval);
2348             version = pl_yylval.opval;
2349             ver = cSVOPx(version)->op_sv;
2350             if (SvPOK(ver) && !SvNIOK(ver)) {
2351                 SvUPGRADE(ver, SVt_PVNV);
2352                 SvNV_set(ver, str_to_version(ver));
2353                 SvNOK_on(ver);          /* hint that it is a version */
2354             }
2355         }
2356         else if (guessing) {
2357             return s;
2358         }
2359     }
2360
2361     /* NOTE: The parser sees the package name and the VERSION swapped */
2362     NEXTVAL_NEXTTOKE.opval = version;
2363     force_next(BAREWORD);
2364
2365     return s;
2366 }
2367
2368 /*
2369  * S_force_strict_version
2370  * Forces the next token to be a version number using strict syntax rules.
2371  */
2372
2373 STATIC char *
2374 S_force_strict_version(pTHX_ char *s)
2375 {
2376     OP *version = NULL;
2377     const char *errstr = NULL;
2378
2379     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2380
2381     while (isSPACE(*s)) /* leading whitespace */
2382         s++;
2383
2384     if (is_STRICT_VERSION(s,&errstr)) {
2385         SV *ver = newSV(0);
2386         s = (char *)scan_version(s, ver, 0);
2387         version = newSVOP(OP_CONST, 0, ver);
2388     }
2389     else if ((*s != ';' && *s != '{' && *s != '}' )
2390              && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2391     {
2392         PL_bufptr = s;
2393         if (errstr)
2394             yyerror(errstr); /* version required */
2395         return s;
2396     }
2397
2398     /* NOTE: The parser sees the package name and the VERSION swapped */
2399     NEXTVAL_NEXTTOKE.opval = version;
2400     force_next(BAREWORD);
2401
2402     return s;
2403 }
2404
2405 /*
2406  * S_tokeq
2407  * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2408  * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2409  * unchanged, and a new SV containing the modified input is returned.
2410  */
2411
2412 STATIC SV *
2413 S_tokeq(pTHX_ SV *sv)
2414 {
2415     char *s;
2416     char *send;
2417     char *d;
2418     SV *pv = sv;
2419
2420     PERL_ARGS_ASSERT_TOKEQ;
2421
2422     assert (SvPOK(sv));
2423     assert (SvLEN(sv));
2424     assert (!SvIsCOW(sv));
2425     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2426         goto finish;
2427     s = SvPVX(sv);
2428     send = SvEND(sv);
2429     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2430     while (s < send && !(*s == '\\' && s[1] == '\\'))
2431         s++;
2432     if (s == send)
2433         goto finish;
2434     d = s;
2435     if ( PL_hints & HINT_NEW_STRING ) {
2436         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2437                             SVs_TEMP | SvUTF8(sv));
2438     }
2439     while (s < send) {
2440         if (*s == '\\') {
2441             if (s + 1 < send && (s[1] == '\\'))
2442                 s++;            /* all that, just for this */
2443         }
2444         *d++ = *s++;
2445     }
2446     *d = '\0';
2447     SvCUR_set(sv, d - SvPVX_const(sv));
2448   finish:
2449     if ( PL_hints & HINT_NEW_STRING )
2450        return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2451     return sv;
2452 }
2453
2454 /*
2455  * Now come three functions related to double-quote context,
2456  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2457  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2458  * interact with PL_lex_state, and create fake ( ... ) argument lists
2459  * to handle functions and concatenation.
2460  * For example,
2461  *   "foo\lbar"
2462  * is tokenised as
2463  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2464  */
2465
2466 /*
2467  * S_sublex_start
2468  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2469  *
2470  * Pattern matching will set PL_lex_op to the pattern-matching op to
2471  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2472  *
2473  * OP_CONST is easy--just make the new op and return.
2474  *
2475  * Everything else becomes a FUNC.
2476  *
2477  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2478  * had an OP_CONST.  This just sets us up for a
2479  * call to S_sublex_push().
2480  */
2481
2482 STATIC I32
2483 S_sublex_start(pTHX)
2484 {
2485     const I32 op_type = pl_yylval.ival;
2486
2487     if (op_type == OP_NULL) {
2488         pl_yylval.opval = PL_lex_op;
2489         PL_lex_op = NULL;
2490         return THING;
2491     }
2492     if (op_type == OP_CONST) {
2493         SV *sv = PL_lex_stuff;
2494         PL_lex_stuff = NULL;
2495         sv = tokeq(sv);
2496
2497         if (SvTYPE(sv) == SVt_PVIV) {
2498             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2499             STRLEN len;
2500             const char * const p = SvPV_const(sv, len);
2501             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2502             SvREFCNT_dec(sv);
2503             sv = nsv;
2504         }
2505         pl_yylval.opval = newSVOP(op_type, 0, sv);
2506         return THING;
2507     }
2508
2509     PL_parser->lex_super_state = PL_lex_state;
2510     PL_parser->lex_sub_inwhat = (U16)op_type;
2511     PL_parser->lex_sub_op = PL_lex_op;
2512     PL_parser->sub_no_recover = FALSE;
2513     PL_parser->sub_error_count = PL_error_count;
2514     PL_lex_state = LEX_INTERPPUSH;
2515
2516     PL_expect = XTERM;
2517     if (PL_lex_op) {
2518         pl_yylval.opval = PL_lex_op;
2519         PL_lex_op = NULL;
2520         return PMFUNC;
2521     }
2522     else
2523         return FUNC;
2524 }
2525
2526 /*
2527  * S_sublex_push
2528  * Create a new scope to save the lexing state.  The scope will be
2529  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2530  * to the uc, lc, etc. found before.
2531  * Sets PL_lex_state to LEX_INTERPCONCAT.
2532  */
2533
2534 STATIC I32
2535 S_sublex_push(pTHX)
2536 {
2537     LEXSHARED *shared;
2538     const bool is_heredoc = PL_multi_close == '<';
2539     ENTER;
2540
2541     PL_lex_state = PL_parser->lex_super_state;
2542     SAVEI8(PL_lex_dojoin);
2543     SAVEI32(PL_lex_brackets);
2544     SAVEI32(PL_lex_allbrackets);
2545     SAVEI32(PL_lex_formbrack);
2546     SAVEI8(PL_lex_fakeeof);
2547     SAVEI32(PL_lex_casemods);
2548     SAVEI32(PL_lex_starts);
2549     SAVEI8(PL_lex_state);
2550     SAVESPTR(PL_lex_repl);
2551     SAVEVPTR(PL_lex_inpat);
2552     SAVEI16(PL_lex_inwhat);
2553     if (is_heredoc)
2554     {
2555         SAVECOPLINE(PL_curcop);
2556         SAVEI32(PL_multi_end);
2557         SAVEI32(PL_parser->herelines);
2558         PL_parser->herelines = 0;
2559     }
2560     SAVEIV(PL_multi_close);
2561     SAVEPPTR(PL_bufptr);
2562     SAVEPPTR(PL_bufend);
2563     SAVEPPTR(PL_oldbufptr);
2564     SAVEPPTR(PL_oldoldbufptr);
2565     SAVEPPTR(PL_last_lop);
2566     SAVEPPTR(PL_last_uni);
2567     SAVEPPTR(PL_linestart);
2568     SAVESPTR(PL_linestr);
2569     SAVEGENERICPV(PL_lex_brackstack);
2570     SAVEGENERICPV(PL_lex_casestack);
2571     SAVEGENERICPV(PL_parser->lex_shared);
2572     SAVEBOOL(PL_parser->lex_re_reparsing);
2573     SAVEI32(PL_copline);
2574
2575     /* The here-doc parser needs to be able to peek into outer lexing
2576        scopes to find the body of the here-doc.  So we put PL_linestr and
2577        PL_bufptr into lex_shared, to ‘share’ those values.
2578      */
2579     PL_parser->lex_shared->ls_linestr = PL_linestr;
2580     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2581
2582     PL_linestr = PL_lex_stuff;
2583     PL_lex_repl = PL_parser->lex_sub_repl;
2584     PL_lex_stuff = NULL;
2585     PL_parser->lex_sub_repl = NULL;
2586
2587     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2588        set for an inner quote-like operator and then an error causes scope-
2589        popping.  We must not have a PL_lex_stuff value left dangling, as
2590        that breaks assumptions elsewhere.  See bug #123617.  */
2591     SAVEGENERICSV(PL_lex_stuff);
2592     SAVEGENERICSV(PL_parser->lex_sub_repl);
2593
2594     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2595         = SvPVX(PL_linestr);
2596     PL_bufend += SvCUR(PL_linestr);
2597     PL_last_lop = PL_last_uni = NULL;
2598     SAVEFREESV(PL_linestr);
2599     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2600
2601     PL_lex_dojoin = FALSE;
2602     PL_lex_brackets = PL_lex_formbrack = 0;
2603     PL_lex_allbrackets = 0;
2604     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2605     Newx(PL_lex_brackstack, 120, char);
2606     Newx(PL_lex_casestack, 12, char);
2607     PL_lex_casemods = 0;
2608     *PL_lex_casestack = '\0';
2609     PL_lex_starts = 0;
2610     PL_lex_state = LEX_INTERPCONCAT;
2611     if (is_heredoc)
2612         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2613     PL_copline = NOLINE;
2614
2615     Newxz(shared, 1, LEXSHARED);
2616     shared->ls_prev = PL_parser->lex_shared;
2617     PL_parser->lex_shared = shared;
2618
2619     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2620     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2621     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2622         PL_lex_inpat = PL_parser->lex_sub_op;
2623     else
2624         PL_lex_inpat = NULL;
2625
2626     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2627     PL_in_eval &= ~EVAL_RE_REPARSING;
2628
2629     return SUBLEXSTART;
2630 }
2631
2632 /*
2633  * S_sublex_done
2634  * Restores lexer state after a S_sublex_push.
2635  */
2636
2637 STATIC I32
2638 S_sublex_done(pTHX)
2639 {
2640     if (!PL_lex_starts++) {
2641         SV * const sv = newSVpvs("");
2642         if (SvUTF8(PL_linestr))
2643             SvUTF8_on(sv);
2644         PL_expect = XOPERATOR;
2645         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2646         return THING;
2647     }
2648
2649     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2650         PL_lex_state = LEX_INTERPCASEMOD;
2651         return yylex();
2652     }
2653
2654     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2655     assert(PL_lex_inwhat != OP_TRANSR);
2656     if (PL_lex_repl) {
2657         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2658         PL_linestr = PL_lex_repl;
2659         PL_lex_inpat = 0;
2660         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2661         PL_bufend += SvCUR(PL_linestr);
2662         PL_last_lop = PL_last_uni = NULL;
2663         PL_lex_dojoin = FALSE;
2664         PL_lex_brackets = 0;
2665         PL_lex_allbrackets = 0;
2666         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2667         PL_lex_casemods = 0;
2668         *PL_lex_casestack = '\0';
2669         PL_lex_starts = 0;
2670         if (SvEVALED(PL_lex_repl)) {
2671             PL_lex_state = LEX_INTERPNORMAL;
2672             PL_lex_starts++;
2673             /*  we don't clear PL_lex_repl here, so that we can check later
2674                 whether this is an evalled subst; that means we rely on the
2675                 logic to ensure sublex_done() is called again only via the
2676                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2677         }
2678         else {
2679             PL_lex_state = LEX_INTERPCONCAT;
2680             PL_lex_repl = NULL;
2681         }
2682         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2683             CopLINE(PL_curcop) +=
2684                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2685                  + PL_parser->herelines;
2686             PL_parser->herelines = 0;
2687         }
2688         return PERLY_SLASH;
2689     }
2690     else {
2691         const line_t l = CopLINE(PL_curcop);
2692         LEAVE;
2693         if (PL_parser->sub_error_count != PL_error_count) {
2694             if (PL_parser->sub_no_recover) {
2695                 yyquit();
2696                 NOT_REACHED;
2697             }
2698         }
2699         if (PL_multi_close == '<')
2700             PL_parser->herelines += l - PL_multi_end;
2701         PL_bufend = SvPVX(PL_linestr);
2702         PL_bufend += SvCUR(PL_linestr);
2703         PL_expect = XOPERATOR;
2704         return SUBLEXEND;
2705     }
2706 }
2707
2708 HV *
2709 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2710                           const STRLEN context_len, const char ** error_msg)
2711 {
2712     /* Load the official _charnames module if not already there.  The
2713      * parameters are just to give info for any error messages generated:
2714      *  char_name   a name to look up which is the reason for loading this
2715      *  context     'char_name' in the context in the input in which it appears
2716      *  context_len how many bytes 'context' occupies
2717      *  error_msg   *error_msg will be set to any error
2718      *
2719      *  Returns the ^H table if success; otherwise NULL */
2720
2721     unsigned int i;
2722     HV * table;
2723     SV **cvp;
2724     SV * res;
2725
2726     PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2727
2728     /* This loop is executed 1 1/2 times.  On the first time through, if it
2729      * isn't already loaded, try loading it, and iterate just once to see if it
2730      * worked.  */
2731     for (i = 0; i < 2; i++) {
2732         table = GvHV(PL_hintgv);                 /* ^H */
2733
2734         if (    table
2735             && (PL_hints & HINT_LOCALIZE_HH)
2736             && (cvp = hv_fetchs(table, "charnames", FALSE))
2737             &&  SvOK(*cvp))
2738         {
2739             return table;   /* Quit if already loaded */
2740         }
2741
2742         if (i == 0) {
2743             Perl_load_module(aTHX_
2744                 0,
2745                 newSVpvs("_charnames"),
2746
2747                 /* version parameter; no need to specify it, as if we get too early
2748                 * a version, will fail anyway, not being able to find 'charnames'
2749                 * */
2750                 NULL,
2751                 newSVpvs(":full"),
2752                 newSVpvs(":short"),
2753                 NULL);
2754         }
2755     }
2756
2757     /* Here, it failed; new_constant will give appropriate error messages */
2758     *error_msg = NULL;
2759     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2760                         context, context_len, error_msg);
2761     SvREFCNT_dec(res);
2762
2763     return NULL;
2764 }
2765
2766 STATIC SV*
2767 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2768 {
2769     /* This justs wraps get_and_check_backslash_N_name() to output any error
2770      * message it returns. */
2771
2772     const char * error_msg = NULL;
2773     SV * result;
2774
2775     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2776
2777     /* charnames doesn't work well if there have been errors found */
2778     if (PL_error_count > 0) {
2779         return NULL;
2780     }
2781
2782     result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2783
2784     if (error_msg) {
2785         yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2786     }
2787
2788     return result;
2789 }
2790
2791 SV*
2792 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2793                                           const char* e,
2794                                           const bool is_utf8,
2795                                           const char ** error_msg)
2796 {
2797     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2798      * interior, hence to the "}".  Finds what the name resolves to, returning
2799      * an SV* containing it; NULL if no valid one found.
2800      *
2801      * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2802      * doesn't have to be. */
2803
2804     SV* char_name;
2805     SV* res;
2806     HV * table;
2807     SV **cvp;
2808     SV *cv;
2809     SV *rv;
2810     HV *stash;
2811
2812     /* Points to the beginning of the \N{... so that any messages include the
2813      * context of what's failing*/
2814     const char* context = s - 3;
2815     STRLEN context_len = e - context + 1; /* include all of \N{...} */
2816
2817
2818     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2819
2820     assert(e >= s);
2821     assert(s > (char *) 3);
2822
2823     while (s < e && isBLANK(*s)) {
2824         s++;
2825     }
2826
2827     while (s < e && isBLANK(*(e - 1))) {
2828         e--;
2829     }
2830
2831     char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2832
2833     if (!SvCUR(char_name)) {
2834         SvREFCNT_dec_NN(char_name);
2835         /* diag_listed_as: Unknown charname '%s' */
2836         *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2837         return NULL;
2838     }
2839
2840     /* Autoload the charnames module */
2841
2842     table = load_charnames(char_name, context, context_len, error_msg);
2843     if (table == NULL) {
2844         return NULL;
2845     }
2846
2847     *error_msg = NULL;
2848     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2849                         context, context_len, error_msg);
2850     if (*error_msg) {
2851         *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2852
2853         SvREFCNT_dec(res);
2854         return NULL;
2855     }
2856
2857     /* See if the charnames handler is the Perl core's, and if so, we can skip
2858      * the validation needed for a user-supplied one, as Perl's does its own
2859      * validation. */
2860     cvp = hv_fetchs(table, "charnames", FALSE);
2861     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2862         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2863     {
2864         const char * const name = HvNAME(stash);
2865          if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2866            return res;
2867        }
2868     }
2869
2870     /* Here, it isn't Perl's charname handler.  We can't rely on a
2871      * user-supplied handler to validate the input name.  For non-ut8 input,
2872      * look to see that the first character is legal.  Then loop through the
2873      * rest checking that each is a continuation */
2874
2875     /* This code makes the reasonable assumption that the only Latin1-range
2876      * characters that begin a character name alias are alphabetic, otherwise
2877      * would have to create a isCHARNAME_BEGIN macro */
2878
2879     if (! is_utf8) {
2880         if (! isALPHAU(*s)) {
2881             goto bad_charname;
2882         }
2883         s++;
2884         while (s < e) {
2885             if (! isCHARNAME_CONT(*s)) {
2886                 goto bad_charname;
2887             }
2888             if (*s == ' ' && *(s-1) == ' ') {
2889                 goto multi_spaces;
2890             }
2891             s++;
2892         }
2893     }
2894     else {
2895         /* Similarly for utf8.  For invariants can check directly; for other
2896          * Latin1, can calculate their code point and check; otherwise  use an
2897          * inversion list */
2898         if (UTF8_IS_INVARIANT(*s)) {
2899             if (! isALPHAU(*s)) {
2900                 goto bad_charname;
2901             }
2902             s++;
2903         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2904             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2905                 goto bad_charname;
2906             }
2907             s += 2;
2908         }
2909         else {
2910             if (! _invlist_contains_cp(PL_utf8_charname_begin,
2911                                        utf8_to_uvchr_buf((U8 *) s,
2912                                                          (U8 *) e,
2913                                                          NULL)))
2914             {
2915                 goto bad_charname;
2916             }
2917             s += UTF8SKIP(s);
2918         }
2919
2920         while (s < e) {
2921             if (UTF8_IS_INVARIANT(*s)) {
2922                 if (! isCHARNAME_CONT(*s)) {
2923                     goto bad_charname;
2924                 }
2925                 if (*s == ' ' && *(s-1) == ' ') {
2926                     goto multi_spaces;
2927                 }
2928                 s++;
2929             }
2930             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2931                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2932                 {
2933                     goto bad_charname;
2934                 }
2935                 s += 2;
2936             }
2937             else {
2938                 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2939                                            utf8_to_uvchr_buf((U8 *) s,
2940                                                              (U8 *) e,
2941                                                              NULL)))
2942                 {
2943                     goto bad_charname;
2944                 }
2945                 s += UTF8SKIP(s);
2946             }
2947         }
2948     }
2949     if (*(s-1) == ' ') {
2950         /* diag_listed_as: charnames alias definitions may not contain
2951                            trailing white-space; marked by <-- HERE in %s
2952          */
2953         *error_msg = Perl_form(aTHX_
2954             "charnames alias definitions may not contain trailing "
2955             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2956             (int)(s - context + 1), context,
2957             (int)(e - s + 1), s + 1);
2958         return NULL;
2959     }
2960
2961     if (SvUTF8(res)) { /* Don't accept malformed charname value */
2962         const U8* first_bad_char_loc;
2963         STRLEN len;
2964         const char* const str = SvPV_const(res, len);
2965         if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2966                                           &first_bad_char_loc)))
2967         {
2968             _force_out_malformed_utf8_message(first_bad_char_loc,
2969                                               (U8 *) PL_parser->bufend,
2970                                               0,
2971                                               0 /* 0 means don't die */ );
2972             /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2973                                immediately after '%s' */
2974             *error_msg = Perl_form(aTHX_
2975                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2976                  (int) context_len, context,
2977                  (int) ((char *) first_bad_char_loc - str), str);
2978             return NULL;
2979         }
2980     }
2981
2982     return res;
2983
2984   bad_charname: {
2985
2986         /* The final %.*s makes sure that should the trailing NUL be missing
2987          * that this print won't run off the end of the string */
2988         /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2989                            in \N{%s} */
2990         *error_msg = Perl_form(aTHX_
2991             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2992             (int)(s - context + 1), context,
2993             (int)(e - s + 1), s + 1);
2994         return NULL;
2995     }
2996
2997   multi_spaces:
2998         /* diag_listed_as: charnames alias definitions may not contain a
2999                            sequence of multiple spaces; marked by <-- HERE
3000                            in %s */
3001         *error_msg = Perl_form(aTHX_
3002             "charnames alias definitions may not contain a sequence of "
3003             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
3004             (int)(s - context + 1), context,
3005             (int)(e - s + 1), s + 1);
3006         return NULL;
3007 }
3008
3009 /*
3010   scan_const
3011
3012   Extracts the next constant part of a pattern, double-quoted string,
3013   or transliteration.  This is terrifying code.
3014
3015   For example, in parsing the double-quoted string "ab\x63$d", it would
3016   stop at the '$' and return an OP_CONST containing 'abc'.
3017
3018   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3019   processing a pattern (PL_lex_inpat is true), a transliteration
3020   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3021
3022   Returns a pointer to the character scanned up to. If this is
3023   advanced from the start pointer supplied (i.e. if anything was
3024   successfully parsed), will leave an OP_CONST for the substring scanned
3025   in pl_yylval. Caller must intuit reason for not parsing further
3026   by looking at the next characters herself.
3027
3028   In patterns:
3029     expand:
3030       \N{FOO}  => \N{U+hex_for_character_FOO}
3031       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3032
3033     pass through:
3034         all other \-char, including \N and \N{ apart from \N{ABC}
3035
3036     stops on:
3037         @ and $ where it appears to be a var, but not for $ as tail anchor
3038         \l \L \u \U \Q \E
3039         (?{  or  (??{
3040
3041   In transliterations:
3042     characters are VERY literal, except for - not at the start or end
3043     of the string, which indicates a range.  However some backslash sequences
3044     are recognized: \r, \n, and the like
3045                     \007 \o{}, \x{}, \N{}
3046     If all elements in the transliteration are below 256,
3047     scan_const expands the range to the full set of intermediate
3048     characters. If the range is in utf8, the hyphen is replaced with
3049     a certain range mark which will be handled by pmtrans() in op.c.
3050
3051   In double-quoted strings:
3052     backslashes:
3053       all those recognized in transliterations
3054       deprecated backrefs: \1 (in substitution replacements)
3055       case and quoting: \U \Q \E
3056     stops on @ and $
3057
3058   scan_const does *not* construct ops to handle interpolated strings.
3059   It stops processing as soon as it finds an embedded $ or @ variable
3060   and leaves it to the caller to work out what's going on.
3061
3062   embedded arrays (whether in pattern or not) could be:
3063       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3064
3065   $ in double-quoted strings must be the symbol of an embedded scalar.
3066
3067   $ in pattern could be $foo or could be tail anchor.  Assumption:
3068   it's a tail anchor if $ is the last thing in the string, or if it's
3069   followed by one of "()| \r\n\t"
3070
3071   \1 (backreferences) are turned into $1 in substitutions
3072
3073   The structure of the code is
3074       while (there's a character to process) {
3075           handle transliteration ranges
3076           skip regexp comments /(?#comment)/ and codes /(?{code})/
3077           skip #-initiated comments in //x patterns
3078           check for embedded arrays
3079           check for embedded scalars
3080           if (backslash) {
3081               deprecate \1 in substitution replacements
3082               handle string-changing backslashes \l \U \Q \E, etc.
3083               switch (what was escaped) {
3084                   handle \- in a transliteration (becomes a literal -)
3085                   if a pattern and not \N{, go treat as regular character
3086                   handle \132 (octal characters)
3087                   handle \x15 and \x{1234} (hex characters)
3088                   handle \N{name} (named characters, also \N{3,5} in a pattern)
3089                   handle \cV (control characters)
3090                   handle printf-style backslashes (\f, \r, \n, etc)
3091               } (end switch)
3092               continue
3093           } (end if backslash)
3094           handle regular character
3095     } (end while character to read)
3096
3097 */
3098
3099 STATIC char *
3100 S_scan_const(pTHX_ char *start)
3101 {
3102     char *send = PL_bufend;             /* end of the constant */
3103     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
3104                                            on sizing. */
3105     char *s = start;                    /* start of the constant */
3106     char *d = SvPVX(sv);                /* destination for copies */
3107     bool dorange = FALSE;               /* are we in a translit range? */
3108     bool didrange = FALSE;              /* did we just finish a range? */
3109     bool in_charclass = FALSE;          /* within /[...]/ */
3110     bool s_is_utf8 = cBOOL(UTF);        /* Is the source string assumed to be
3111                                            UTF8?  But, this can show as true
3112                                            when the source isn't utf8, as for
3113                                            example when it is entirely composed
3114                                            of hex constants */
3115     bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
3116     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
3117                                            number of characters found so far
3118                                            that will expand (into 2 bytes)
3119                                            should we have to convert to
3120                                            UTF-8) */
3121     SV *res;                            /* result from charnames */
3122     STRLEN offset_to_max = 0;   /* The offset in the output to where the range
3123                                    high-end character is temporarily placed */
3124
3125     /* Does something require special handling in tr/// ?  This avoids extra
3126      * work in a less likely case.  As such, khw didn't feel it was worth
3127      * adding any branches to the more mainline code to handle this, which
3128      * means that this doesn't get set in some circumstances when things like
3129      * \x{100} get expanded out.  As a result there needs to be extra testing
3130      * done in the tr code */
3131     bool has_above_latin1 = FALSE;
3132
3133     /* Note on sizing:  The scanned constant is placed into sv, which is
3134      * initialized by newSV() assuming one byte of output for every byte of
3135      * input.  This routine expects newSV() to allocate an extra byte for a
3136      * trailing NUL, which this routine will append if it gets to the end of
3137      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3138      * CAPITAL LETTER A}), or more output than input if the constant ends up
3139      * recoded to utf8, but each time a construct is found that might increase
3140      * the needed size, SvGROW() is called.  Its size parameter each time is
3141      * based on the best guess estimate at the time, namely the length used so
3142      * far, plus the length the current construct will occupy, plus room for
3143      * the trailing NUL, plus one byte for every input byte still unscanned */
3144
3145     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3146                        before set */
3147 #ifdef EBCDIC
3148     int backslash_N = 0;            /* ? was the character from \N{} */
3149     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
3150                                        platform-specific like \x65 */
3151 #endif
3152
3153     PERL_ARGS_ASSERT_SCAN_CONST;
3154
3155     assert(PL_lex_inwhat != OP_TRANSR);
3156
3157     /* Protect sv from errors and fatal warnings. */
3158     ENTER_with_name("scan_const");
3159     SAVEFREESV(sv);
3160
3161     /* A bunch of code in the loop below assumes that if s[n] exists and is not
3162      * NUL, then s[n+1] exists.  This assertion makes sure that assumption is
3163      * valid */
3164     assert(*send == '\0');
3165
3166     while (s < send
3167            || dorange   /* Handle tr/// range at right edge of input */
3168     ) {
3169
3170         /* get transliterations out of the way (they're most literal) */
3171         if (PL_lex_inwhat == OP_TRANS) {
3172
3173             /* But there isn't any special handling necessary unless there is a
3174              * range, so for most cases we just drop down and handle the value
3175              * as any other.  There are two exceptions.
3176              *
3177              * 1.  A hyphen indicates that we are actually going to have a
3178              *     range.  In this case, skip the '-', set a flag, then drop
3179              *     down to handle what should be the end range value.
3180              * 2.  After we've handled that value, the next time through, that
3181              *     flag is set and we fix up the range.
3182              *
3183              * Ranges entirely within Latin1 are expanded out entirely, in
3184              * order to make the transliteration a simple table look-up.
3185              * Ranges that extend above Latin1 have to be done differently, so
3186              * there is no advantage to expanding them here, so they are
3187              * stored here as Min, RANGE_INDICATOR, Max.  'RANGE_INDICATOR' is
3188              * a byte that can't occur in legal UTF-8, and hence can signify a
3189              * hyphen without any possible ambiguity.  On EBCDIC machines, if
3190              * the range is expressed as Unicode, the Latin1 portion is
3191              * expanded out even if the range extends above Latin1.  This is
3192              * because each code point in it has to be processed here
3193              * individually to get its native translation */
3194
3195             if (! dorange) {
3196
3197                 /* Here, we don't think we're in a range.  If the new character
3198                  * is not a hyphen; or if it is a hyphen, but it's too close to
3199                  * either edge to indicate a range, or if we haven't output any
3200                  * characters yet then it's a regular character. */
3201                 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3202                 {
3203
3204                     /* A regular character.  Process like any other, but first
3205                      * clear any flags */
3206                     didrange = FALSE;
3207                     dorange = FALSE;
3208 #ifdef EBCDIC
3209                     non_portable_endpoint = 0;
3210                     backslash_N = 0;
3211 #endif
3212                     /* The tests here for being above Latin1 and similar ones
3213                      * in the following 'else' suffice to find all such
3214                      * occurences in the constant, except those added by a
3215                      * backslash escape sequence, like \x{100}.  Mostly, those
3216                      * set 'has_above_latin1' as appropriate */
3217                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3218                         has_above_latin1 = TRUE;
3219                     }
3220
3221                     /* Drops down to generic code to process current byte */
3222                 }
3223                 else {  /* Is a '-' in the context where it means a range */
3224                     if (didrange) { /* Something like y/A-C-Z// */
3225                         Perl_croak(aTHX_ "Ambiguous range in transliteration"
3226                                          " operator");
3227                     }
3228
3229                     dorange = TRUE;
3230
3231                     s++;    /* Skip past the hyphen */
3232
3233                     /* d now points to where the end-range character will be
3234                      * placed.  Drop down to get that character.  We'll finish
3235                      * processing the range the next time through the loop */
3236
3237                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3238                         has_above_latin1 = TRUE;
3239                     }
3240
3241                     /* Drops down to generic code to process current byte */
3242                 }
3243             }  /* End of not a range */
3244             else {
3245                 /* Here we have parsed a range.  Now must handle it.  At this
3246                  * point:
3247                  * 'sv' is a SV* that contains the output string we are
3248                  *      constructing.  The final two characters in that string
3249                  *      are the range start and range end, in order.
3250                  * 'd'  points to just beyond the range end in the 'sv' string,
3251                  *      where we would next place something
3252                  */
3253                 char * max_ptr;
3254                 char * min_ptr;
3255                 IV range_min;
3256                 IV range_max;   /* last character in range */
3257                 STRLEN grow;
3258                 Size_t offset_to_min = 0;
3259                 Size_t extras = 0;
3260 #ifdef EBCDIC
3261                 bool convert_unicode;
3262                 IV real_range_max = 0;
3263 #endif
3264                 /* Get the code point values of the range ends. */
3265                 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3266                 offset_to_max = max_ptr - SvPVX_const(sv);
3267                 if (d_is_utf8) {
3268                     /* We know the utf8 is valid, because we just constructed
3269                      * it ourselves in previous loop iterations */
3270                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3271                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3272                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3273
3274                     /* This compensates for not all code setting
3275                      * 'has_above_latin1', so that we don't skip stuff that
3276                      * should be executed */
3277                     if (range_max > 255) {
3278                         has_above_latin1 = TRUE;
3279                     }
3280                 }
3281                 else {
3282                     min_ptr = max_ptr - 1;
3283                     range_min = * (U8*) min_ptr;
3284                     range_max = * (U8*) max_ptr;
3285                 }
3286
3287                 /* If the range is just a single code point, like tr/a-a/.../,
3288                  * that code point is already in the output, twice.  We can
3289                  * just back up over the second instance and avoid all the rest
3290                  * of the work.  But if it is a variant character, it's been
3291                  * counted twice, so decrement.  (This unlikely scenario is
3292                  * special cased, like the one for a range of 2 code points
3293                  * below, only because the main-line code below needs a range
3294                  * of 3 or more to work without special casing.  Might as well
3295                  * get it out of the way now.) */
3296                 if (UNLIKELY(range_max == range_min)) {
3297                     d = max_ptr;
3298                     if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3299                         utf8_variant_count--;
3300                     }
3301                     goto range_done;
3302                 }
3303
3304 #ifdef EBCDIC
3305                 /* On EBCDIC platforms, we may have to deal with portable
3306                  * ranges.  These happen if at least one range endpoint is a
3307                  * Unicode value (\N{...}), or if the range is a subset of
3308                  * [A-Z] or [a-z], and both ends are literal characters,
3309                  * like 'A', and not like \x{C1} */
3310                 convert_unicode =
3311                                cBOOL(backslash_N)   /* \N{} forces Unicode,
3312                                                        hence portable range */
3313                     || (     ! non_portable_endpoint
3314                         && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3315                            || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3316                 if (convert_unicode) {
3317
3318                     /* Special handling is needed for these portable ranges.
3319                      * They are defined to be in Unicode terms, which includes
3320                      * all the Unicode code points between the end points.
3321                      * Convert to Unicode to get the Unicode range.  Later we
3322                      * will convert each code point in the range back to
3323                      * native.  */
3324                     range_min = NATIVE_TO_UNI(range_min);
3325                     range_max = NATIVE_TO_UNI(range_max);
3326                 }
3327 #endif
3328
3329                 if (range_min > range_max) {
3330 #ifdef EBCDIC
3331                     if (convert_unicode) {
3332                         /* Need to convert back to native for meaningful
3333                          * messages for this platform */
3334                         range_min = UNI_TO_NATIVE(range_min);
3335                         range_max = UNI_TO_NATIVE(range_max);
3336                     }
3337 #endif
3338                     /* Use the characters themselves for the error message if
3339                      * ASCII printables; otherwise some visible representation
3340                      * of them */
3341                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3342                         Perl_croak(aTHX_
3343                          "Invalid range \"%c-%c\" in transliteration operator",
3344                          (char)range_min, (char)range_max);
3345                     }
3346 #ifdef EBCDIC
3347                     else if (convert_unicode) {
3348         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3349                         Perl_croak(aTHX_
3350                            "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3351                            UVXf "}\" in transliteration operator",
3352                            range_min, range_max);
3353                     }
3354 #endif
3355                     else {
3356         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3357                         Perl_croak(aTHX_
3358                            "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3359                            " in transliteration operator",
3360                            range_min, range_max);
3361                     }
3362                 }
3363
3364                 /* If the range is exactly two code points long, they are
3365                  * already both in the output */
3366                 if (UNLIKELY(range_min + 1 == range_max)) {
3367                     goto range_done;
3368                 }
3369
3370                 /* Here the range contains at least 3 code points */
3371
3372                 if (d_is_utf8) {
3373
3374                     /* If everything in the transliteration is below 256, we
3375                      * can avoid special handling later.  A translation table
3376                      * for each of those bytes is created by op.c.  So we
3377                      * expand out all ranges to their constituent code points.
3378                      * But if we've encountered something above 255, the
3379                      * expanding won't help, so skip doing that.  But if it's
3380                      * EBCDIC, we may have to look at each character below 256
3381                      * if we have to convert to/from Unicode values */
3382                     if (   has_above_latin1
3383 #ifdef EBCDIC
3384                         && (range_min > 255 || ! convert_unicode)
3385 #endif
3386                     ) {
3387                         const STRLEN off = d - SvPVX(sv);
3388                         const STRLEN extra = 1 + (send - s) + 1;
3389                         char *e;
3390
3391                         /* Move the high character one byte to the right; then
3392                          * insert between it and the range begin, an illegal
3393                          * byte which serves to indicate this is a range (using
3394                          * a '-' would be ambiguous). */
3395
3396                         if (off + extra > SvLEN(sv)) {
3397                             d = off + SvGROW(sv, off + extra);
3398                             max_ptr = d - off + offset_to_max;
3399                         }
3400
3401                         e = d++;
3402                         while (e-- > max_ptr) {
3403                             *(e + 1) = *e;
3404                         }
3405                         *(e + 1) = (char) RANGE_INDICATOR;
3406                         goto range_done;
3407                     }
3408
3409                     /* Here, we're going to expand out the range.  For EBCDIC
3410                      * the range can extend above 255 (not so in ASCII), so
3411                      * for EBCDIC, split it into the parts above and below
3412                      * 255/256 */
3413 #ifdef EBCDIC
3414                     if (range_max > 255) {
3415                         real_range_max = range_max;
3416                         range_max = 255;
3417                     }
3418 #endif
3419                 }
3420
3421                 /* Here we need to expand out the string to contain each
3422                  * character in the range.  Grow the output to handle this.
3423                  * For non-UTF8, we need a byte for each code point in the
3424                  * range, minus the three that we've already allocated for: the
3425                  * hyphen, the min, and the max.  For UTF-8, we need this
3426                  * plus an extra byte for each code point that occupies two
3427                  * bytes (is variant) when in UTF-8 (except we've already
3428                  * allocated for the end points, including if they are
3429                  * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3430                  * platforms, it's easy to calculate a precise number.  To
3431                  * start, we count the variants in the range, which we need
3432                  * elsewhere in this function anyway.  (For the case where it
3433                  * isn't easy to calculate, 'extras' has been initialized to 0,
3434                  * and the calculation is done in a loop further down.) */
3435 #ifdef EBCDIC
3436                 if (convert_unicode)
3437 #endif
3438                 {
3439                     /* This is executed unconditionally on ASCII, and for
3440                      * Unicode ranges on EBCDIC.  Under these conditions, all
3441                      * code points above a certain value are variant; and none
3442                      * under that value are.  We just need to find out how much
3443                      * of the range is above that value.  We don't count the
3444                      * end points here, as they will already have been counted
3445                      * as they were parsed. */
3446                     if (range_min >= UTF_CONTINUATION_MARK) {
3447
3448                         /* The whole range is made up of variants */
3449                         extras = (range_max - 1) - (range_min + 1) + 1;
3450                     }
3451                     else if (range_max >= UTF_CONTINUATION_MARK) {
3452
3453                         /* Only the higher portion of the range is variants */
3454                         extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3455                     }
3456
3457                     utf8_variant_count += extras;
3458                 }
3459
3460                 /* The base growth is the number of code points in the range,
3461                  * not including the endpoints, which have already been sized
3462                  * for (and output).  We don't subtract for the hyphen, as it
3463                  * has been parsed but not output, and the SvGROW below is
3464                  * based only on what's been output plus what's left to parse.
3465                  * */
3466                 grow = (range_max - 1) - (range_min + 1) + 1;
3467
3468                 if (d_is_utf8) {
3469 #ifdef EBCDIC
3470                     /* In some cases in EBCDIC, we haven't yet calculated a
3471                      * precise amount needed for the UTF-8 variants.  Just
3472                      * assume the worst case, that everything will expand by a
3473                      * byte */
3474                     if (! convert_unicode) {
3475                         grow *= 2;
3476                     }
3477                     else
3478 #endif
3479                     {
3480                         /* Otherwise we know exactly how many variants there
3481                          * are in the range. */
3482                         grow += extras;
3483                     }
3484                 }
3485
3486                 /* Grow, but position the output to overwrite the range min end
3487                  * point, because in some cases we overwrite that */
3488                 SvCUR_set(sv, d - SvPVX_const(sv));
3489                 offset_to_min = min_ptr - SvPVX_const(sv);
3490
3491                 /* See Note on sizing above. */
3492                 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3493                                              + (send - s)
3494                                              + grow
3495                                              + 1 /* Trailing NUL */ );
3496
3497                 /* Now, we can expand out the range. */
3498 #ifdef EBCDIC
3499                 if (convert_unicode) {
3500                     SSize_t i;
3501
3502                     /* Recall that the min and max are now in Unicode terms, so
3503                      * we have to convert each character to its native
3504                      * equivalent */
3505                     if (d_is_utf8) {
3506                         for (i = range_min; i <= range_max; i++) {
3507                             append_utf8_from_native_byte(
3508                                                     LATIN1_TO_NATIVE((U8) i),
3509                                                     (U8 **) &d);
3510                         }
3511                     }
3512                     else {
3513                         for (i = range_min; i <= range_max; i++) {
3514                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3515                         }
3516                     }
3517                 }
3518                 else
3519 #endif
3520                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3521                 {
3522                     /* Here, no conversions are necessary, which means that the
3523                      * first character in the range is already in 'd' and
3524                      * valid, so we can skip overwriting it */
3525                     if (d_is_utf8) {
3526                         SSize_t i;
3527                         d += UTF8SKIP(d);
3528                         for (i = range_min + 1; i <= range_max; i++) {
3529                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3530                         }
3531                     }
3532                     else {
3533                         SSize_t i;
3534                         d++;
3535                         assert(range_min + 1 <= range_max);
3536                         for (i = range_min + 1; i < range_max; i++) {
3537 #ifdef EBCDIC
3538                             /* In this case on EBCDIC, we haven't calculated
3539                              * the variants.  Do it here, as we go along */
3540                             if (! UVCHR_IS_INVARIANT(i)) {
3541                                 utf8_variant_count++;
3542                             }
3543 #endif
3544                             *d++ = (char)i;
3545                         }
3546
3547                         /* The range_max is done outside the loop so as to
3548                          * avoid having to special case not incrementing
3549                          * 'utf8_variant_count' on EBCDIC (it's already been
3550                          * counted when originally parsed) */
3551                         *d++ = (char) range_max;
3552                     }
3553                 }
3554
3555 #ifdef EBCDIC
3556                 /* If the original range extended above 255, add in that
3557                  * portion. */
3558                 if (real_range_max) {
3559                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3560                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3561                     if (real_range_max > 0x100) {
3562                         if (real_range_max > 0x101) {
3563                             *d++ = (char) RANGE_INDICATOR;
3564                         }
3565                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3566                     }
3567                 }
3568 #endif
3569
3570               range_done:
3571                 /* mark the range as done, and continue */
3572                 didrange = TRUE;
3573                 dorange = FALSE;
3574 #ifdef EBCDIC
3575                 non_portable_endpoint = 0;
3576                 backslash_N = 0;
3577 #endif
3578                 continue;
3579             } /* End of is a range */
3580         } /* End of transliteration.  Joins main code after these else's */
3581         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3582             char *s1 = s-1;
3583             int esc = 0;
3584             while (s1 >= start && *s1-- == '\\')
3585                 esc = !esc;
3586             if (!esc)
3587                 in_charclass = TRUE;
3588         }
3589         else if (*s == ']' && PL_lex_inpat && in_charclass) {
3590             char *s1 = s-1;
3591             int esc = 0;
3592             while (s1 >= start && *s1-- == '\\')
3593                 esc = !esc;
3594             if (!esc)
3595                 in_charclass = FALSE;
3596         }
3597             /* skip for regexp comments /(?#comment)/, except for the last
3598              * char, which will be done separately.  Stop on (?{..}) and
3599              * friends */
3600         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3601             if (s[2] == '#') {
3602                 if (s_is_utf8) {
3603                     PERL_UINT_FAST8_T  len = UTF8SKIP(s);
3604
3605                     while (s + len < send && *s != ')') {
3606                         Copy(s, d, len, U8);
3607                         d += len;
3608                         s += len;
3609                         len = UTF8_SAFE_SKIP(s, send);
3610                     }
3611                 }
3612                 else while (s+1 < send && *s != ')') {
3613                     *d++ = *s++;
3614                 }
3615             }
3616             else if (!PL_lex_casemods
3617                      && (    s[2] == '{' /* This should match regcomp.c */
3618                          || (s[2] == '?' && s[3] == '{')))
3619             {
3620                 break;
3621             }
3622         }
3623             /* likewise skip #-initiated comments in //x patterns */
3624         else if (*s == '#'
3625                  && PL_lex_inpat
3626                  && !in_charclass
3627                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3628         {
3629             while (s < send && *s != '\n')
3630                 *d++ = *s++;
3631         }
3632             /* no further processing of single-quoted regex */
3633         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3634             goto default_action;
3635
3636             /* check for embedded arrays
3637              * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3638              */
3639         else if (*s == '@' && s[1]) {
3640             if (UTF
3641                ? isIDFIRST_utf8_safe(s+1, send)
3642                : isWORDCHAR_A(s[1]))
3643             {
3644                 break;
3645             }
3646             if (memCHRs(":'{$", s[1]))
3647                 break;
3648             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3649                 break; /* in regexp, neither @+ nor @- are interpolated */
3650         }
3651             /* check for embedded scalars.  only stop if we're sure it's a
3652              * variable.  */
3653         else if (*s == '$') {
3654             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3655                 break;
3656             if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3657                 if (s[1] == '\\') {
3658                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3659                                    "Possible unintended interpolation of $\\ in regex");
3660                 }
3661                 break;          /* in regexp, $ might be tail anchor */
3662             }
3663         }
3664
3665         /* End of else if chain - OP_TRANS rejoin rest */
3666
3667         if (UNLIKELY(s >= send)) {
3668             assert(s == send);
3669             break;
3670         }
3671
3672         /* backslashes */
3673         if (*s == '\\' && s+1 < send) {
3674             char* bslash = s;   /* point to beginning \ */
3675             char* rbrace;       /* point to ending '}' */
3676             char* e;            /* 1 past the meat (non-blanks) before the
3677                                    brace */
3678             s++;
3679
3680             /* warn on \1 - \9 in substitution replacements, but note that \11
3681              * is an octal; and \19 is \1 followed by '9' */
3682             if (PL_lex_inwhat == OP_SUBST
3683                 && !PL_lex_inpat
3684                 && isDIGIT(*s)
3685                 && *s != '0'
3686                 && !isDIGIT(s[1]))
3687             {
3688                 /* diag_listed_as: \%d better written as $%d */
3689                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3690                 s = bslash;
3691                 *s = '$';
3692                 break;
3693             }
3694
3695             /* string-change backslash escapes */
3696             if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3697                 s = bslash;
3698                 break;
3699             }
3700             /* In a pattern, process \N, but skip any other backslash escapes.
3701              * This is because we don't want to translate an escape sequence
3702              * into a meta symbol and have the regex compiler use the meta
3703              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3704              * in spite of this, we do have to process \N here while the proper
3705              * charnames handler is in scope.  See bugs #56444 and #62056.
3706              *
3707              * There is a complication because \N in a pattern may also stand
3708              * for 'match a non-nl', and not mean a charname, in which case its
3709              * processing should be deferred to the regex compiler.  To be a
3710              * charname it must be followed immediately by a '{', and not look
3711              * like \N followed by a curly quantifier, i.e., not something like
3712              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3713              * quantifier */
3714             else if (PL_lex_inpat
3715                     && (*s != 'N'
3716                         || s[1] != '{'
3717                         || regcurly(s + 1, send, NULL)))
3718             {
3719                 *d++ = '\\';
3720                 goto default_action;
3721             }
3722
3723             switch (*s) {
3724             default:
3725                 {
3726                     if ((isALPHANUMERIC(*s)))
3727                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3728                                        "Unrecognized escape \\%c passed through",
3729                                        *s);
3730                     /* default action is to copy the quoted character */
3731                     goto default_action;
3732                 }
3733
3734             /* eg. \132 indicates the octal constant 0132 */
3735             case '0': case '1': case '2': case '3':
3736             case '4': case '5': case '6': case '7':
3737                 {
3738                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3739                               | PERL_SCAN_NOTIFY_ILLDIGIT;
3740                     STRLEN len = 3;
3741                     uv = grok_oct(s, &len, &flags, NULL);
3742                     s += len;
3743                     if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3744                         && s < send
3745                         && isDIGIT(*s)  /* like \08, \178 */
3746                         && ckWARN(WARN_MISC))
3747                     {
3748                         Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3749                             form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3750                     }
3751                 }
3752                 goto NUM_ESCAPE_INSERT;
3753
3754             /* eg. \o{24} indicates the octal constant \024 */
3755             case 'o':
3756                 {
3757                     const char* error;
3758
3759                     if (! grok_bslash_o(&s, send,
3760                                                &uv, &error,
3761                                                NULL,
3762                                                FALSE, /* Not strict */
3763                                                FALSE, /* No illegal cp's */
3764                                                UTF))
3765                     {
3766                         yyerror(error);
3767                         uv = 0; /* drop through to ensure range ends are set */
3768                     }
3769                     goto NUM_ESCAPE_INSERT;
3770                 }
3771
3772             /* eg. \x24 indicates the hex constant 0x24 */
3773             case 'x':
3774                 {
3775                     const char* error;
3776
3777                     if (! grok_bslash_x(&s, send,
3778                                                &uv, &error,
3779                                                NULL,
3780                                                FALSE, /* Not strict */
3781                                                FALSE, /* No illegal cp's */
3782                                                UTF))
3783                     {
3784                         yyerror(error);
3785                         uv = 0; /* drop through to ensure range ends are set */
3786                     }
3787                 }
3788
3789               NUM_ESCAPE_INSERT:
3790                 /* Insert oct or hex escaped character. */
3791
3792                 /* Here uv is the ordinal of the next character being added */
3793                 if (UVCHR_IS_INVARIANT(uv)) {
3794                     *d++ = (char) uv;
3795                 }
3796                 else {
3797                     if (!d_is_utf8 && uv > 255) {
3798
3799                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3800                          * If we've only seen invariants so far, all we have to
3801                          * do is turn on the flag */
3802                         if (utf8_variant_count == 0) {
3803                             SvUTF8_on(sv);
3804                         }
3805                         else {
3806                             SvCUR_set(sv, d - SvPVX_const(sv));
3807                             SvPOK_on(sv);
3808                             *d = '\0';
3809
3810                             sv_utf8_upgrade_flags_grow(
3811                                            sv,
3812                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3813
3814                                            /* Since we're having to grow here,
3815                                             * make sure we have enough room for
3816                                             * this escape and a NUL, so the
3817                                             * code immediately below won't have
3818                                             * to actually grow again */
3819                                           UVCHR_SKIP(uv)
3820                                         + (STRLEN)(send - s) + 1);
3821                             d = SvPVX(sv) + SvCUR(sv);
3822                         }
3823
3824                         has_above_latin1 = TRUE;
3825                         d_is_utf8 = TRUE;
3826                     }
3827
3828                     if (! d_is_utf8) {
3829                         *d++ = (char)uv;
3830                         utf8_variant_count++;
3831                     }
3832                     else {
3833                        /* Usually, there will already be enough room in 'sv'
3834                         * since such escapes are likely longer than any UTF-8
3835                         * sequence they can end up as.  This isn't the case on
3836                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3837                         * UTF-8 for it contains 14.  And, we have to allow for
3838                         * a trailing NUL.  It probably can't happen on ASCII
3839                         * platforms, but be safe.  See Note on sizing above. */
3840                         const STRLEN needed = d - SvPVX(sv)
3841                                             + UVCHR_SKIP(uv)
3842                                             + (send - s)
3843                                             + 1;
3844                         if (UNLIKELY(needed > SvLEN(sv))) {
3845                             SvCUR_set(sv, d - SvPVX_const(sv));
3846                             d = SvCUR(sv) + SvGROW(sv, needed);
3847                         }
3848
3849                         d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3850                                                    (ckWARN(WARN_PORTABLE))
3851                                                    ? UNICODE_WARN_PERL_EXTENDED
3852                                                    : 0);
3853                     }
3854                 }
3855 #ifdef EBCDIC
3856                 non_portable_endpoint++;
3857 #endif
3858                 continue;
3859
3860             case 'N':
3861                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3862                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3863                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3864                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3865                  * convenience all three forms are referred to as "named
3866                  * characters" below.
3867                  *
3868                  * For patterns, \N also can mean to match a non-newline.  Code
3869                  * before this 'switch' statement should already have handled
3870                  * this situation, and hence this code only has to deal with
3871                  * the named character cases.
3872                  *
3873                  * For non-patterns, the named characters are converted to
3874                  * their string equivalents.  In patterns, named characters are
3875                  * not converted to their ultimate forms for the same reasons
3876                  * that other escapes aren't (mainly that the ultimate
3877                  * character could be considered a meta-symbol by the regex
3878                  * compiler).  Instead, they are converted to the \N{U+...}
3879                  * form to get the value from the charnames that is in effect
3880                  * right now, while preserving the fact that it was a named
3881                  * character, so that the regex compiler knows this.
3882                  *
3883                  * The structure of this section of code (besides checking for
3884                  * errors and upgrading to utf8) is:
3885                  *    If the named character is of the form \N{U+...}, pass it
3886                  *      through if a pattern; otherwise convert the code point
3887                  *      to utf8
3888                  *    Otherwise must be some \N{NAME}: convert to
3889                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3890                  *
3891                  * Transliteration is an exception.  The conversion to utf8 is
3892                  * only done if the code point requires it to be representable.
3893                  *
3894                  * Here, 's' points to the 'N'; the test below is guaranteed to
3895                  * succeed if we are being called on a pattern, as we already
3896                  * know from a test above that the next character is a '{'.  A
3897                  * non-pattern \N must mean 'named character', which requires
3898                  * braces */
3899                 s++;
3900                 if (*s != '{') {
3901                     yyerror("Missing braces on \\N{}");
3902                     *d++ = '\0';
3903                     continue;
3904                 }
3905                 s++;
3906
3907                 /* If there is no matching '}', it is an error. */
3908                 if (! (rbrace = (char *) memchr(s, '}', send - s))) {
3909                     if (! PL_lex_inpat) {
3910                         yyerror("Missing right brace on \\N{}");
3911                     } else {
3912                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3913                     }
3914                     yyquit(); /* Have exhausted the input. */
3915                 }
3916
3917                 /* Here it looks like a named character */
3918                 while (s < rbrace && isBLANK(*s)) {
3919                     s++;
3920                 }
3921
3922                 e = rbrace;
3923                 while (s < e && isBLANK(*(e - 1))) {
3924                     e--;
3925                 }
3926
3927                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3928                     s += 2;         /* Skip to next char after the 'U+' */
3929                     if (PL_lex_inpat) {
3930
3931                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3932                         /* Check the syntax.  */
3933                         if (!isXDIGIT(*s)) {
3934                           bad_NU:
3935                             yyerror(
3936                                 "Invalid hexadecimal number in \\N{U+...}"
3937                             );
3938                             s = rbrace + 1;
3939                             *d++ = '\0';
3940                             continue;
3941                         }
3942                         while (++s < e) {
3943                             if (isXDIGIT(*s))
3944                                 continue;
3945                             else if ((*s == '.' || *s == '_')
3946                                   && isXDIGIT(s[1]))
3947                                 continue;
3948                             goto bad_NU;
3949                         }
3950
3951                         /* Pass everything through unchanged.
3952                          * +1 is to include the '}' */
3953                         Copy(bslash, d, rbrace - bslash + 1, char);
3954                         d += rbrace - bslash + 1;
3955                     }
3956                     else {  /* Not a pattern: convert the hex to string */
3957                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3958                                   | PERL_SCAN_SILENT_ILLDIGIT
3959                                   | PERL_SCAN_SILENT_OVERFLOW
3960                                   | PERL_SCAN_DISALLOW_PREFIX;
3961                         STRLEN len = e - s;
3962
3963                         uv = grok_hex(s, &len, &flags, NULL);
3964                         if (len == 0 || (len != (STRLEN)(e - s)))
3965                             goto bad_NU;
3966
3967                         if (    uv > MAX_LEGAL_CP
3968                             || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
3969                         {
3970                             yyerror(form_cp_too_large_msg(16, s, len, 0));
3971                             uv = 0; /* drop through to ensure range ends are
3972                                        set */
3973                         }
3974
3975                          /* For non-tr///, if the destination is not in utf8,
3976                           * unconditionally recode it to be so.  This is
3977                           * because \N{} implies Unicode semantics, and scalars
3978                           * have to be in utf8 to guarantee those semantics.
3979                           * tr/// doesn't care about Unicode rules, so no need
3980                           * there to upgrade to UTF-8 for small enough code
3981                           * points */
3982                         if (! d_is_utf8 && (   uv > 0xFF
3983                                            || PL_lex_inwhat != OP_TRANS))
3984                         {
3985                             /* See Note on sizing above.  */
3986                             const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1;
3987
3988                             SvCUR_set(sv, d - SvPVX_const(sv));
3989                             SvPOK_on(sv);
3990                             *d = '\0';
3991
3992                             if (utf8_variant_count == 0) {
3993                                 SvUTF8_on(sv);
3994                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3995                             }
3996                             else {
3997                                 sv_utf8_upgrade_flags_grow(
3998                                                sv,
3999                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4000                                                extra);
4001                                 d = SvPVX(sv) + SvCUR(sv);
4002                             }
4003
4004                             d_is_utf8 = TRUE;
4005                             has_above_latin1 = TRUE;
4006                         }
4007
4008                         /* Add the (Unicode) code point to the output. */
4009                         if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
4010                             *d++ = (char) LATIN1_TO_NATIVE(uv);
4011                         }
4012                         else {
4013                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
4014                                                    (ckWARN(WARN_PORTABLE))
4015                                                    ? UNICODE_WARN_PERL_EXTENDED
4016                                                    : 0);
4017                         }
4018                     }
4019                 }
4020                 else     /* Here is \N{NAME} but not \N{U+...}. */
4021                      if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
4022                 {   /* Failed.  We should die eventually, but for now use a NUL
4023                        to keep parsing */
4024                     *d++ = '\0';
4025                 }
4026                 else {  /* Successfully evaluated the name */
4027                     STRLEN len;
4028                     const char *str = SvPV_const(res, len);
4029                     if (PL_lex_inpat) {
4030
4031                         if (! len) { /* The name resolved to an empty string */
4032                             const char empty_N[] = "\\N{_}";
4033                             Copy(empty_N, d, sizeof(empty_N) - 1, char);
4034                             d += sizeof(empty_N) - 1;
4035                         }
4036                         else {
4037                             /* In order to not lose information for the regex
4038                             * compiler, pass the result in the specially made
4039                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
4040                             * the code points in hex of each character
4041                             * returned by charnames */
4042
4043                             const char *str_end = str + len;
4044                             const STRLEN off = d - SvPVX_const(sv);
4045
4046                             if (! SvUTF8(res)) {
4047                                 /* For the non-UTF-8 case, we can determine the
4048                                  * exact length needed without having to parse
4049                                  * through the string.  Each character takes up
4050                                  * 2 hex digits plus either a trailing dot or
4051                                  * the "}" */
4052                                 const char initial_text[] = "\\N{U+";
4053                                 const STRLEN initial_len = sizeof(initial_text)
4054                                                            - 1;
4055                                 d = off + SvGROW(sv, off
4056                                                     + 3 * len
4057
4058                                                     /* +1 for trailing NUL */
4059                                                     + initial_len + 1
4060
4061                                                     + (STRLEN)(send - rbrace));
4062                                 Copy(initial_text, d, initial_len, char);
4063                                 d += initial_len;
4064                                 while (str < str_end) {
4065                                     char hex_string[4];
4066                                     int len =
4067                                         my_snprintf(hex_string,
4068                                                   sizeof(hex_string),
4069                                                   "%02X.",
4070
4071                                                   /* The regex compiler is
4072                                                    * expecting Unicode, not
4073                                                    * native */
4074                                                   NATIVE_TO_LATIN1(*str));
4075                                     PERL_MY_SNPRINTF_POST_GUARD(len,
4076                                                            sizeof(hex_string));
4077                                     Copy(hex_string, d, 3, char);
4078                                     d += 3;
4079                                     str++;
4080                                 }
4081                                 d--;    /* Below, we will overwrite the final
4082                                            dot with a right brace */
4083                             }
4084                             else {
4085                                 STRLEN char_length; /* cur char's byte length */
4086
4087                                 /* and the number of bytes after this is
4088                                  * translated into hex digits */
4089                                 STRLEN output_length;
4090
4091                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
4092                                  * for max('U+', '.'); and 1 for NUL */
4093                                 char hex_string[2 * UTF8_MAXBYTES + 5];
4094
4095                                 /* Get the first character of the result. */
4096                                 U32 uv = utf8n_to_uvchr((U8 *) str,
4097                                                         len,
4098                                                         &char_length,
4099                                                         UTF8_ALLOW_ANYUV);
4100                                 /* Convert first code point to Unicode hex,
4101                                  * including the boiler plate before it. */
4102                                 output_length =
4103                                     my_snprintf(hex_string, sizeof(hex_string),
4104                                              "\\N{U+%X",
4105                                              (unsigned int) NATIVE_TO_UNI(uv));
4106
4107                                 /* Make sure there is enough space to hold it */
4108                                 d = off + SvGROW(sv, off
4109                                                     + output_length
4110                                                     + (STRLEN)(send - rbrace)
4111                                                     + 2);       /* '}' + NUL */
4112                                 /* And output it */
4113                                 Copy(hex_string, d, output_length, char);
4114                                 d += output_length;
4115
4116                                 /* For each subsequent character, append dot and
4117                                 * its Unicode code point in hex */
4118                                 while ((str += char_length) < str_end) {
4119                                     const STRLEN off = d - SvPVX_const(sv);
4120                                     U32 uv = utf8n_to_uvchr((U8 *) str,
4121                                                             str_end - str,
4122                                                             &char_length,
4123                                                             UTF8_ALLOW_ANYUV);
4124                                     output_length =
4125                                         my_snprintf(hex_string,
4126                                              sizeof(hex_string),
4127                                              ".%X",
4128                                              (unsigned int) NATIVE_TO_UNI(uv));
4129
4130                                     d = off + SvGROW(sv, off
4131                                                         + output_length
4132                                                         + (STRLEN)(send - rbrace)
4133                                                         + 2);   /* '}' +  NUL */
4134                                     Copy(hex_string, d, output_length, char);
4135                                     d += output_length;
4136                                 }
4137                             }
4138
4139                             *d++ = '}'; /* Done.  Add the trailing brace */
4140                         }
4141                     }
4142                     else { /* Here, not in a pattern.  Convert the name to a
4143                             * string. */
4144
4145                         if (PL_lex_inwhat == OP_TRANS) {
4146                             str = SvPV_const(res, len);
4147                             if (len > ((SvUTF8(res))
4148                                        ? UTF8SKIP(str)
4149                                        : 1U))
4150                             {
4151                                 yyerror(Perl_form(aTHX_
4152                                     "%.*s must not be a named sequence"
4153                                     " in transliteration operator",
4154                                         /*  +1 to include the "}" */
4155                                     (int) (rbrace + 1 - start), start));
4156                                 *d++ = '\0';
4157                                 goto end_backslash_N;
4158                             }
4159
4160                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4161                                 has_above_latin1 = TRUE;
4162                             }
4163
4164                         }
4165                         else if (! SvUTF8(res)) {
4166                             /* Make sure \N{} return is UTF-8.  This is because
4167                              * \N{} implies Unicode semantics, and scalars have
4168                              * to be in utf8 to guarantee those semantics; but
4169                              * not needed in tr/// */
4170                             sv_utf8_upgrade_flags(res, 0);
4171                             str = SvPV_const(res, len);
4172                         }
4173
4174                          /* Upgrade destination to be utf8 if this new
4175                           * component is */
4176                         if (! d_is_utf8 && SvUTF8(res)) {
4177                             /* See Note on sizing above.  */
4178                             const STRLEN extra = len + (send - s) + 1;
4179
4180                             SvCUR_set(sv, d - SvPVX_const(sv));
4181                             SvPOK_on(sv);
4182                             *d = '\0';
4183
4184                             if (utf8_variant_count == 0) {
4185                                 SvUTF8_on(sv);
4186                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4187                             }
4188                             else {
4189                                 sv_utf8_upgrade_flags_grow(sv,
4190                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4191                                                 extra);
4192                                 d = SvPVX(sv) + SvCUR(sv);
4193                             }
4194                             d_is_utf8 = TRUE;
4195                         } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */
4196
4197                             /* See Note on sizing above.  (NOTE: SvCUR() is not
4198                              * set correctly here). */
4199                             const STRLEN extra = len + (send - rbrace) + 1;
4200                             const STRLEN off = d - SvPVX_const(sv);
4201                             d = off + SvGROW(sv, off + extra);
4202                         }
4203                         Copy(str, d, len, char);
4204                         d += len;
4205                     }
4206
4207                     SvREFCNT_dec(res);
4208
4209                 } /* End \N{NAME} */
4210
4211               end_backslash_N:
4212 #ifdef EBCDIC
4213                 backslash_N++; /* \N{} is defined to be Unicode */
4214 #endif
4215                 s = rbrace + 1;  /* Point to just after the '}' */
4216                 continue;
4217
4218             /* \c is a control character */
4219             case 'c':
4220                 s++;
4221                 if (s < send) {
4222                     const char * message;
4223
4224                     if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4225                         yyerror(message);
4226                         yyquit();   /* Have always immediately croaked on
4227                                        errors in this */
4228                     }
4229                     d++;
4230                 }
4231                 else {
4232                     yyerror("Missing control char name in \\c");
4233                     yyquit();   /* Are at end of input, no sense continuing */
4234                 }
4235 #ifdef EBCDIC
4236                 non_portable_endpoint++;
4237 #endif
4238                 break;
4239
4240             /* printf-style backslashes, formfeeds, newlines, etc */
4241             case 'b':
4242                 *d++ = '\b';
4243                 break;
4244             case 'n':
4245                 *d++ = '\n';
4246                 break;
4247             case 'r':
4248                 *d++ = '\r';
4249                 break;
4250             case 'f':
4251                 *d++ = '\f';
4252                 break;
4253             case 't':
4254                 *d++ = '\t';
4255                 break;
4256             case 'e':
4257                 *d++ = ESC_NATIVE;
4258                 break;
4259             case 'a':
4260                 *d++ = '\a';
4261                 break;
4262             } /* end switch */
4263
4264             s++;
4265             continue;
4266         } /* end if (backslash) */
4267
4268     default_action:
4269         /* Just copy the input to the output, though we may have to convert
4270          * to/from UTF-8.
4271          *
4272          * If the input has the same representation in UTF-8 as not, it will be
4273          * a single byte, and we don't care about UTF8ness; just copy the byte */
4274         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4275             *d++ = *s++;
4276         }
4277         else if (! s_is_utf8 && ! d_is_utf8) {
4278             /* If neither source nor output is UTF-8, is also a single byte,
4279              * just copy it; but this byte counts should we later have to
4280              * convert to UTF-8 */
4281             *d++ = *s++;
4282             utf8_variant_count++;
4283         }
4284         else if (s_is_utf8 && d_is_utf8) {   /* Both UTF-8, can just copy */
4285             const STRLEN len = UTF8SKIP(s);
4286
4287             /* We expect the source to have already been checked for
4288              * malformedness */
4289             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4290
4291             Copy(s, d, len, U8);
4292             d += len;
4293             s += len;
4294         }
4295         else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4296             STRLEN need = send - s + 1; /* See Note on sizing above. */
4297
4298             SvCUR_set(sv, d - SvPVX_const(sv));
4299             SvPOK_on(sv);
4300             *d = '\0';
4301
4302             if (utf8_variant_count == 0) {
4303                 SvUTF8_on(sv);
4304                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4305             }
4306             else {
4307                 sv_utf8_upgrade_flags_grow(sv,
4308                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4309                                            need);
4310                 d = SvPVX(sv) + SvCUR(sv);
4311             }
4312             d_is_utf8 = TRUE;
4313             goto default_action; /* Redo, having upgraded so both are UTF-8 */
4314         }
4315         else {  /* UTF8ness matters: convert this non-UTF8 source char to
4316                    UTF-8 for output.  It will occupy 2 bytes, but don't include
4317                    the input byte since we haven't incremented 's' yet. See
4318                    Note on sizing above. */
4319             const STRLEN off = d - SvPVX(sv);
4320             const STRLEN extra = 2 + (send - s - 1) + 1;
4321             if (off + extra > SvLEN(sv)) {
4322                 d = off + SvGROW(sv, off + extra);
4323             }
4324             *d++ = UTF8_EIGHT_BIT_HI(*s);
4325             *d++ = UTF8_EIGHT_BIT_LO(*s);
4326             s++;
4327         }
4328     } /* while loop to process each character */
4329
4330     {
4331         const STRLEN off = d - SvPVX(sv);
4332
4333         /* See if room for the terminating NUL */
4334         if (UNLIKELY(off >= SvLEN(sv))) {
4335
4336 #ifndef DEBUGGING
4337
4338             if (off > SvLEN(sv))
4339 #endif
4340                 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4341                         " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4342
4343             /* Whew!  Here we don't have room for the terminating NUL, but
4344              * everything else so far has fit.  It's not too late to grow
4345              * to fit the NUL and continue on.  But it is a bug, as the code
4346              * above was supposed to have made room for this, so under
4347              * DEBUGGING builds, we panic anyway.  */
4348             d = off + SvGROW(sv, off + 1);
4349         }
4350     }
4351
4352     /* terminate the string and set up the sv */
4353     *d = '\0';
4354     SvCUR_set(sv, d - SvPVX_const(sv));
4355
4356     SvPOK_on(sv);
4357     if (d_is_utf8) {
4358         SvUTF8_on(sv);
4359     }
4360
4361     /* shrink the sv if we allocated more than we used */
4362     if (SvCUR(sv) + 5 < SvLEN(sv)) {
4363         SvPV_shrink_to_cur(sv);
4364     }
4365
4366     /* return the substring (via pl_yylval) only if we parsed anything */
4367     if (s > start) {
4368         char *s2 = start;
4369         for (; s2 < s; s2++) {
4370             if (*s2 == '\n')
4371                 COPLINE_INC_WITH_HERELINES;
4372         }
4373         SvREFCNT_inc_simple_void_NN(sv);
4374         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4375             && ! PL_parser->lex_re_reparsing)
4376         {
4377             const char *const key = PL_lex_inpat ? "qr" : "q";
4378             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4379             const char *type;
4380             STRLEN typelen;
4381
4382             if (PL_lex_inwhat == OP_TRANS) {
4383                 type = "tr";
4384                 typelen = 2;
4385             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4386                 type = "s";
4387                 typelen = 1;
4388             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4389                 type = "q";
4390                 typelen = 1;
4391             } else {
4392                 type = "qq";
4393                 typelen = 2;
4394             }
4395
4396             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4397                                 type, typelen, NULL);
4398         }
4399         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4400     }
4401     LEAVE_with_name("scan_const");
4402     return s;
4403 }
4404
4405 /* S_intuit_more
4406  * Returns TRUE if there's more to the expression (e.g., a subscript),
4407  * FALSE otherwise.
4408  *
4409  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4410  *
4411  * ->[ and ->{ return TRUE
4412  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4413  * { and [ outside a pattern are always subscripts, so return TRUE
4414  * if we're outside a pattern and it's not { or [, then return FALSE
4415  * if we're in a pattern and the first char is a {
4416  *   {4,5} (any digits around the comma) returns FALSE
4417  * if we're in a pattern and the first char is a [
4418  *   [] returns FALSE
4419  *   [SOMETHING] has a funky algorithm to decide whether it's a
4420  *      character class or not.  It has to deal with things like
4421  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4422  * anything else returns TRUE
4423  */
4424
4425 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4426
4427 STATIC int
4428 S_intuit_more(pTHX_ char *s, char *e)
4429 {
4430     PERL_ARGS_ASSERT_INTUIT_MORE;
4431
4432     if (PL_lex_brackets)
4433         return TRUE;
4434     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4435         return TRUE;
4436     if (*s == '-' && s[1] == '>'
4437      && FEATURE_POSTDEREF_QQ_IS_ENABLED
4438      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4439         ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
4440         return TRUE;
4441     if (*s != '{' && *s != '[')
4442         return FALSE;
4443     PL_parser->sub_no_recover = TRUE;
4444     if (!PL_lex_inpat)
4445         return TRUE;
4446
4447     /* In a pattern, so maybe we have {n,m}. */
4448     if (*s == '{') {
4449         if (regcurly(s, e, NULL)) {
4450             return FALSE;
4451         }
4452         return TRUE;
4453     }
4454
4455     /* On the other hand, maybe we have a character class */
4456
4457     s++;
4458     if (*s == ']' || *s == '^')
4459         return FALSE;
4460     else {
4461         /* this is terrifying, and it works */
4462         int weight;
4463         char seen[256];
4464         const char * const send = (char *) memchr(s, ']', e - s);
4465         unsigned char un_char, last_un_char;
4466         char tmpbuf[sizeof PL_tokenbuf * 4];
4467
4468         if (!send)              /* has to be an expression */
4469             return TRUE;
4470         weight = 2;             /* let's weigh the evidence */
4471
4472         if (*s == '$')
4473             weight -= 3;
4474         else if (isDIGIT(*s)) {
4475             if (s[1] != ']') {
4476                 if (isDIGIT(s[1]) && s[2] == ']')
4477                     weight -= 10;
4478             }
4479             else
4480                 weight -= 100;
4481         }
4482         Zero(seen,256,char);
4483         un_char = 255;
4484         for (; s < send; s++) {
4485             last_un_char = un_char;
4486             un_char = (unsigned char)*s;
4487             switch (*s) {
4488             case '@':
4489             case '&':
4490             case '$':
4491                 weight -= seen[un_char] * 10;
4492                 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4493                     int len;
4494                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4495                     len = (int)strlen(tmpbuf);
4496                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4497                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4498                         weight -= 100;
4499                     else
4500                         weight -= 10;
4501                 }
4502                 else if (*s == '$'
4503                          && s[1]
4504                          && memCHRs("[#!%*<>()-=",s[1]))
4505                 {
4506                     if (/*{*/ memCHRs("])} =",s[2]))
4507                         weight -= 10;
4508                     else
4509                         weight -= 1;
4510                 }
4511                 break;
4512             case '\\':
4513                 un_char = 254;
4514                 if (s[1]) {
4515                     if (memCHRs("wds]",s[1]))
4516                         weight += 100;
4517                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4518                         weight += 1;
4519                     else if (memCHRs("rnftbxcav",s[1]))
4520                         weight += 40;
4521                     else if (isDIGIT(s[1])) {
4522                         weight += 40;
4523                         while (s[1] && isDIGIT(s[1]))
4524                             s++;
4525                     }
4526                 }
4527                 else
4528                     weight += 100;
4529                 break;
4530             case '-':
4531                 if (s[1] == '\\')
4532                     weight += 50;
4533                 if (memCHRs("aA01! ",last_un_char))
4534                     weight += 30;
4535                 if (memCHRs("zZ79~",s[1]))
4536                     weight += 30;
4537                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4538                     weight -= 5;        /* cope with negative subscript */
4539                 break;
4540             default:
4541                 if (!isWORDCHAR(last_un_char)
4542                     && !(last_un_char == '$' || last_un_char == '@'
4543                          || last_un_char == '&')
4544                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4545                     char *d = s;
4546                     while (isALPHA(*s))
4547                         s++;
4548                     if (keyword(d, s - d, 0))
4549                         weight -= 150;
4550                 }
4551                 if (un_char == last_un_char + 1)
4552                     weight += 5;
4553                 weight -= seen[un_char];
4554                 break;
4555             }
4556             seen[un_char]++;
4557         }
4558         if (weight >= 0)        /* probably a character class */
4559             return FALSE;
4560     }
4561
4562     return TRUE;
4563 }
4564
4565 /*
4566  * S_intuit_method
4567  *
4568  * Does all the checking to disambiguate
4569  *   foo bar
4570  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4571  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4572  *
4573  * First argument is the stuff after the first token, e.g. "bar".
4574  *
4575  * Not a method if foo is a filehandle.
4576  * Not a method if foo is a subroutine prototyped to take a filehandle.
4577  * Not a method if it's really "Foo $bar"
4578  * Method if it's "foo $bar"
4579  * Not a method if it's really "print foo $bar"
4580  * Method if it's really "foo package::" (interpreted as package->foo)
4581  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4582  * Not a method if bar is a filehandle or package, but is quoted with
4583  *   =>
4584  */
4585
4586 STATIC int
4587 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4588 {
4589     char *s = start + (*start == '$');
4590     char tmpbuf[sizeof PL_tokenbuf];
4591     STRLEN len;
4592     GV* indirgv;
4593         /* Mustn't actually add anything to a symbol table.
4594            But also don't want to "initialise" any placeholder
4595            constants that might already be there into full
4596            blown PVGVs with attached PVCV.  */
4597     GV * const gv =
4598         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4599
4600     PERL_ARGS_ASSERT_INTUIT_METHOD;
4601
4602     if (!FEATURE_INDIRECT_IS_ENABLED)
4603         return 0;
4604
4605     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4606             return 0;
4607     if (cv && SvPOK(cv)) {
4608         const char *proto = CvPROTO(cv);
4609         if (proto) {
4610             while (*proto && (isSPACE(*proto) || *proto == ';'))
4611                 proto++;
4612             if (*proto == '*')
4613                 return 0;
4614         }
4615     }
4616
4617     if (*start == '$') {
4618         SSize_t start_off = start - SvPVX(PL_linestr);
4619         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4620             || isUPPER(*PL_tokenbuf))
4621             return 0;
4622         /* this could be $# */
4623         if (isSPACE(*s))
4624             s = skipspace(s);
4625         PL_bufptr = SvPVX(PL_linestr) + start_off;
4626         PL_expect = XREF;
4627         return *s == '(' ? FUNCMETH : METHOD;
4628     }
4629
4630     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4631     /* start is the beginning of the possible filehandle/object,
4632      * and s is the end of it
4633      * tmpbuf is a copy of it (but with single quotes as double colons)
4634      */
4635
4636     if (!keyword(tmpbuf, len, 0)) {
4637         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4638             len -= 2;
4639             tmpbuf[len] = '\0';
4640             goto bare_package;
4641         }
4642         indirgv = gv_fetchpvn_flags(tmpbuf, len,
4643                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4644                                     SVt_PVCV);
4645         if (indirgv && SvTYPE(indirgv) != SVt_NULL
4646          && (!isGV(indirgv) || GvCVu(indirgv)))
4647             return 0;
4648         /* filehandle or package name makes it a method */
4649         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4650             s = skipspace(s);
4651             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4652                 return 0;       /* no assumptions -- "=>" quotes bareword */
4653       bare_package:
4654             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4655                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4656             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4657             PL_expect = XTERM;
4658             force_next(BAREWORD);
4659             PL_bufptr = s;
4660             return *s == '(' ? FUNCMETH : METHOD;
4661         }
4662     }
4663     return 0;
4664 }
4665
4666 /* Encoded script support. filter_add() effectively inserts a
4667  * 'pre-processing' function into the current source input stream.
4668  * Note that the filter function only applies to the current source file
4669  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4670  *
4671  * The datasv parameter (which may be NULL) can be used to pass
4672  * private data to this instance of the filter. The filter function
4673  * can recover the SV using the FILTER_DATA macro and use it to
4674  * store private buffers and state information.
4675  *
4676  * The supplied datasv parameter is upgraded to a PVIO type
4677  * and the IoDIRP/IoANY field is used to store the function pointer,
4678  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4679  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4680  * private use must be set using malloc'd pointers.
4681  */
4682
4683 SV *
4684 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4685 {
4686     if (!funcp)
4687         return NULL;
4688
4689     if (!PL_parser)
4690         return NULL;
4691
4692     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4693         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4694
4695     if (!PL_rsfp_filters)
4696         PL_rsfp_filters = newAV();
4697     if (!datasv)
4698         datasv = newSV(0);
4699     SvUPGRADE(datasv, SVt_PVIO);
4700     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4701     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4702     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4703                           FPTR2DPTR(void *, IoANY(datasv)),
4704                           SvPV_nolen(datasv)));
4705     av_unshift(PL_rsfp_filters, 1);
4706     av_store(PL_rsfp_filters, 0, datasv) ;
4707     if (
4708         !PL_parser->filtered
4709      && PL_parser->lex_flags & LEX_EVALBYTES
4710      && PL_bufptr < PL_bufend
4711     ) {
4712         const char *s = PL_bufptr;
4713         while (s < PL_bufend) {
4714             if (*s == '\n') {
4715                 SV *linestr = PL_parser->linestr;
4716                 char *buf = SvPVX(linestr);
4717                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4718                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4719                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4720                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4721                 STRLEN const last_uni_pos =
4722                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4723                 STRLEN const last_lop_pos =
4724                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4725                 av_push(PL_rsfp_filters, linestr);
4726                 PL_parser->linestr =
4727                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4728                 buf = SvPVX(PL_parser->linestr);
4729                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4730                 PL_parser->bufptr = buf + bufptr_pos;
4731                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4732                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4733                 PL_parser->linestart = buf + linestart_pos;
4734                 if (PL_parser->last_uni)
4735                     PL_parser->last_uni = buf + last_uni_pos;
4736                 if (PL_parser->last_lop)
4737                     PL_parser->last_lop = buf + last_lop_pos;
4738                 SvLEN_set(linestr, SvCUR(linestr));
4739                 SvCUR_set(linestr, s - SvPVX(linestr));
4740                 PL_parser->filtered = 1;
4741                 break;
4742             }
4743             s++;
4744         }
4745     }
4746     return(datasv);
4747 }
4748
4749
4750 /* Delete most recently added instance of this filter function. */
4751 void
4752 Perl_filter_del(pTHX_ filter_t funcp)
4753 {
4754     SV *datasv;
4755
4756     PERL_ARGS_ASSERT_FILTER_DEL;
4757
4758 #ifdef DEBUGGING
4759     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4760                           FPTR2DPTR(void*, funcp)));
4761 #endif
4762     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4763         return;
4764     /* if filter is on top of stack (usual case) just pop it off */
4765     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4766     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4767         sv_free(av_pop(PL_rsfp_filters));
4768
4769         return;
4770     }
4771     /* we need to search for the correct entry and clear it     */
4772     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4773 }
4774
4775
4776 /* Invoke the idxth filter function for the current rsfp.        */
4777 /* maxlen 0 = read one text line */
4778 I32
4779 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4780 {
4781     filter_t funcp;
4782     I32 ret;
4783     SV *datasv = NULL;
4784     /* This API is bad. It should have been using unsigned int for maxlen.
4785        Not sure if we want to change the API, but if not we should sanity
4786        check the value here.  */
4787     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4788
4789     PERL_ARGS_ASSERT_FILTER_READ;
4790
4791     if (!PL_parser || !PL_rsfp_filters)
4792         return -1;
4793     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4794         /* Provide a default input filter to make life easy.    */
4795         /* Note that we append to the line. This is handy.      */
4796         DEBUG_P(PerlIO_printf(Perl_debug_log,
4797                               "filter_read %d: from rsfp\n", idx));
4798         if (correct_length) {
4799             /* Want a block */
4800             int len ;
4801             const int old_len = SvCUR(buf_sv);
4802
4803             /* ensure buf_sv is large enough */
4804             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4805             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4806                                    correct_length)) <= 0) {
4807                 if (PerlIO_error(PL_rsfp))
4808                     return -1;          /* error */
4809                 else
4810                     return 0 ;          /* end of file */
4811             }
4812             SvCUR_set(buf_sv, old_len + len) ;
4813             SvPVX(buf_sv)[old_len + len] = '\0';
4814         } else {
4815             /* Want a line */
4816             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4817                 if (PerlIO_error(PL_rsfp))
4818                     return -1;          /* error */
4819                 else
4820                     return 0 ;          /* end of file */
4821             }
4822         }
4823         return SvCUR(buf_sv);
4824     }
4825     /* Skip this filter slot if filter has been deleted */
4826     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4827         DEBUG_P(PerlIO_printf(Perl_debug_log,
4828                               "filter_read %d: skipped (filter deleted)\n",
4829                               idx));
4830         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4831     }
4832     if (SvTYPE(datasv) != SVt_PVIO) {
4833         if (correct_length) {
4834             /* Want a block */
4835             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4836             if (!remainder) return 0; /* eof */
4837             if (correct_length > remainder) correct_length = remainder;
4838             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4839             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4840         } else {
4841             /* Want a line */
4842             const char *s = SvEND(datasv);
4843             const char *send = SvPVX(datasv) + SvLEN(datasv);
4844             while (s < send) {
4845                 if (*s == '\n') {
4846                     s++;
4847                     break;
4848                 }
4849                 s++;
4850             }
4851             if (s == send) return 0; /* eof */
4852             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4853             SvCUR_set(datasv, s-SvPVX(datasv));
4854         }
4855         return SvCUR(buf_sv);
4856     }
4857     /* Get function pointer hidden within datasv        */
4858     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4859     DEBUG_P(PerlIO_printf(Perl_debug_log,
4860                           "filter_read %d: via function %p (%s)\n",
4861                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4862     /* Call function. The function is expected to       */
4863     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4864     /* Return: <0:error, =0:eof, >0:not eof             */
4865     ENTER;
4866     save_scalar(PL_errgv);
4867     ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4868     LEAVE;
4869     return ret;
4870 }
4871
4872 STATIC char *
4873 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4874 {
4875     PERL_ARGS_ASSERT_FILTER_GETS;
4876
4877 #ifdef PERL_CR_FILTER
4878     if (!PL_rsfp_filters) {
4879         filter_add(S_cr_textfilter,NULL);
4880     }
4881 #endif
4882     if (PL_rsfp_filters) {
4883         if (!append)
4884             SvCUR_set(sv, 0);   /* start with empty line        */
4885         if (FILTER_READ(0, sv, 0) > 0)
4886             return ( SvPVX(sv) ) ;
4887         else
4888             return NULL ;
4889     }
4890     else
4891         return (sv_gets(sv, PL_rsfp, append));
4892 }
4893
4894 STATIC HV *
4895 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4896 {
4897     GV *gv;
4898
4899     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4900
4901     if (memEQs(pkgname, len, "__PACKAGE__"))
4902         return PL_curstash;
4903
4904     if (len > 2
4905         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4906         && (gv = gv_fetchpvn_flags(pkgname,
4907                                    len,
4908                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4909     {
4910         return GvHV(gv);                        /* Foo:: */
4911     }
4912
4913     /* use constant CLASS => 'MyClass' */
4914     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4915     if (gv && GvCV(gv)) {
4916         SV * const sv = cv_const_sv(GvCV(gv));
4917         if (sv)
4918             return gv_stashsv(sv, 0);
4919     }
4920
4921     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4922 }
4923
4924
4925 STATIC char *
4926 S_tokenize_use(pTHX_ int is_use, char *s) {
4927     PERL_ARGS_ASSERT_TOKENIZE_USE;
4928
4929     if (PL_expect != XSTATE)
4930         /* diag_listed_as: "use" not allowed in expression */
4931         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4932                     is_use ? "use" : "no"));
4933     PL_expect = XTERM;
4934     s = skipspace(s);
4935     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4936         s = force_version(s, TRUE);
4937         if (*s == ';' || *s == '}'
4938                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4939             NEXTVAL_NEXTTOKE.opval = NULL;
4940             force_next(BAREWORD);
4941         }
4942         else if (*s == 'v') {
4943             s = force_word(s,BAREWORD,FALSE,TRUE);
4944             s = force_version(s, FALSE);
4945         }
4946     }
4947     else {
4948         s = force_word(s,BAREWORD,FALSE,TRUE);
4949         s = force_version(s, FALSE);
4950     }
4951     pl_yylval.ival = is_use;
4952     return s;
4953 }
4954 #ifdef DEBUGGING
4955     static const char* const exp_name[] =
4956         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4957           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4958           "SIGVAR", "TERMORDORDOR"
4959         };
4960 #endif
4961
4962 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4963 STATIC bool
4964 S_word_takes_any_delimiter(char *p, STRLEN len)
4965 {
4966     return (len == 1 && memCHRs("msyq", p[0]))
4967             || (len == 2
4968                 && ((p[0] == 't' && p[1] == 'r')
4969                     || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
4970 }
4971
4972 static void
4973 S_check_scalar_slice(pTHX_ char *s)
4974 {
4975     s++;
4976     while (SPACE_OR_TAB(*s)) s++;
4977     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4978                                                              PL_bufend,
4979                                                              UTF))
4980     {
4981         return;
4982     }
4983     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4984            || (*s && memCHRs(" \t$#+-'\"", *s)))
4985     {
4986         s += UTF ? UTF8SKIP(s) : 1;
4987     }
4988     if (*s == '}' || *s == ']')
4989         pl_yylval.ival = OPpSLICEWARNING;
4990 }
4991
4992 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4993 static void
4994 S_lex_token_boundary(pTHX)
4995 {
4996     PL_oldoldbufptr = PL_oldbufptr;
4997     PL_oldbufptr = PL_bufptr;
4998 }
4999
5000 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
5001 static char *
5002 S_vcs_conflict_marker(pTHX_ char *s)
5003 {
5004     lex_token_boundary();
5005     PL_bufptr = s;
5006     yyerror("Version control conflict marker");
5007     while (s < PL_bufend && *s != '\n')
5008         s++;
5009     return s;
5010 }
5011
5012 static int
5013 yyl_sigvar(pTHX_ char *s)
5014 {
5015     /* we expect the sigil and optional var name part of a
5016      * signature element here. Since a '$' is not necessarily
5017      * followed by a var name, handle it specially here; the general
5018      * yylex code would otherwise try to interpret whatever follows
5019      * as a var; e.g. ($, ...) would be seen as the var '$,'
5020      */
5021
5022     U8 sigil;
5023
5024     s = skipspace(s);
5025     sigil = *s++;
5026     PL_bufptr = s; /* for error reporting */
5027     switch (sigil) {
5028     case '$':
5029     case '@':
5030     case '%':
5031         /* spot stuff that looks like an prototype */
5032         if (memCHRs("$:@%&*;\\[]", *s)) {
5033             yyerror("Illegal character following sigil in a subroutine signature");
5034             break;
5035         }
5036         /* '$#' is banned, while '$ # comment' isn't */
5037         if (*s == '#') {
5038             yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5039             break;
5040         }
5041         s = skipspace(s);
5042         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5043             char *dest = PL_tokenbuf + 1;
5044             /* read var name, including sigil, into PL_tokenbuf */
5045             PL_tokenbuf[0] = sigil;
5046             parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5047                 0, cBOOL(UTF), FALSE, FALSE);
5048             *dest = '\0';
5049             assert(PL_tokenbuf[1]); /* we have a variable name */
5050         }
5051         else {
5052             *PL_tokenbuf = 0;
5053             PL_in_my = 0;
5054         }
5055
5056         s = skipspace(s);
5057         /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5058          * as the ASSIGNOP, and exclude other tokens that start with =
5059          */
5060         if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
5061             /* save now to report with the same context as we did when
5062              * all ASSIGNOPS were accepted */
5063             PL_oldbufptr = s;
5064
5065             ++s;
5066             NEXTVAL_NEXTTOKE.ival = 0;
5067             force_next(ASSIGNOP);
5068             PL_expect = XTERM;
5069         }
5070         else if (*s == ',' || *s == ')') {
5071             PL_expect = XOPERATOR;
5072         }
5073         else {
5074             /* make sure the context shows the unexpected character and
5075              * hopefully a bit more */
5076             if (*s) ++s;
5077             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5078                 s++;
5079             PL_bufptr = s; /* for error reporting */
5080             yyerror("Illegal operator following parameter in a subroutine signature");
5081             PL_in_my = 0;
5082         }
5083         if (*PL_tokenbuf) {
5084             NEXTVAL_NEXTTOKE.ival = sigil;
5085             force_next('p'); /* force a signature pending identifier */
5086         }
5087         break;
5088
5089     case ')':
5090         PL_expect = XBLOCK;
5091         break;
5092     case ',': /* handle ($a,,$b) */
5093         break;
5094
5095     default:
5096         PL_in_my = 0;
5097         yyerror("A signature parameter must start with '$', '@' or '%'");
5098         /* very crude error recovery: skip to likely next signature
5099          * element */
5100         while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5101             s++;
5102         break;
5103     }
5104
5105     switch (sigil) {
5106         case ',': TOKEN (PERLY_COMMA);
5107         case '$': TOKEN (PERLY_DOLLAR);
5108         case '@': TOKEN (PERLY_SNAIL);
5109         case '%': TOKEN (PERLY_PERCENT_SIGN);
5110         case ')': TOKEN (PERLY_PAREN_CLOSE);
5111         default:  TOKEN (sigil);
5112     }
5113 }
5114
5115 static int
5116 yyl_dollar(pTHX_ char *s)
5117 {
5118     CLINE;
5119
5120     if (PL_expect == XPOSTDEREF) {
5121         if (s[1] == '#') {
5122             s++;
5123             POSTDEREF(DOLSHARP);
5124         }
5125         POSTDEREF(PERLY_DOLLAR);
5126     }
5127
5128     if (   s[1] == '#'
5129         && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5130             || memCHRs("{$:+-@", s[2])))
5131     {
5132         PL_tokenbuf[0] = '@';
5133         s = scan_ident(s + 1, PL_tokenbuf + 1,
5134                        sizeof PL_tokenbuf - 1, FALSE);
5135         if (PL_expect == XOPERATOR) {
5136             char *d = s;
5137             if (PL_bufptr > s) {
5138                 d = PL_bufptr-1;
5139                 PL_bufptr = PL_oldbufptr;
5140             }
5141             no_op("Array length", d);
5142         }
5143         if (!PL_tokenbuf[1])
5144             PREREF(DOLSHARP);
5145         PL_expect = XOPERATOR;
5146         force_ident_maybe_lex('#');
5147         TOKEN(DOLSHARP);
5148     }
5149
5150     PL_tokenbuf[0] = '$';
5151     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5152     if (PL_expect == XOPERATOR) {
5153         char *d = s;
5154         if (PL_bufptr > s) {
5155             d = PL_bufptr-1;
5156             PL_bufptr = PL_oldbufptr;
5157         }
5158         no_op("Scalar", d);
5159     }
5160     if (!PL_tokenbuf[1]) {
5161         if (s == PL_bufend)
5162             yyerror("Final $ should be \\$ or $name");
5163         PREREF(PERLY_DOLLAR);
5164     }
5165
5166     {
5167         const char tmp = *s;
5168         if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5169             s = skipspace(s);
5170
5171         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5172             && intuit_more(s, PL_bufend)) {
5173             if (*s == '[') {
5174                 PL_tokenbuf[0] = '@';
5175                 if (ckWARN(WARN_SYNTAX)) {
5176                     char *t = s+1;
5177
5178                     while ( t < PL_bufend ) {
5179                         if (isSPACE(*t)) {
5180                             do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5181                             /* consumed one or more space chars */
5182                         } else if (*t == '$' || *t == '@') {
5183                             /* could be more than one '$' like $$ref or @$ref */
5184                             do { t++; } while (t < PL_bufend && *t == '$');
5185
5186                             /* could be an abigail style identifier like $ foo */
5187                             while (t < PL_bufend && *t == ' ') t++;
5188
5189                             /* strip off the name of the var */
5190                             while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5191                                 t += UTF ? UTF8SKIP(t) : 1;
5192                             /* consumed a varname */
5193                         } else if (isDIGIT(*t)) {
5194                             /* deal with hex constants like 0x11 */
5195                             if (t[0] == '0' && t[1] == 'x') {
5196                                 t += 2;
5197                                 while (t < PL_bufend && isXDIGIT(*t)) t++;
5198                             } else {
5199                                 /* deal with decimal/octal constants like 1 and 0123 */
5200                                 do { t++; } while (isDIGIT(*t));
5201                                 if (t<PL_bufend && *t == '.') {
5202                                     do { t++; } while (isDIGIT(*t));
5203                                 }
5204                             }
5205                             /* consumed a number */
5206                         } else {
5207                             /* not a var nor a space nor a number */
5208                             break;
5209                         }
5210                     }
5211                     if (t < PL_bufend && *t++ == ',') {
5212                         PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5213                         while (t < PL_bufend && *t != ']')
5214                             t++;
5215                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5216                                     "Multidimensional syntax %" UTF8f " not supported",
5217                                     UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5218                     }
5219                 }
5220             }
5221             else if (*s == '{') {
5222                 char *t;
5223                 PL_tokenbuf[0] = '%';
5224                 if (    strEQ(PL_tokenbuf+1, "SIG")
5225                     && ckWARN(WARN_SYNTAX)
5226                     && (t = (char *) memchr(s, '}', PL_bufend - s))
5227                     && (t = (char *) memchr(t, '=', PL_bufend - t)))
5228                 {
5229                     char tmpbuf[sizeof PL_tokenbuf];
5230                     do {
5231                         t++;
5232                     } while (isSPACE(*t));
5233                     if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5234                         STRLEN len;
5235                         t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5236                                         &len);
5237                         while (isSPACE(*t))
5238                             t++;
5239                         if (  *t == ';'
5240                             && get_cvn_flags(tmpbuf, len, UTF
5241                                                             ? SVf_UTF8
5242                                                             : 0))
5243                         {
5244                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5245                                 "You need to quote \"%" UTF8f "\"",
5246                                     UTF8fARG(UTF, len, tmpbuf));
5247                         }
5248                     }
5249                 }
5250             }
5251         }
5252
5253         PL_expect = XOPERATOR;
5254         if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5255             const bool islop = (PL_last_lop == PL_oldoldbufptr);
5256             if (!islop || PL_last_lop_op == OP_GREPSTART)
5257                 PL_expect = XOPERATOR;
5258             else if (memCHRs("$@\"'`q", *s))
5259                 PL_expect = XTERM;              /* e.g. print $fh "foo" */
5260             else if (   memCHRs("&*<%", *s)
5261                      && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5262             {
5263                 PL_expect = XTERM;              /* e.g. print $fh &sub */
5264             }
5265             else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5266                 char tmpbuf[sizeof PL_tokenbuf];
5267                 int t2;
5268                 STRLEN len;
5269                 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5270                 if ((t2 = keyword(tmpbuf, len, 0))) {
5271                     /* binary operators exclude handle interpretations */
5272                     switch (t2) {
5273                     case -KEY_x:
5274                     case -KEY_eq:
5275                     case -KEY_ne:
5276                     case -KEY_gt:
5277                     case -KEY_lt:
5278                     case -KEY_ge:
5279                     case -KEY_le:
5280                     case -KEY_cmp:
5281                         break;
5282                     default:
5283                         PL_expect = XTERM;      /* e.g. print $fh length() */
5284                         break;
5285                     }
5286                 }
5287                 else {
5288                     PL_expect = XTERM;  /* e.g. print $fh subr() */
5289                 }
5290             }
5291             else if (isDIGIT(*s))
5292                 PL_expect = XTERM;              /* e.g. print $fh 3 */
5293             else if (*s == '.' && isDIGIT(s[1]))
5294                 PL_expect = XTERM;              /* e.g. print $fh .3 */
5295             else if ((*s == '?' || *s == '-' || *s == '+')
5296                 && !isSPACE(s[1]) && s[1] != '=')
5297                 PL_expect = XTERM;              /* e.g. print $fh -1 */
5298             else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5299                      && s[1] != '/')
5300                 PL_expect = XTERM;              /* e.g. print $fh /.../
5301                                                XXX except DORDOR operator
5302                                             */
5303             else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5304                      && s[2] != '=')
5305                 PL_expect = XTERM;              /* print $fh <<"EOF" */
5306         }
5307     }
5308     force_ident_maybe_lex('$');
5309     TOKEN(PERLY_DOLLAR);
5310 }
5311
5312 static int
5313 yyl_sub(pTHX_ char *s, const int key)
5314 {
5315     char * const tmpbuf = PL_tokenbuf + 1;
5316     bool have_name, have_proto;
5317     STRLEN len;
5318     SV *format_name = NULL;
5319     bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
5320
5321     SSize_t off = s-SvPVX(PL_linestr);
5322     char *d;
5323
5324     s = skipspace(s); /* can move PL_linestr */
5325
5326     d = SvPVX(PL_linestr)+off;
5327
5328     SAVEBOOL(PL_parser->sig_seen);
5329     PL_parser->sig_seen = FALSE;
5330
5331     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5332         || *s == '\''
5333         || (*s == ':' && s[1] == ':'))
5334     {
5335
5336         PL_expect = XATTRBLOCK;
5337         d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5338                       &len);
5339         if (key == KEY_format)
5340             format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5341         *PL_tokenbuf = '&';
5342         if (memchr(tmpbuf, ':', len) || key != KEY_sub
5343          || pad_findmy_pvn(
5344                 PL_tokenbuf, len + 1, 0
5345             ) != NOT_IN_PAD)
5346             sv_setpvn(PL_subname, tmpbuf, len);
5347         else {
5348             sv_setsv(PL_subname,PL_curstname);
5349             sv_catpvs(PL_subname,"::");
5350             sv_catpvn(PL_subname,tmpbuf,len);
5351         }
5352         if (SvUTF8(PL_linestr))
5353             SvUTF8_on(PL_subname);
5354         have_name = TRUE;
5355
5356         s = skipspace(d);
5357     }
5358     else {
5359         if (key == KEY_my || key == KEY_our || key==KEY_state) {
5360             *d = '\0';
5361             /* diag_listed_as: Missing name in "%s sub" */
5362             Perl_croak(aTHX_
5363                       "Missing name in \"%s\"", PL_bufptr);
5364         }
5365         PL_expect = XATTRTERM;
5366         sv_setpvs(PL_subname,"?");
5367         have_name = FALSE;
5368     }
5369
5370     if (key == KEY_format) {
5371         if (format_name) {
5372             NEXTVAL_NEXTTOKE.opval
5373                 = newSVOP(OP_CONST,0, format_name);
5374             NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5375             force_next(BAREWORD);
5376         }
5377         PREBLOCK(FORMAT);
5378     }
5379
5380     /* Look for a prototype */
5381     if (*s == '(' && !is_sigsub) {
5382         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5383         if (!s)
5384             Perl_croak(aTHX_ "Prototype not terminated");
5385         COPLINE_SET_FROM_MULTI_END;
5386         (void)validate_proto(PL_subname, PL_lex_stuff,
5387                              ckWARN(WARN_ILLEGALPROTO), 0);
5388         have_proto = TRUE;
5389
5390         s = skipspace(s);
5391     }
5392     else
5393         have_proto = FALSE;
5394
5395     if (  !(*s == ':' && s[1] != ':')
5396         && (*s != '{' && *s != '(') && key != KEY_format)
5397     {
5398         assert(key == KEY_sub || key == KEY_AUTOLOAD ||
5399                key == KEY_DESTROY || key == KEY_BEGIN ||
5400                key == KEY_UNITCHECK || key == KEY_CHECK ||
5401                key == KEY_INIT || key == KEY_END ||
5402                key == KEY_my || key == KEY_state ||
5403                key == KEY_our);
5404         if (!have_name)
5405             Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5406         else if (*s != ';' && *s != '}')
5407             Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5408     }
5409
5410     if (have_proto) {
5411         NEXTVAL_NEXTTOKE.opval =
5412             newSVOP(OP_CONST, 0, PL_lex_stuff);
5413         PL_lex_stuff = NULL;
5414         force_next(THING);
5415     }
5416     if (!have_name) {
5417         if (PL_curstash)
5418             sv_setpvs(PL_subname, "__ANON__");
5419         else
5420             sv_setpvs(PL_subname, "__ANON__::__ANON__");
5421         if (is_sigsub)
5422             TOKEN(ANON_SIGSUB);
5423         else
5424             TOKEN(ANONSUB);
5425     }
5426     force_ident_maybe_lex('&');
5427     if (is_sigsub)
5428         TOKEN(SIGSUB);
5429     else
5430         TOKEN(SUB);
5431 }
5432
5433 static int
5434 yyl_interpcasemod(pTHX_ char *s)
5435 {
5436 #ifdef DEBUGGING
5437     if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5438         Perl_croak(aTHX_
5439                    "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5440                    PL_bufptr, PL_bufend, *PL_bufptr);
5441 #endif
5442
5443     if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5444         /* if at a \E */
5445         if (PL_lex_casemods) {
5446             const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5447             PL_lex_casestack[PL_lex_casemods] = '\0';
5448
5449             if (PL_bufptr != PL_bufend
5450                 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5451                     || oldmod == 'F')) {
5452                 PL_bufptr += 2;
5453                 PL_lex_state = LEX_INTERPCONCAT;
5454             }
5455             PL_lex_allbrackets--;
5456             return REPORT(PERLY_PAREN_CLOSE);
5457         }
5458         else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5459            /* Got an unpaired \E */
5460            Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5461                     "Useless use of \\E");
5462         }
5463         if (PL_bufptr != PL_bufend)
5464             PL_bufptr += 2;
5465         PL_lex_state = LEX_INTERPCONCAT;
5466         return yylex();
5467     }
5468     else {
5469         DEBUG_T({
5470             PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5471         });
5472         s = PL_bufptr + 1;
5473         if (s[1] == '\\' && s[2] == 'E') {
5474             PL_bufptr = s + 3;
5475             PL_lex_state = LEX_INTERPCONCAT;
5476             return yylex();
5477         }
5478         else {
5479             I32 tmp;
5480             if (   memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5481                 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5482             {
5483                 tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
5484             }
5485             if ((*s == 'L' || *s == 'U' || *s == 'F')
5486                 && (strpbrk(PL_lex_casestack, "LUF")))
5487             {
5488                 PL_lex_casestack[--PL_lex_casemods] = '\0';
5489                 PL_lex_allbrackets--;
5490                 return REPORT(PERLY_PAREN_CLOSE);
5491             }
5492             if (PL_lex_casemods > 10)
5493                 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5494             PL_lex_casestack[PL_lex_casemods++] = *s;
5495             PL_lex_casestack[PL_lex_casemods] = '\0';
5496             PL_lex_state = LEX_INTERPCONCAT;
5497             NEXTVAL_NEXTTOKE.ival = 0;
5498             force_next((2<<24)|PERLY_PAREN_OPEN);
5499             if (*s == 'l')
5500                 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5501             else if (*s == 'u')
5502                 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5503             else if (*s == 'L')
5504                 NEXTVAL_NEXTTOKE.ival = OP_LC;
5505             else if (*s == 'U')
5506                 NEXTVAL_NEXTTOKE.ival = OP_UC;
5507             else if (*s == 'Q')
5508                 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5509             else if (*s == 'F')
5510                 NEXTVAL_NEXTTOKE.ival = OP_FC;
5511             else
5512                 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5513             PL_bufptr = s + 1;
5514         }
5515         force_next(FUNC);
5516         if (PL_lex_starts) {
5517             s = PL_bufptr;
5518             PL_lex_starts = 0;
5519             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5520             if (PL_lex_casemods == 1 && PL_lex_inpat)
5521                 TOKEN(PERLY_COMMA);
5522             else
5523                 AopNOASSIGN(OP_CONCAT);
5524         }
5525         else
5526             return yylex();
5527     }
5528 }
5529
5530 static int
5531 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5532                         GV **pgv, GV ***pgvp)
5533 {
5534     GV *ogv = NULL;     /* override (winner) */
5535     GV *hgv = NULL;     /* hidden (loser) */
5536     GV *gv = *pgv;
5537
5538     if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5539         CV *cv;
5540         if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5541                                     (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5542                                     SVt_PVCV))
5543             && (cv = GvCVu(gv)))
5544         {
5545             if (GvIMPORTED_CV(gv))
5546                 ogv = gv;
5547             else if (! CvMETHOD(cv))
5548                 hgv = gv;
5549         }
5550         if (!ogv
5551             && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5552             && (gv = **pgvp)
5553             && (isGV_with_GP(gv)
5554                 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5555                 :   SvPCS_IMPORTED(gv)
5556                 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5557                                                          len, 0), 1)))
5558         {
5559             ogv = gv;
5560         }
5561     }
5562
5563     *pgv = gv;
5564
5565     if (ogv) {
5566         *orig_keyword = key;
5567         return 0;               /* overridden by import or by GLOBAL */
5568     }
5569     else if (gv && !*pgvp
5570              && -key==KEY_lock  /* XXX generalizable kludge */
5571              && GvCVu(gv))
5572     {
5573         return 0;               /* any sub overrides "weak" keyword */
5574     }
5575     else {                      /* no override */
5576         key = -key;
5577         if (key == KEY_dump) {
5578             Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5579         }
5580         *pgv = NULL;
5581         *pgvp = 0;
5582         if (hgv && key != KEY_x)        /* never ambiguous */
5583             Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5584                            "Ambiguous call resolved as CORE::%s(), "
5585                            "qualify as such or use &",
5586                            GvENAME(hgv));
5587         return key;
5588     }
5589 }
5590
5591 static int
5592 yyl_qw(pTHX_ char *s, STRLEN len)
5593 {
5594     OP *words = NULL;
5595
5596     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5597     if (!s)
5598         missingterm(NULL, 0);
5599
5600     COPLINE_SET_FROM_MULTI_END;
5601     PL_expect = XOPERATOR;
5602     if (SvCUR(PL_lex_stuff)) {
5603         int warned_comma = !ckWARN(WARN_QW);
5604         int warned_comment = warned_comma;
5605         char *d = SvPV_force(PL_lex_stuff, len);
5606         while (len) {
5607             for (; isSPACE(*d) && len; --len, ++d)
5608                 /**/;
5609             if (len) {
5610                 SV *sv;
5611                 const char *b = d;
5612                 if (!warned_comma || !warned_comment) {
5613                     for (; !isSPACE(*d) && len; --len, ++d) {
5614                         if (!warned_comma && *d == ',') {
5615                             Perl_warner(aTHX_ packWARN(WARN_QW),
5616                                 "Possible attempt to separate words with commas");
5617                             ++warned_comma;
5618                         }
5619                         else if (!warned_comment && *d == '#') {
5620                             Perl_warner(aTHX_ packWARN(WARN_QW),
5621                                 "Possible attempt to put comments in qw() list");
5622                             ++warned_comment;
5623                         }
5624                     }
5625                 }
5626                 else {
5627                     for (; !isSPACE(*d) && len; --len, ++d)
5628                         /**/;
5629                 }
5630                 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5631                 words = op_append_elem(OP_LIST, words,
5632                                        newSVOP(OP_CONST, 0, tokeq(sv)));
5633             }
5634         }
5635     }
5636     if (!words)
5637         words = newNULLLIST();
5638     SvREFCNT_dec_NN(PL_lex_stuff);
5639     PL_lex_stuff = NULL;
5640     PL_expect = XOPERATOR;
5641     pl_yylval.opval = sawparens(words);
5642     TOKEN(QWLIST);
5643 }
5644
5645 static int
5646 yyl_hyphen(pTHX_ char *s)
5647 {
5648     if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5649         I32 ftst = 0;
5650         char tmp;
5651
5652         s++;
5653         PL_bufptr = s;
5654         tmp = *s++;
5655
5656         while (s < PL_bufend && SPACE_OR_TAB(*s))
5657             s++;
5658
5659         if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5660             s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5661             DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5662             OPERATOR(PERLY_MINUS);              /* unary minus */
5663         }
5664         switch (tmp) {
5665         case 'r': ftst = OP_FTEREAD;    break;
5666         case 'w': ftst = OP_FTEWRITE;   break;
5667         case 'x': ftst = OP_FTEEXEC;    break;
5668         case 'o': ftst = OP_FTEOWNED;   break;
5669         case 'R': ftst = OP_FTRREAD;    break;
5670         case 'W': ftst = OP_FTRWRITE;   break;
5671         case 'X': ftst = OP_FTREXEC;    break;
5672         case 'O': ftst = OP_FTROWNED;   break;
5673         case 'e': ftst = OP_FTIS;       break;
5674         case 'z': ftst = OP_FTZERO;     break;
5675         case 's': ftst = OP_FTSIZE;     break;
5676         case 'f': ftst = OP_FTFILE;     break;
5677         case 'd': ftst = OP_FTDIR;      break;
5678         case 'l': ftst = OP_FTLINK;     break;
5679         case 'p': ftst = OP_FTPIPE;     break;
5680         case 'S': ftst = OP_FTSOCK;     break;
5681         case 'u': ftst = OP_FTSUID;     break;
5682         case 'g': ftst = OP_FTSGID;     break;
5683         case 'k': ftst = OP_FTSVTX;     break;
5684         case 'b': ftst = OP_FTBLK;      break;
5685         case 'c': ftst = OP_FTCHR;      break;
5686         case 't': ftst = OP_FTTTY;      break;
5687         case 'T': ftst = OP_FTTEXT;     break;
5688         case 'B': ftst = OP_FTBINARY;   break;
5689         case 'M': case 'A': case 'C':
5690             gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5691             switch (tmp) {
5692             case 'M': ftst = OP_FTMTIME; break;
5693             case 'A': ftst = OP_FTATIME; break;
5694             case 'C': ftst = OP_FTCTIME; break;
5695             default:                     break;
5696             }
5697             break;
5698         default:
5699             break;
5700         }
5701         if (ftst) {
5702             PL_last_uni = PL_oldbufptr;
5703             PL_last_lop_op = (OPCODE)ftst;
5704             DEBUG_T( {
5705                 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5706             } );
5707             FTST(ftst);
5708         }
5709         else {
5710             /* Assume it was a minus followed by a one-letter named
5711              * subroutine call (or a -bareword), then. */
5712             DEBUG_T( {
5713                 PerlIO_printf(Perl_debug_log,
5714                     "### '-%c' looked like a file test but was not\n",
5715                     (int) tmp);
5716             } );
5717             s = --PL_bufptr;
5718         }
5719     }
5720     {
5721         const char tmp = *s++;
5722         if (*s == tmp) {
5723             s++;
5724             if (PL_expect == XOPERATOR)
5725                 TERM(POSTDEC);
5726             else
5727                 OPERATOR(PREDEC);
5728         }
5729         else if (*s == '>') {
5730             s++;
5731             s = skipspace(s);
5732             if (((*s == '$' || *s == '&') && s[1] == '*')
5733               ||(*s == '$' && s[1] == '#' && s[2] == '*')
5734               ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5735               ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5736              )
5737             {
5738                 PL_expect = XPOSTDEREF;
5739                 TOKEN(ARROW);
5740             }
5741             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5742                 s = force_word(s,METHOD,FALSE,TRUE);
5743                 TOKEN(ARROW);
5744             }
5745             else if (*s == '$')
5746                 OPERATOR(ARROW);
5747             else
5748                 TERM(ARROW);
5749         }
5750         if (PL_expect == XOPERATOR) {
5751             if (*s == '='
5752                 && !PL_lex_allbrackets
5753                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5754             {
5755                 s--;
5756                 TOKEN(0);
5757             }
5758             Aop(OP_SUBTRACT);
5759         }
5760         else {
5761             if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5762                 check_uni();
5763             OPERATOR(PERLY_MINUS);              /* unary minus */
5764         }
5765     }
5766 }
5767
5768 static int
5769 yyl_plus(pTHX_ char *s)
5770 {
5771     const char tmp = *s++;
5772     if (*s == tmp) {
5773         s++;
5774         if (PL_expect == XOPERATOR)
5775             TERM(POSTINC);
5776         else
5777             OPERATOR(PREINC);
5778     }
5779     if (PL_expect == XOPERATOR) {
5780         if (*s == '='
5781             && !PL_lex_allbrackets
5782             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5783         {
5784             s--;
5785             TOKEN(0);
5786         }
5787         Aop(OP_ADD);
5788     }
5789     else {
5790         if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5791             check_uni();
5792         OPERATOR(PERLY_PLUS);
5793     }
5794 }
5795
5796 static int
5797 yyl_star(pTHX_ char *s)
5798 {
5799     if (PL_expect == XPOSTDEREF)
5800         POSTDEREF(PERLY_STAR);
5801
5802     if (PL_expect != XOPERATOR) {
5803         s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5804         PL_expect = XOPERATOR;
5805         force_ident(PL_tokenbuf, PERLY_STAR);
5806         if (!*PL_tokenbuf)
5807             PREREF(PERLY_STAR);
5808         TERM(PERLY_STAR);
5809     }
5810
5811     s++;
5812     if (*s == '*') {
5813         s++;
5814         if (*s == '=' && !PL_lex_allbrackets
5815             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5816         {
5817             s -= 2;
5818             TOKEN(0);
5819         }
5820         PWop(OP_POW);
5821     }
5822
5823     if (*s == '='
5824         && !PL_lex_allbrackets
5825         && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5826     {
5827         s--;
5828         TOKEN(0);
5829     }
5830
5831     Mop(OP_MULTIPLY);
5832 }
5833
5834 static int
5835 yyl_percent(pTHX_ char *s)
5836 {
5837     if (PL_expect == XOPERATOR) {
5838         if (s[1] == '='
5839             && !PL_lex_allbrackets
5840             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5841         {
5842             TOKEN(0);
5843         }
5844         ++s;
5845         Mop(OP_MODULO);
5846     }
5847     else if (PL_expect == XPOSTDEREF)
5848         POSTDEREF(PERLY_PERCENT_SIGN);
5849
5850     PL_tokenbuf[0] = '%';
5851     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5852     pl_yylval.ival = 0;
5853     if (!PL_tokenbuf[1]) {
5854         PREREF(PERLY_PERCENT_SIGN);
5855     }
5856     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5857         && intuit_more(s, PL_bufend)) {
5858         if (*s == '[')
5859             PL_tokenbuf[0] = '@';
5860     }
5861     PL_expect = XOPERATOR;
5862     force_ident_maybe_lex('%');
5863     TERM(PERLY_PERCENT_SIGN);
5864 }
5865
5866 static int
5867 yyl_caret(pTHX_ char *s)
5868 {
5869     char *d = s;
5870     const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5871     if (bof && s[1] == '.')
5872         s++;
5873     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5874             (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5875     {
5876         s = d;
5877         TOKEN(0);
5878     }
5879     s++;
5880     BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5881 }
5882
5883 static int
5884 yyl_colon(pTHX_ char *s)
5885 {
5886     OP *attrs;
5887
5888     switch (PL_expect) {
5889     case XOPERATOR:
5890         if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5891             break;
5892         PL_bufptr = s;  /* update in case we back off */
5893         if (*s == '=') {
5894             Perl_croak(aTHX_
5895                        "Use of := for an empty attribute list is not allowed");
5896         }
5897         goto grabattrs;
5898     case XATTRBLOCK:
5899         PL_expect = XBLOCK;
5900         goto grabattrs;
5901     case XATTRTERM:
5902         PL_expect = XTERMBLOCK;
5903      grabattrs:
5904         /* NB: as well as parsing normal attributes, we also end up
5905          * here if there is something looking like attributes
5906          * following a signature (which is illegal, but used to be
5907          * legal in 5.20..5.26). If the latter, we still parse the
5908          * attributes so that error messages(s) are less confusing,
5909          * but ignore them (parser->sig_seen).
5910          */
5911         s = skipspace(s);
5912         attrs = NULL;
5913         while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5914             bool sig = PL_parser->sig_seen;
5915             I32 tmp;
5916             SV *sv;
5917             STRLEN len;
5918             char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5919             if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5920                 if (tmp < 0) tmp = -tmp;
5921                 switch (tmp) {
5922                 case KEY_or:
5923                 case KEY_and:
5924                 case KEY_for:
5925                 case KEY_foreach:
5926                 case KEY_unless:
5927                 case KEY_if:
5928                 case KEY_while:
5929                 case KEY_until:
5930                     goto got_attrs;
5931                 default:
5932                     break;
5933                 }
5934             }
5935             sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5936             if (*d == '(') {
5937                 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5938                 if (!d) {
5939                     if (attrs)
5940                         op_free(attrs);
5941                     sv_free(sv);
5942                     Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5943                 }
5944                 COPLINE_SET_FROM_MULTI_END;
5945             }
5946             if (PL_lex_stuff) {
5947                 sv_catsv(sv, PL_lex_stuff);
5948                 attrs = op_append_elem(OP_LIST, attrs,
5949                                     newSVOP(OP_CONST, 0, sv));
5950                 SvREFCNT_dec_NN(PL_lex_stuff);
5951                 PL_lex_stuff = NULL;
5952             }
5953             else {
5954                 /* NOTE: any CV attrs applied here need to be part of
5955                    the CVf_BUILTIN_ATTRS define in cv.h! */
5956                 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5957                     sv_free(sv);
5958                     if (!sig)
5959                         CvLVALUE_on(PL_compcv);
5960                 }
5961                 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5962                     sv_free(sv);
5963                     if (!sig)
5964                         CvMETHOD_on(PL_compcv);
5965                 }
5966                 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) {
5967                     sv_free(sv);
5968                     if (!sig) {
5969                         Perl_ck_warner_d(aTHX_
5970                             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5971                            ":const is experimental"
5972                         );
5973                         CvANONCONST_on(PL_compcv);
5974                         if (!CvANON(PL_compcv))
5975                             yyerror(":const is not permitted on named "
5976                                     "subroutines");
5977                     }
5978                 }
5979                 /* After we've set the flags, it could be argued that
5980                    we don't need to do the attributes.pm-based setting
5981                    process, and shouldn't bother appending recognized
5982                    flags.  To experiment with that, uncomment the
5983                    following "else".  (Note that's already been
5984                    uncommented.  That keeps the above-applied built-in
5985                    attributes from being intercepted (and possibly
5986                    rejected) by a package's attribute routines, but is
5987                    justified by the performance win for the common case
5988                    of applying only built-in attributes.) */
5989                 else
5990                     attrs = op_append_elem(OP_LIST, attrs,
5991                                         newSVOP(OP_CONST, 0,
5992                                                 sv));
5993             }
5994             s = skipspace(d);
5995             if (*s == ':' && s[1] != ':')
5996                 s = skipspace(s+1);
5997             else if (s == d)
5998                 break;  /* require real whitespace or :'s */
5999             /* XXX losing whitespace on sequential attributes here */
6000         }
6001
6002         if (*s != ';'
6003             && *s != '}'
6004             && !(PL_expect == XOPERATOR
6005                  ? (*s == '=' ||  *s == ')')
6006                  : (*s == '{' ||  *s == '(')))
6007         {
6008             const char q = ((*s == '\'') ? '"' : '\'');
6009             /* If here for an expression, and parsed no attrs, back off. */
6010             if (PL_expect == XOPERATOR && !attrs) {
6011                 s = PL_bufptr;
6012                 break;
6013             }
6014             /* MUST advance bufptr here to avoid bogus "at end of line"
6015                context messages from yyerror().
6016             */
6017             PL_bufptr = s;
6018             yyerror( (const char *)
6019                      (*s
6020                       ? Perl_form(aTHX_ "Invalid separator character "
6021                                   "%c%c%c in attribute list", q, *s, q)
6022                       : "Unterminated attribute list" ) );
6023             if (attrs)
6024                 op_free(attrs);
6025             OPERATOR(PERLY_COLON);
6026         }
6027
6028     got_attrs:
6029         if (PL_parser->sig_seen) {
6030             /* see comment about about sig_seen and parser error
6031              * handling */
6032             if (attrs)
6033                 op_free(attrs);
6034             Perl_croak(aTHX_ "Subroutine attributes must come "
6035                              "before the signature");
6036         }
6037         if (attrs) {
6038             NEXTVAL_NEXTTOKE.opval = attrs;
6039             force_next(THING);
6040         }
6041         TOKEN(COLONATTR);
6042     }
6043
6044     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6045         s--;
6046         TOKEN(0);
6047     }
6048
6049     PL_lex_allbrackets--;
6050     OPERATOR(PERLY_COLON);
6051 }
6052
6053 static int
6054 yyl_subproto(pTHX_ char *s, CV *cv)
6055 {
6056     STRLEN protolen = CvPROTOLEN(cv);
6057     const char *proto = CvPROTO(cv);
6058     bool optional;
6059
6060     proto = S_strip_spaces(aTHX_ proto, &protolen);
6061     if (!protolen)
6062         TERM(FUNC0SUB);
6063     if ((optional = *proto == ';')) {
6064         do {
6065             proto++;
6066         } while (*proto == ';');
6067     }
6068
6069     if (
6070         (
6071             (
6072                 *proto == '$' || *proto == '_'
6073              || *proto == '*' || *proto == '+'
6074             )
6075          && proto[1] == '\0'
6076         )
6077      || (
6078          *proto == '\\' && proto[1] && proto[2] == '\0'
6079         )
6080     ) {
6081         UNIPROTO(UNIOPSUB,optional);
6082     }
6083
6084     if (*proto == '\\' && proto[1] == '[') {
6085         const char *p = proto + 2;
6086         while(*p && *p != ']')
6087             ++p;
6088         if(*p == ']' && !p[1])
6089             UNIPROTO(UNIOPSUB,optional);
6090     }
6091
6092     if (*proto == '&' && *s == '{') {
6093         if (PL_curstash)
6094             sv_setpvs(PL_subname, "__ANON__");
6095         else
6096             sv_setpvs(PL_subname, "__ANON__::__ANON__");
6097         if (!PL_lex_allbrackets
6098             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6099         {
6100             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6101         }
6102         PREBLOCK(LSTOPSUB);
6103     }
6104
6105     return KEY_NULL;
6106 }
6107
6108 static int
6109 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
6110 {
6111     char *d;
6112     if (PL_lex_brackets > 100) {
6113         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6114     }
6115
6116     switch (PL_expect) {
6117     case XTERM:
6118     case XTERMORDORDOR:
6119         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6120         PL_lex_allbrackets++;
6121         OPERATOR(HASHBRACK);
6122     case XOPERATOR:
6123         while (s < PL_bufend && SPACE_OR_TAB(*s))
6124             s++;
6125         d = s;
6126         PL_tokenbuf[0] = '\0';
6127         if (d < PL_bufend && *d == '-') {
6128             PL_tokenbuf[0] = '-';
6129             d++;
6130             while (d < PL_bufend && SPACE_OR_TAB(*d))
6131                 d++;
6132         }
6133         if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6134             STRLEN len;
6135             d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6136                           FALSE, &len);
6137             while (d < PL_bufend && SPACE_OR_TAB(*d))
6138                 d++;
6139             if (*d == '}') {
6140                 const char minus = (PL_tokenbuf[0] == '-');
6141                 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6142                 if (minus)
6143                     force_next(PERLY_MINUS);
6144             }
6145         }
6146         /* FALLTHROUGH */
6147     case XATTRTERM:
6148     case XTERMBLOCK:
6149         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6150         PL_lex_allbrackets++;
6151         PL_expect = XSTATE;
6152         break;
6153     case XATTRBLOCK:
6154     case XBLOCK:
6155         PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6156         PL_lex_allbrackets++;
6157         PL_expect = XSTATE;
6158         break;
6159     case XBLOCKTERM:
6160         PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6161         PL_lex_allbrackets++;
6162         PL_expect = XSTATE;
6163         break;
6164     default: {
6165             const char *t;
6166             if (PL_oldoldbufptr == PL_last_lop)
6167                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6168             else
6169                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6170             PL_lex_allbrackets++;
6171             s = skipspace(s);
6172             if (*s == '}') {
6173                 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6174                     PL_expect = XTERM;
6175                     /* This hack is to get the ${} in the message. */
6176                     PL_bufptr = s+1;
6177                     yyerror("syntax error");
6178                     break;
6179                 }
6180                 OPERATOR(HASHBRACK);
6181             }
6182             if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6183                 /* ${...} or @{...} etc., but not print {...}
6184                  * Skip the disambiguation and treat this as a block.
6185                  */
6186                 goto block_expectation;
6187             }
6188             /* This hack serves to disambiguate a pair of curlies
6189              * as being a block or an anon hash.  Normally, expectation
6190              * determines that, but in cases where we're not in a
6191              * position to expect anything in particular (like inside
6192              * eval"") we have to resolve the ambiguity.  This code
6193              * covers the case where the first term in the curlies is a
6194              * quoted string.  Most other cases need to be explicitly
6195              * disambiguated by prepending a "+" before the opening
6196              * curly in order to force resolution as an anon hash.
6197              *
6198              * XXX should probably propagate the outer expectation
6199              * into eval"" to rely less on this hack, but that could
6200              * potentially break current behavior of eval"".
6201              * GSAR 97-07-21
6202              */
6203             t = s;
6204             if (*s == '\'' || *s == '"' || *s == '`') {
6205                 /* common case: get past first string, handling escapes */
6206                 for (t++; t < PL_bufend && *t != *s;)
6207                     if (*t++ == '\\')
6208                         t++;
6209                 t++;
6210             }
6211             else if (*s == 'q') {
6212                 if (++t < PL_bufend
6213                     && (!isWORDCHAR(*t)
6214                         || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6215                             && !isWORDCHAR(*t))))
6216                 {
6217                     /* skip q//-like construct */
6218                     const char *tmps;
6219                     char open, close, term;
6220                     I32 brackets = 1;
6221
6222                     while (t < PL_bufend && isSPACE(*t))
6223                         t++;
6224                     /* check for q => */
6225                     if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6226                         OPERATOR(HASHBRACK);
6227                     }
6228                     term = *t;
6229                     open = term;
6230                     if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6231                         term = tmps[5];
6232                     close = term;
6233                     if (open == close)
6234                         for (t++; t < PL_bufend; t++) {
6235                             if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6236                                 t++;
6237                             else if (*t == open)
6238                                 break;
6239                         }
6240                     else {
6241                         for (t++; t < PL_bufend; t++) {
6242                             if (*t == '\\' && t+1 < PL_bufend)
6243                                 t++;
6244                             else if (*t == close && --brackets <= 0)
6245                                 break;
6246                             else if (*t == open)
6247                                 brackets++;
6248                         }
6249                     }
6250                     t++;
6251                 }
6252                 else
6253                     /* skip plain q word */
6254                     while (   t < PL_bufend
6255                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6256                     {
6257                         t += UTF ? UTF8SKIP(t) : 1;
6258                     }
6259             }
6260             else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6261                 t += UTF ? UTF8SKIP(t) : 1;
6262                 while (   t < PL_bufend
6263                        && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6264                 {
6265                     t += UTF ? UTF8SKIP(t) : 1;
6266                 }
6267             }
6268             while (t < PL_bufend && isSPACE(*t))
6269                 t++;
6270             /* if comma follows first term, call it an anon hash */
6271             /* XXX it could be a comma expression with loop modifiers */
6272             if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6273                                || (*t == '=' && t[1] == '>')))
6274                 OPERATOR(HASHBRACK);
6275             if (PL_expect == XREF) {
6276               block_expectation:
6277                 /* If there is an opening brace or 'sub:', treat it
6278                    as a term to make ${{...}}{k} and &{sub:attr...}
6279                    dwim.  Otherwise, treat it as a statement, so
6280                    map {no strict; ...} works.
6281                  */
6282                 s = skipspace(s);
6283                 if (*s == '{') {
6284                     PL_expect = XTERM;
6285                     break;
6286                 }
6287                 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6288                     PL_bufptr = s;
6289                     d = s + 3;
6290                     d = skipspace(d);
6291                     s = PL_bufptr;
6292                     if (*d == ':') {
6293                         PL_expect = XTERM;
6294                         break;
6295                     }
6296                 }
6297                 PL_expect = XSTATE;
6298             }
6299             else {
6300                 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6301                 PL_expect = XSTATE;
6302             }
6303         }
6304         break;
6305     }
6306
6307     pl_yylval.ival = CopLINE(PL_curcop);
6308     PL_copline = NOLINE;   /* invalidate current command line number */
6309     TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN);
6310 }
6311
6312 static int
6313 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6314 {
6315     assert(s != PL_bufend);
6316     s++;
6317
6318     if (PL_lex_brackets <= 0)
6319         /* diag_listed_as: Unmatched right %s bracket */
6320         yyerror("Unmatched right curly bracket");
6321     else
6322         PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6323
6324     PL_lex_allbrackets--;
6325
6326     if (PL_lex_state == LEX_INTERPNORMAL) {
6327         if (PL_lex_brackets == 0) {
6328             if (PL_expect & XFAKEBRACK) {
6329                 PL_expect &= XENUMMASK;
6330                 PL_lex_state = LEX_INTERPEND;
6331                 PL_bufptr = s;
6332                 return yylex(); /* ignore fake brackets */
6333             }
6334             if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6335              && SvEVALED(PL_lex_repl))
6336                 PL_lex_state = LEX_INTERPEND;
6337             else if (*s == '-' && s[1] == '>')
6338                 PL_lex_state = LEX_INTERPENDMAYBE;
6339             else if (*s != '[' && *s != '{')
6340                 PL_lex_state = LEX_INTERPEND;
6341         }
6342     }
6343
6344     if (PL_expect & XFAKEBRACK) {
6345         PL_expect &= XENUMMASK;
6346         PL_bufptr = s;
6347         return yylex();         /* ignore fake brackets */
6348     }
6349
6350     force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE);
6351     if (formbrack) LEAVE_with_name("lex_format");
6352     if (formbrack == 2) { /* means . where arguments were expected */
6353         force_next(PERLY_SEMICOLON);
6354         TOKEN(FORMRBRACK);
6355     }
6356
6357     TOKEN(PERLY_SEMICOLON);
6358 }
6359
6360 static int
6361 yyl_ampersand(pTHX_ char *s)
6362 {
6363     if (PL_expect == XPOSTDEREF)
6364         POSTDEREF(PERLY_AMPERSAND);
6365
6366     s++;
6367     if (*s++ == '&') {
6368         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6369                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6370             s -= 2;
6371             TOKEN(0);
6372         }
6373         AOPERATOR(ANDAND);
6374     }
6375     s--;
6376
6377     if (PL_expect == XOPERATOR) {
6378         char *d;
6379         bool bof;
6380         if (   PL_bufptr == PL_linestart
6381             && ckWARN(WARN_SEMICOLON)
6382             && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6383         {
6384             CopLINE_dec(PL_curcop);
6385             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6386             CopLINE_inc(PL_curcop);
6387         }
6388         d = s;
6389         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6390             s++;
6391         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6392                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6393             s = d;
6394             s--;
6395             TOKEN(0);
6396         }
6397         if (d == s)
6398             BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6399         else
6400             BAop(OP_SBIT_AND);
6401     }
6402
6403     PL_tokenbuf[0] = '&';
6404     s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6405     pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6406
6407     if (PL_tokenbuf[1])
6408         force_ident_maybe_lex('&');
6409     else
6410         PREREF(PERLY_AMPERSAND);
6411
6412     TERM(PERLY_AMPERSAND);
6413 }
6414
6415 static int
6416 yyl_verticalbar(pTHX_ char *s)
6417 {
6418     char *d;
6419     bool bof;
6420
6421     s++;
6422     if (*s++ == '|') {
6423         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6424                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6425             s -= 2;
6426             TOKEN(0);
6427         }
6428         AOPERATOR(OROR);
6429     }
6430
6431     s--;
6432     d = s;
6433     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6434         s++;
6435
6436     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6437             (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6438         s = d - 1;
6439         TOKEN(0);
6440     }
6441
6442     BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6443 }
6444
6445 static int
6446 yyl_bang(pTHX_ char *s)
6447 {
6448     const char tmp = *s++;
6449     if (tmp == '=') {
6450         /* was this !=~ where !~ was meant?
6451          * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6452
6453         if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6454             const char *t = s+1;
6455
6456             while (t < PL_bufend && isSPACE(*t))
6457                 ++t;
6458
6459             if (*t == '/' || *t == '?'
6460                 || ((*t == 'm' || *t == 's' || *t == 'y')
6461                     && !isWORDCHAR(t[1]))
6462                 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6463                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6464                             "!=~ should be !~");
6465         }
6466
6467         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6468             s -= 2;
6469             TOKEN(0);
6470         }
6471
6472         ChEop(OP_NE);
6473     }
6474
6475     if (tmp == '~')
6476         PMop(OP_NOT);
6477
6478     s--;
6479     OPERATOR(PERLY_EXCLAMATION_MARK);
6480 }
6481
6482 static int
6483 yyl_snail(pTHX_ char *s)
6484 {
6485     if (PL_expect == XPOSTDEREF)
6486         POSTDEREF(PERLY_SNAIL);
6487     PL_tokenbuf[0] = '@';
6488     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6489     if (PL_expect == XOPERATOR) {
6490         char *d = s;
6491         if (PL_bufptr > s) {
6492             d = PL_bufptr-1;
6493             PL_bufptr = PL_oldbufptr;
6494         }
6495         no_op("Array", d);
6496     }
6497     pl_yylval.ival = 0;
6498     if (!PL_tokenbuf[1]) {
6499         PREREF(PERLY_SNAIL);
6500     }
6501     if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6502         s = skipspace(s);
6503     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6504         && intuit_more(s, PL_bufend))
6505     {
6506         if (*s == '{')
6507             PL_tokenbuf[0] = '%';
6508
6509         /* Warn about @ where they meant $. */
6510         if (*s == '[' || *s == '{') {
6511             if (ckWARN(WARN_SYNTAX)) {
6512                 S_check_scalar_slice(aTHX_ s);
6513             }
6514         }
6515     }
6516     PL_expect = XOPERATOR;
6517     force_ident_maybe_lex('@');
6518     TERM(PERLY_SNAIL);
6519 }
6520
6521 static int
6522 yyl_slash(pTHX_ char *s)
6523 {
6524     if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6525         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6526                 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6527             TOKEN(0);
6528         s += 2;
6529         AOPERATOR(DORDOR);
6530     }
6531     else if (PL_expect == XOPERATOR) {
6532         s++;
6533         if (*s == '=' && !PL_lex_allbrackets
6534             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6535         {
6536             s--;
6537             TOKEN(0);
6538         }
6539         Mop(OP_DIVIDE);
6540     }
6541     else {
6542         /* Disable warning on "study /blah/" */
6543         if (    PL_oldoldbufptr == PL_last_uni
6544             && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6545                 || memNE(PL_last_uni, "study", 5)
6546                 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6547          ))
6548             check_uni();
6549         s = scan_pat(s,OP_MATCH);
6550         TERM(sublex_start());
6551     }
6552 }
6553
6554 static int
6555 yyl_leftsquare(pTHX_ char *s)
6556 {
6557     if (PL_lex_brackets > 100)
6558         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6559     PL_lex_brackstack[PL_lex_brackets++] = 0;
6560     PL_lex_allbrackets++;
6561     s++;
6562     OPERATOR(PERLY_BRACKET_OPEN);
6563 }
6564
6565 static int
6566 yyl_rightsquare(pTHX_ char *s)
6567 {
6568     if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6569         TOKEN(0);
6570     s++;
6571     if (PL_lex_brackets <= 0)
6572         /* diag_listed_as: Unmatched right %s bracket */
6573         yyerror("Unmatched right square bracket");
6574     else
6575         --PL_lex_brackets;
6576     PL_lex_allbrackets--;
6577     if (PL_lex_state == LEX_INTERPNORMAL) {
6578         if (PL_lex_brackets == 0) {
6579             if (*s == '-' && s[1] == '>')
6580                 PL_lex_state = LEX_INTERPENDMAYBE;
6581             else if (*s != '[' && *s != '{')
6582                 PL_lex_state = LEX_INTERPEND;
6583         }
6584     }
6585     TERM(PERLY_BRACKET_CLOSE);
6586 }
6587
6588 static int
6589 yyl_tilde(pTHX_ char *s)
6590 {
6591     bool bof;
6592     if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6593         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6594             TOKEN(0);
6595         s += 2;
6596         Perl_ck_warner_d(aTHX_
6597             packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
6598             "Smartmatch is experimental");
6599         NCEop(OP_SMARTMATCH);
6600     }
6601     s++;
6602     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6603         s++;
6604         BCop(OP_SCOMPLEMENT);
6605     }
6606     BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6607 }
6608
6609 static int
6610 yyl_leftparen(pTHX_ char *s)
6611 {
6612     if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6613         PL_oldbufptr = PL_oldoldbufptr;         /* allow print(STDOUT 123) */
6614     else
6615         PL_expect = XTERM;
6616     s = skipspace(s);
6617     PL_lex_allbrackets++;
6618     TOKEN(PERLY_PAREN_OPEN);
6619 }
6620
6621 static int
6622 yyl_rightparen(pTHX_ char *s)
6623 {
6624     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6625         TOKEN(0);
6626     s++;
6627     PL_lex_allbrackets--;
6628     s = skipspace(s);
6629     if (*s == '{')
6630         PREBLOCK(PERLY_PAREN_CLOSE);
6631     TERM(PERLY_PAREN_CLOSE);
6632 }
6633
6634 static int
6635 yyl_leftpointy(pTHX_ char *s)
6636 {
6637     char tmp;
6638
6639     if (PL_expect != XOPERATOR) {
6640         if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6641             check_uni();
6642         if (s[1] == '<' && s[2] != '>')
6643             s = scan_heredoc(s);
6644         else
6645             s = scan_inputsymbol(s);
6646         PL_expect = XOPERATOR;
6647         TOKEN(sublex_start());
6648     }
6649
6650     s++;
6651
6652     tmp = *s++;
6653     if (tmp == '<') {
6654         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6655             s -= 2;
6656             TOKEN(0);
6657         }
6658         SHop(OP_LEFT_SHIFT);
6659     }
6660     if (tmp == '=') {
6661         tmp = *s++;
6662         if (tmp == '>') {
6663             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6664                 s -= 3;
6665                 TOKEN(0);
6666             }
6667             NCEop(OP_NCMP);
6668         }
6669         s--;
6670         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6671             s -= 2;
6672             TOKEN(0);
6673         }
6674         ChRop(OP_LE);
6675     }
6676
6677     s--;
6678     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6679         s--;
6680         TOKEN(0);
6681     }
6682
6683     ChRop(OP_LT);
6684 }
6685
6686 static int
6687 yyl_rightpointy(pTHX_ char *s)
6688 {
6689     const char tmp = *s++;
6690
6691     if (tmp == '>') {
6692         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6693             s -= 2;
6694             TOKEN(0);
6695         }
6696         SHop(OP_RIGHT_SHIFT);
6697     }
6698     else if (tmp == '=') {
6699         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6700             s -= 2;
6701             TOKEN(0);
6702         }
6703         ChRop(OP_GE);
6704     }
6705
6706     s--;
6707     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6708         s--;
6709         TOKEN(0);
6710     }
6711
6712     ChRop(OP_GT);
6713 }
6714
6715 static int
6716 yyl_sglquote(pTHX_ char *s)
6717 {
6718     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6719     if (!s)
6720         missingterm(NULL, 0);
6721     COPLINE_SET_FROM_MULTI_END;
6722     DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6723     if (PL_expect == XOPERATOR) {
6724         no_op("String",s);
6725     }
6726     pl_yylval.ival = OP_CONST;
6727     TERM(sublex_start());
6728 }
6729
6730 static int
6731 yyl_dblquote(pTHX_ char *s)
6732 {
6733     char *d;
6734     STRLEN len;
6735     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6736     DEBUG_T( {
6737         if (s)
6738             printbuf("### Saw string before %s\n", s);
6739         else
6740             PerlIO_printf(Perl_debug_log,
6741                          "### Saw unterminated string\n");
6742     } );
6743     if (PL_expect == XOPERATOR) {
6744             no_op("String",s);
6745     }
6746     if (!s)
6747         missingterm(NULL, 0);
6748     pl_yylval.ival = OP_CONST;
6749     /* FIXME. I think that this can be const if char *d is replaced by
6750        more localised variables.  */
6751     for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6752         if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6753             pl_yylval.ival = OP_STRINGIFY;
6754             break;
6755         }
6756     }
6757     if (pl_yylval.ival == OP_CONST)
6758         COPLINE_SET_FROM_MULTI_END;
6759     TERM(sublex_start());
6760 }
6761
6762 static int
6763 yyl_backtick(pTHX_ char *s)
6764 {
6765     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6766     DEBUG_T( {
6767         if (s)
6768             printbuf("### Saw backtick string before %s\n", s);
6769         else
6770             PerlIO_printf(Perl_debug_log,
6771                          "### Saw unterminated backtick string\n");
6772     } );
6773     if (PL_expect == XOPERATOR)
6774         no_op("Backticks",s);
6775     if (!s)
6776         missingterm(NULL, 0);
6777     pl_yylval.ival = OP_BACKTICK;
6778     TERM(sublex_start());
6779 }
6780
6781 static int
6782 yyl_backslash(pTHX_ char *s)
6783 {
6784     if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6785         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6786                        *s, *s);
6787     if (PL_expect == XOPERATOR)
6788         no_op("Backslash",s);
6789     OPERATOR(REFGEN);
6790 }
6791
6792 static void
6793 yyl_data_handle(pTHX)
6794 {
6795     HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6796                             ? PL_curstash
6797                             : PL_defstash;
6798     GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6799
6800     if (!isGV(gv))
6801         gv_init(gv,stash,"DATA",4,0);
6802
6803     GvMULTI_on(gv);
6804     if (!GvIO(gv))
6805         GvIOp(gv) = newIO();
6806     IoIFP(GvIOp(gv)) = PL_rsfp;
6807
6808     /* Mark this internal pseudo-handle as clean */
6809     IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6810     if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6811         IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6812     else
6813         IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6814
6815 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6816     /* if the script was opened in binmode, we need to revert
6817      * it to text mode for compatibility; but only iff it has CRs
6818      * XXX this is a questionable hack at best. */
6819     if (PL_bufend-PL_bufptr > 2
6820         && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6821     {
6822         Off_t loc = 0;
6823         if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6824             loc = PerlIO_tell(PL_rsfp);
6825             (void)PerlIO_seek(PL_rsfp, 0L, 0);
6826         }
6827         if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6828             if (loc > 0)
6829                 PerlIO_seek(PL_rsfp, loc, 0);
6830         }
6831     }
6832 #endif
6833
6834 #ifdef PERLIO_LAYERS
6835     if (!IN_BYTES) {
6836         if (UTF)
6837             PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6838     }
6839 #endif
6840
6841     PL_rsfp = NULL;
6842 }
6843
6844 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6845     __attribute__noreturn__;
6846
6847 PERL_STATIC_NO_RET void
6848 yyl_croak_unrecognised(pTHX_ char *s)
6849 {
6850     SV *dsv = newSVpvs_flags("", SVs_TEMP);
6851     const char *c;
6852     char *d;
6853     STRLEN len;
6854
6855     if (UTF) {
6856         STRLEN skiplen = UTF8SKIP(s);
6857         STRLEN stravail = PL_bufend - s;
6858         c = sv_uni_display(dsv, newSVpvn_flags(s,
6859                                                skiplen > stravail ? stravail : skiplen,
6860                                                SVs_TEMP | SVf_UTF8),
6861                            10, UNI_DISPLAY_ISPRINT);
6862     }
6863     else {
6864         c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6865     }
6866
6867     if (s >= PL_linestart) {
6868         d = PL_linestart;
6869     }
6870     else {
6871         /* somehow (probably due to a parse failure), PL_linestart has advanced
6872          * pass PL_bufptr, get a reasonable beginning of line
6873          */
6874         d = s;
6875         while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6876             --d;
6877     }
6878     len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6879     if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6880         d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6881     }
6882
6883     Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6884                       UTF8fARG(UTF, (s - d), d),
6885                      (int) len + 1);
6886 }
6887
6888 static int
6889 yyl_require(pTHX_ char *s, I32 orig_keyword)
6890 {
6891     s = skipspace(s);
6892     if (isDIGIT(*s)) {
6893         s = force_version(s, FALSE);
6894     }
6895     else if (*s != 'v' || !isDIGIT(s[1])
6896             || (s = force_version(s, TRUE), *s == 'v'))
6897     {
6898         *PL_tokenbuf = '\0';
6899         s = force_word(s,BAREWORD,TRUE,TRUE);
6900         if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6901                                    PL_tokenbuf + sizeof(PL_tokenbuf),
6902                                    UTF))
6903         {
6904             gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6905                         GV_ADD | (UTF ? SVf_UTF8 : 0));
6906         }
6907         else if (*s == '<')
6908             yyerror("<> at require-statement should be quotes");
6909     }
6910
6911     if (orig_keyword == KEY_require)
6912         pl_yylval.ival = 1;
6913     else
6914         pl_yylval.ival = 0;
6915
6916     PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6917     PL_bufptr = s;
6918     PL_last_uni = PL_oldbufptr;
6919     PL_last_lop_op = OP_REQUIRE;
6920     s = skipspace(s);
6921     return REPORT( (int)REQUIRE );
6922 }
6923
6924 static int
6925 yyl_foreach(pTHX_ char *s)
6926 {
6927     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6928         return REPORT(0);
6929     pl_yylval.ival = CopLINE(PL_curcop);
6930     s = skipspace(s);
6931     if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6932         char *p = s;
6933         SSize_t s_off = s - SvPVX(PL_linestr);
6934         bool paren_is_valid = FALSE;
6935         bool maybe_package = FALSE;
6936         bool saw_core = FALSE;
6937         bool core_valid = FALSE;
6938
6939         if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
6940             saw_core = TRUE;
6941             p += 6;
6942         }
6943         if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
6944             core_valid = TRUE;
6945             paren_is_valid = TRUE;
6946             if (isSPACE(p[2])) {
6947                 p = skipspace(p + 3);
6948                 maybe_package = TRUE;
6949             }
6950             else {
6951                 p += 2;
6952             }
6953         }
6954         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
6955             core_valid = TRUE;
6956             if (isSPACE(p[3])) {
6957                 p = skipspace(p + 4);
6958                 maybe_package = TRUE;
6959             }
6960             else {
6961                 p += 3;
6962             }
6963         }
6964         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
6965             core_valid = TRUE;
6966             if (isSPACE(p[5])) {
6967                 p = skipspace(p + 6);
6968             }
6969             else {
6970                 p += 5;
6971             }
6972         }
6973         if (saw_core && !core_valid) {
6974             Perl_croak(aTHX_ "Missing $ on loop variable");
6975         }
6976
6977         if (maybe_package && !saw_core) {
6978             /* skip optional package name, as in "for my abc $x (..)" */
6979             if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
6980                 STRLEN len;
6981                 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6982                 p = skipspace(p);
6983                 paren_is_valid = FALSE;
6984             }
6985         }
6986
6987         if (UNLIKELY(paren_is_valid && *p == '(')) {
6988             Perl_ck_warner_d(aTHX_
6989                              packWARN(WARN_EXPERIMENTAL__FOR_LIST),
6990                              "for my (...) is experimental");
6991         }
6992         else if (UNLIKELY(*p != '$' && *p != '\\')) {
6993             /* "for myfoo (" will end up here, but with p pointing at the 'f' */
6994             Perl_croak(aTHX_ "Missing $ on loop variable");
6995         }
6996         /* The buffer may have been reallocated, update s */
6997         s = SvPVX(PL_linestr) + s_off;
6998     }
6999     OPERATOR(FOR);
7000 }
7001
7002 static int
7003 yyl_do(pTHX_ char *s, I32 orig_keyword)
7004 {
7005     s = skipspace(s);
7006     if (*s == '{')
7007         PRETERMBLOCK(DO);
7008     if (*s != '\'') {
7009         char *d;
7010         STRLEN len;
7011         *PL_tokenbuf = '&';
7012         d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7013                       1, &len);
7014         if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7015          && !keyword(PL_tokenbuf + 1, len, 0)) {
7016             SSize_t off = s-SvPVX(PL_linestr);
7017             d = skipspace(d);
7018             s = SvPVX(PL_linestr)+off;
7019             if (*d == '(') {
7020                 force_ident_maybe_lex('&');
7021                 s = d;
7022             }
7023         }
7024     }
7025     if (orig_keyword == KEY_do)
7026         pl_yylval.ival = 1;
7027     else
7028         pl_yylval.ival = 0;
7029     OPERATOR(DO);
7030 }
7031
7032 static int
7033 yyl_my(pTHX_ char *s, I32 my)
7034 {
7035     if (PL_in_my) {
7036         PL_bufptr = s;
7037         yyerror(Perl_form(aTHX_
7038                           "Can't redeclare \"%s\" in \"%s\"",
7039                            my       == KEY_my    ? "my" :
7040                            my       == KEY_state ? "state" : "our",
7041                            PL_in_my == KEY_my    ? "my" :
7042                            PL_in_my == KEY_state ? "state" : "our"));
7043     }
7044     PL_in_my = (U16)my;
7045     s = skipspace(s);
7046     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7047         STRLEN len;
7048         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7049         if (memEQs(PL_tokenbuf, len, "sub"))
7050             return yyl_sub(aTHX_ s, my);
7051         PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7052         if (!PL_in_my_stash) {
7053             char tmpbuf[1024];
7054             int i;
7055             PL_bufptr = s;
7056             i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7057             PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
7058             yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7059         }
7060     }
7061     else if (*s == '\\') {
7062         if (!FEATURE_MYREF_IS_ENABLED)
7063             Perl_croak(aTHX_ "The experimental declared_refs "
7064                              "feature is not enabled");
7065         Perl_ck_warner_d(aTHX_
7066              packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
7067             "Declaring references is experimental");
7068     }
7069     OPERATOR(MY);
7070 }
7071
7072 static int yyl_try(pTHX_ char*);
7073
7074 static bool
7075 yyl_eol_needs_semicolon(pTHX_ char **ps)
7076 {
7077     char *s = *ps;
7078     if (PL_lex_state != LEX_NORMAL
7079         || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
7080     {
7081         const bool in_comment = *s == '#';
7082         char *d;
7083         if (*s == '#' && s == PL_linestart && PL_in_eval
7084          && !PL_rsfp && !PL_parser->filtered) {
7085             /* handle eval qq[#line 1 "foo"\n ...] */
7086             CopLINE_dec(PL_curcop);
7087             incline(s, PL_bufend);
7088         }
7089         d = s;
7090         while (d < PL_bufend && *d != '\n')
7091             d++;
7092         if (d < PL_bufend)
7093             d++;
7094         s = d;
7095         if (in_comment && d == PL_bufend
7096             && PL_lex_state == LEX_INTERPNORMAL
7097             && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7098             && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
7099         else
7100             incline(s, PL_bufend);
7101         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7102             PL_lex_state = LEX_FORMLINE;
7103             force_next(FORMRBRACK);
7104             *ps = s;
7105             return TRUE;
7106         }
7107     }
7108     else {
7109         while (s < PL_bufend && *s != '\n')
7110             s++;
7111         if (s < PL_bufend) {
7112             s++;
7113             if (s < PL_bufend)
7114                 incline(s, PL_bufend);
7115         }
7116     }
7117     *ps = s;
7118     return FALSE;
7119 }
7120
7121 static int
7122 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
7123 {
7124     char *d;
7125
7126     goto start;
7127
7128     do {
7129         fake_eof = 0;
7130         bof = cBOOL(PL_rsfp);
7131       start:
7132
7133         PL_bufptr = PL_bufend;
7134         COPLINE_INC_WITH_HERELINES;
7135         if (!lex_next_chunk(fake_eof)) {
7136             CopLINE_dec(PL_curcop);
7137             s = PL_bufptr;
7138             TOKEN(PERLY_SEMICOLON);     /* not infinite loop because rsfp is NULL now */
7139         }
7140         CopLINE_dec(PL_curcop);
7141         s = PL_bufptr;
7142         /* If it looks like the start of a BOM or raw UTF-16,
7143          * check if it in fact is. */
7144         if (bof && PL_rsfp
7145             && (   *s == 0
7146                 || *(U8*)s == BOM_UTF8_FIRST_BYTE
7147                 || *(U8*)s >= 0xFE
7148                 || s[1] == 0))
7149         {
7150             Off_t offset = (IV)PerlIO_tell(PL_rsfp);
7151             bof = (offset == (Off_t)SvCUR(PL_linestr));
7152 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
7153             /* offset may include swallowed CR */
7154             if (!bof)
7155                 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
7156 #endif
7157             if (bof) {
7158                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7159                 s = swallow_bom((U8*)s);
7160             }
7161         }
7162         if (PL_parser->in_pod) {
7163             /* Incest with pod. */
7164             if (    memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
7165                 && !isALPHA(s[4]))
7166             {
7167                 SvPVCLEAR(PL_linestr);
7168                 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7169                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7170                 PL_last_lop = PL_last_uni = NULL;
7171                 PL_parser->in_pod = 0;
7172             }
7173         }
7174         if (PL_rsfp || PL_parser->filtered)
7175             incline(s, PL_bufend);
7176     } while (PL_parser->in_pod);
7177
7178     PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7179     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7180     PL_last_lop = PL_last_uni = NULL;
7181     if (CopLINE(PL_curcop) == 1) {
7182         while (s < PL_bufend && isSPACE(*s))
7183             s++;
7184         if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7185             s++;
7186         d = NULL;
7187         if (!PL_in_eval) {
7188             if (*s == '#' && *(s+1) == '!')
7189                 d = s + 2;
7190 #ifdef ALTERNATE_SHEBANG
7191             else {
7192                 static char const as[] = ALTERNATE_SHEBANG;
7193                 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7194                     d = s + (sizeof(as) - 1);
7195             }
7196 #endif /* ALTERNATE_SHEBANG */
7197         }
7198         if (d) {
7199             char *ipath;
7200             char *ipathend;
7201
7202             while (isSPACE(*d))
7203                 d++;
7204             ipath = d;
7205             while (*d && !isSPACE(*d))
7206                 d++;
7207             ipathend = d;
7208
7209 #ifdef ARG_ZERO_IS_SCRIPT
7210             if (ipathend > ipath) {
7211                 /*
7212                  * HP-UX (at least) sets argv[0] to the script name,
7213                  * which makes $^X incorrect.  And Digital UNIX and Linux,
7214                  * at least, set argv[0] to the basename of the Perl
7215                  * interpreter. So, having found "#!", we'll set it right.
7216                  */
7217                 SV* copfilesv = CopFILESV(PL_curcop);
7218                 if (copfilesv) {
7219                     SV * const x =
7220                         GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7221                                          SVt_PV)); /* $^X */
7222                     assert(SvPOK(x) || SvGMAGICAL(x));
7223                     if (sv_eq(x, copfilesv)) {
7224                         sv_setpvn(x, ipath, ipathend - ipath);
7225                         SvSETMAGIC(x);
7226                     }
7227                     else {
7228                         STRLEN blen;
7229                         STRLEN llen;
7230                         const char *bstart = SvPV_const(copfilesv, blen);
7231                         const char * const lstart = SvPV_const(x, llen);
7232                         if (llen < blen) {
7233                             bstart += blen - llen;
7234                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
7235                                 sv_setpvn(x, ipath, ipathend - ipath);
7236                                 SvSETMAGIC(x);
7237                             }
7238                         }
7239                     }
7240                 }
7241                 else {
7242                     /* Anything to do if no copfilesv? */
7243                 }
7244                 TAINT_NOT;      /* $^X is always tainted, but that's OK */
7245             }
7246 #endif /* ARG_ZERO_IS_SCRIPT */
7247
7248             /*
7249              * Look for options.
7250              */
7251             d = instr(s,"perl -");
7252             if (!d) {
7253                 d = instr(s,"perl");
7254 #if defined(DOSISH)
7255                 /* avoid getting into infinite loops when shebang
7256                  * line contains "Perl" rather than "perl" */
7257                 if (!d) {
7258                     for (d = ipathend-4; d >= ipath; --d) {
7259                         if (isALPHA_FOLD_EQ(*d, 'p')
7260                             && !ibcmp(d, "perl", 4))
7261                         {
7262                             break;
7263                         }
7264                     }
7265                     if (d < ipath)
7266                         d = NULL;
7267                 }
7268 #endif
7269             }
7270 #ifdef ALTERNATE_SHEBANG
7271             /*
7272              * If the ALTERNATE_SHEBANG on this system starts with a
7273              * character that can be part of a Perl expression, then if
7274              * we see it but not "perl", we're probably looking at the
7275              * start of Perl code, not a request to hand off to some
7276              * other interpreter.  Similarly, if "perl" is there, but
7277              * not in the first 'word' of the line, we assume the line
7278              * contains the start of the Perl program.
7279              */
7280             if (d && *s != '#') {
7281                 const char *c = ipath;
7282                 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7283                     c++;
7284                 if (c < d)
7285                     d = NULL;   /* "perl" not in first word; ignore */
7286                 else
7287                     *s = '#';   /* Don't try to parse shebang line */
7288             }
7289 #endif /* ALTERNATE_SHEBANG */
7290             if (!d
7291                 && *s == '#'
7292                 && ipathend > ipath
7293                 && !PL_minus_c
7294                 && !instr(s,"indir")
7295                 && instr(PL_origargv[0],"perl"))
7296             {
7297                 char **newargv;
7298
7299                 *ipathend = '\0';
7300                 s = ipathend + 1;
7301                 while (s < PL_bufend && isSPACE(*s))
7302                     s++;
7303                 if (s < PL_bufend) {
7304                     Newx(newargv,PL_origargc+3,char*);
7305                     newargv[1] = s;
7306                     while (s < PL_bufend && !isSPACE(*s))
7307                         s++;
7308                     *s = '\0';
7309                     Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7310                 }
7311                 else
7312                     newargv = PL_origargv;
7313                 newargv[0] = ipath;
7314                 PERL_FPU_PRE_EXEC
7315                 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7316                 PERL_FPU_POST_EXEC
7317                 Perl_croak(aTHX_ "Can't exec %s", ipath);
7318             }
7319             if (d) {
7320                 while (*d && !isSPACE(*d))
7321                     d++;
7322                 while (SPACE_OR_TAB(*d))
7323                     d++;
7324
7325                 if (*d++ == '-') {
7326                     const bool switches_done = PL_doswitches;
7327                     const U32 oldpdb = PL_perldb;
7328                     const bool oldn = PL_minus_n;
7329                     const bool oldp = PL_minus_p;
7330                     const char *d1 = d;
7331
7332                     do {
7333                         bool baduni = FALSE;
7334                         if (*d1 == 'C') {
7335                             const char *d2 = d1 + 1;
7336                             if (parse_unicode_opts((const char **)&d2)
7337                                 != PL_unicode)
7338                                 baduni = TRUE;
7339                         }
7340                         if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7341                             const char * const m = d1;
7342                             while (*d1 && !isSPACE(*d1))
7343                                 d1++;
7344                             Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7345                                   (int)(d1 - m), m);
7346                         }
7347                         d1 = moreswitches(d1);
7348                     } while (d1);
7349                     if (PL_doswitches && !switches_done) {
7350                         int argc = PL_origargc;
7351                         char **argv = PL_origargv;
7352                         do {
7353                             argc--,argv++;
7354                         } while (argc && argv[0][0] == '-' && argv[0][1]);
7355                         init_argv_symbols(argc,argv);
7356                     }
7357                     if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7358                         || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7359                           /* if we have already added "LINE: while (<>) {",
7360                              we must not do it again */
7361                     {
7362                         SvPVCLEAR(PL_linestr);
7363                         PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7364                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7365                         PL_last_lop = PL_last_uni = NULL;
7366                         PL_preambled = FALSE;
7367                         if (PERLDB_LINE_OR_SAVESRC)
7368                             (void)gv_fetchfile(PL_origfilename);
7369                         return YYL_RETRY;
7370                     }
7371                 }
7372             }
7373         }
7374     }
7375
7376     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7377         PL_lex_state = LEX_FORMLINE;
7378         force_next(FORMRBRACK);
7379         TOKEN(PERLY_SEMICOLON);
7380     }
7381
7382     PL_bufptr = s;
7383     return YYL_RETRY;
7384 }
7385
7386 static int
7387 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7388 {
7389     CLINE;
7390     pl_yylval.opval
7391         = newSVOP(OP_CONST, 0,
7392                        S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7393     pl_yylval.opval->op_private = OPpCONST_BARE;
7394     TERM(BAREWORD);
7395 }
7396
7397 static int
7398 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7399 {
7400     if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7401         && PL_parser->saw_infix_sigil)
7402     {
7403         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7404                          "Operator or semicolon missing before %c%" UTF8f,
7405                          lastchar,
7406                          UTF8fARG(UTF, strlen(PL_tokenbuf),
7407                                   PL_tokenbuf));
7408         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7409                          "Ambiguous use of %c resolved as operator %c",
7410                          lastchar, lastchar);
7411     }
7412     TOKEN(BAREWORD);
7413 }
7414
7415 static int
7416 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7417 {
7418     if (sv) {
7419         op_free(rv2cv_op);
7420         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7421         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7422         if (SvTYPE(sv) == SVt_PVAV)
7423             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7424                                       pl_yylval.opval);
7425         else {
7426             pl_yylval.opval->op_private = 0;
7427             pl_yylval.opval->op_folded = 1;
7428             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7429         }
7430         TOKEN(BAREWORD);
7431     }
7432
7433     op_free(pl_yylval.opval);
7434     pl_yylval.opval =
7435         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7436     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7437     PL_last_lop = PL_oldbufptr;
7438     PL_last_lop_op = OP_ENTERSUB;
7439
7440     /* Is there a prototype? */
7441     if (SvPOK(cv)) {
7442         int k = yyl_subproto(aTHX_ s, cv);
7443         if (k != KEY_NULL)
7444             return k;
7445     }
7446
7447     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7448     PL_expect = XTERM;
7449     force_next(off ? PRIVATEREF : BAREWORD);
7450     if (!PL_lex_allbrackets
7451         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7452     {
7453         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7454     }
7455
7456     TOKEN(NOAMP);
7457 }
7458
7459 /* Honour "reserved word" warnings, and enforce strict subs */
7460 static void
7461 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7462 {
7463     /* after "print" and similar functions (corresponding to
7464      * "F? L" in opcode.pl), whatever wasn't already parsed as
7465      * a filehandle should be subject to "strict subs".
7466      * Likewise for the optional indirect-object argument to system
7467      * or exec, which can't be a bareword */
7468     if ((PL_last_lop_op == OP_PRINT
7469             || PL_last_lop_op == OP_PRTF
7470             || PL_last_lop_op == OP_SAY
7471             || PL_last_lop_op == OP_SYSTEM
7472             || PL_last_lop_op == OP_EXEC)
7473         && (PL_hints & HINT_STRICT_SUBS))
7474     {
7475         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7476     }
7477
7478     if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7479         char *d = PL_tokenbuf;
7480         while (isLOWER(*d))
7481             d++;
7482         if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7483             /* PL_warn_reserved is constant */
7484             GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7485             Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7486                         PL_tokenbuf);
7487             GCC_DIAG_RESTORE_STMT;
7488         }
7489     }
7490 }
7491
7492 static int
7493 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7494 {
7495     int pkgname = 0;
7496     const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7497     bool safebw;
7498     bool no_op_error = FALSE;
7499     /* Use this var to track whether intuit_method has been
7500        called.  intuit_method returns 0 or > 255.  */
7501     int key = 1;
7502
7503     if (PL_expect == XOPERATOR) {
7504         if (PL_bufptr == PL_linestart) {
7505             CopLINE_dec(PL_curcop);
7506             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7507             CopLINE_inc(PL_curcop);
7508         }
7509         else
7510             /* We want to call no_op with s pointing after the
7511                bareword, so defer it.  But we want it to come
7512                before the Bad name croak.  */
7513             no_op_error = TRUE;
7514     }
7515
7516     /* Get the rest if it looks like a package qualifier */
7517
7518     if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7519         STRLEN morelen;
7520         s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7521                       TRUE, &morelen);
7522         if (no_op_error) {
7523             no_op("Bareword",s);
7524             no_op_error = FALSE;
7525         }
7526         if (!morelen)
7527             Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7528                     UTF8fARG(UTF, len, PL_tokenbuf),
7529                     *s == '\'' ? "'" : "::");
7530         len += morelen;
7531         pkgname = 1;
7532     }
7533
7534     if (no_op_error)
7535         no_op("Bareword",s);
7536
7537     /* See if the name is "Foo::",
7538        in which case Foo is a bareword
7539        (and a package name). */
7540
7541     if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7542         if (ckWARN(WARN_BAREWORD)
7543             && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7544             Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7545                         "Bareword \"%" UTF8f
7546                         "\" refers to nonexistent package",
7547                         UTF8fARG(UTF, len, PL_tokenbuf));
7548         len -= 2;
7549         PL_tokenbuf[len] = '\0';
7550         c.gv = NULL;
7551         c.gvp = 0;
7552         safebw = TRUE;
7553     }
7554     else {
7555         safebw = FALSE;
7556     }
7557
7558     /* if we saw a global override before, get the right name */
7559
7560     if (!c.sv)
7561         c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7562     if (c.gvp) {
7563         SV *sv = newSVpvs("CORE::GLOBAL::");
7564         sv_catsv(sv, c.sv);
7565         SvREFCNT_dec(c.sv);
7566         c.sv = sv;
7567     }
7568
7569     /* Presume this is going to be a bareword of some sort. */
7570     CLINE;
7571     pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7572     pl_yylval.opval->op_private = OPpCONST_BARE;
7573
7574     /* And if "Foo::", then that's what it certainly is. */
7575     if (safebw)
7576         return yyl_safe_bareword(aTHX_ s, lastchar);
7577
7578     if (!c.off) {
7579         OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7580         const_op->op_private = OPpCONST_BARE;
7581         c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7582         c.cv = c.lex
7583             ? isGV(c.gv)
7584                 ? GvCV(c.gv)
7585                 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7586                     ? (CV *)SvRV(c.gv)
7587                     : ((CV *)c.gv)
7588             : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7589     }
7590
7591     /* See if it's the indirect object for a list operator. */
7592
7593     if (PL_oldoldbufptr
7594         && PL_oldoldbufptr < PL_bufptr
7595         && (PL_oldoldbufptr == PL_last_lop
7596             || PL_oldoldbufptr == PL_last_uni)
7597         && /* NO SKIPSPACE BEFORE HERE! */
7598            (PL_expect == XREF
7599             || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7600                                                    == OA_FILEREF))
7601     {
7602         bool immediate_paren = *s == '(';
7603         SSize_t s_off;
7604
7605         /* (Now we can afford to cross potential line boundary.) */
7606         s = skipspace(s);
7607
7608         /* intuit_method() can indirectly call lex_next_chunk(),
7609          * invalidating s
7610          */
7611         s_off = s - SvPVX(PL_linestr);
7612         /* Two barewords in a row may indicate method call. */
7613         if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7614                 || *s == '$')
7615             && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7616         {
7617             /* the code at method: doesn't use s */
7618             goto method;
7619         }
7620         s = SvPVX(PL_linestr) + s_off;
7621
7622         if (((PL_opargs[PL_last_lop_op] >> OASHIFT) & 7) == OA_FILEREF
7623             && !immediate_paren && !c.cv
7624             && !FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
7625             no_bareword_filehandle(PL_tokenbuf);
7626         }
7627
7628         /* If not a declared subroutine, it's an indirect object. */
7629         /* (But it's an indir obj regardless for sort.) */
7630         /* Also, if "_" follows a filetest operator, it's a bareword */
7631
7632         if (
7633             ( !immediate_paren && (PL_last_lop_op == OP_SORT
7634              || (!c.cv
7635                  && (PL_last_lop_op != OP_MAPSTART
7636                      && PL_last_lop_op != OP_GREPSTART))))
7637            || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7638                 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7639                                                 == OA_FILESTATOP))
7640            )
7641         {
7642             PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7643             yyl_strictwarn_bareword(aTHX_ lastchar);
7644             op_free(c.rv2cv_op);
7645             return yyl_safe_bareword(aTHX_ s, lastchar);
7646         }
7647     }
7648
7649     PL_expect = XOPERATOR;
7650     s = skipspace(s);
7651
7652     /* Is this a word before a => operator? */
7653     if (*s == '=' && s[1] == '>' && !pkgname) {
7654         op_free(c.rv2cv_op);
7655         CLINE;
7656         if (c.gvp || (c.lex && !c.off)) {
7657             assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7658             /* This is our own scalar, created a few lines
7659                above, so this is safe. */
7660             SvREADONLY_off(c.sv);
7661             sv_setpv(c.sv, PL_tokenbuf);
7662             if (UTF && !IN_BYTES
7663              && is_utf8_string((U8*)PL_tokenbuf, len))
7664                   SvUTF8_on(c.sv);
7665             SvREADONLY_on(c.sv);
7666         }
7667         TERM(BAREWORD);
7668     }
7669
7670     /* If followed by a paren, it's certainly a subroutine. */
7671     if (*s == '(') {
7672         CLINE;
7673         if (c.cv) {
7674             char *d = s + 1;
7675             while (SPACE_OR_TAB(*d))
7676                 d++;
7677             if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7678                 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7679         }
7680         NEXTVAL_NEXTTOKE.opval =
7681             c.off ? c.rv2cv_op : pl_yylval.opval;
7682         if (c.off)
7683              op_free(pl_yylval.opval), force_next(PRIVATEREF);
7684         else op_free(c.rv2cv_op),      force_next(BAREWORD);
7685         pl_yylval.ival = 0;
7686         TOKEN(PERLY_AMPERSAND);
7687     }
7688
7689     /* If followed by var or block, call it a method (unless sub) */
7690
7691     if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7692         op_free(c.rv2cv_op);
7693         PL_last_lop = PL_oldbufptr;
7694         PL_last_lop_op = OP_METHOD;
7695         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7696             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7697         PL_expect = XBLOCKTERM;
7698         PL_bufptr = s;
7699         return REPORT(METHOD);
7700     }
7701
7702     /* If followed by a bareword, see if it looks like indir obj. */
7703
7704     if (   key == 1
7705         && !orig_keyword
7706         && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7707         && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7708     {
7709       method:
7710         if (c.lex && !c.off) {
7711             assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7712             SvREADONLY_off(c.sv);
7713             sv_setpvn(c.sv, PL_tokenbuf, len);
7714             if (UTF && !IN_BYTES
7715              && is_utf8_string((U8*)PL_tokenbuf, len))
7716                 SvUTF8_on(c.sv);
7717             else SvUTF8_off(c.sv);
7718         }
7719         op_free(c.rv2cv_op);
7720         if (key == METHOD && !PL_lex_allbrackets
7721             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7722         {
7723             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7724         }
7725         return REPORT(key);
7726     }
7727
7728     /* Not a method, so call it a subroutine (if defined) */
7729
7730     if (c.cv) {
7731         /* Check for a constant sub */
7732         c.sv = cv_const_sv_or_av(c.cv);
7733         return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7734     }
7735
7736     /* Call it a bare word */
7737
7738     if (PL_hints & HINT_STRICT_SUBS)
7739         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7740     else
7741         yyl_strictwarn_bareword(aTHX_ lastchar);
7742
7743     op_free(c.rv2cv_op);
7744
7745     return yyl_safe_bareword(aTHX_ s, lastchar);
7746 }
7747
7748 static int
7749 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7750 {
7751     switch (key) {
7752     default:                    /* not a keyword */
7753         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7754
7755     case KEY___FILE__:
7756         FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7757
7758     case KEY___LINE__:
7759         FUN0OP(
7760             newSVOP(OP_CONST, 0,
7761                 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7762         );
7763
7764     case KEY___PACKAGE__:
7765         FUN0OP(
7766             newSVOP(OP_CONST, 0, (PL_curstash
7767                                      ? newSVhek(HvNAME_HEK(PL_curstash))
7768                                      : &PL_sv_undef))
7769         );
7770
7771     case KEY___DATA__:
7772     case KEY___END__:
7773         if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7774             yyl_data_handle(aTHX);
7775         return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7776
7777     case KEY___SUB__:
7778         FUN0OP(CvCLONE(PL_compcv)
7779                     ? newOP(OP_RUNCV, 0)
7780                     : newPVOP(OP_RUNCV,0,NULL));
7781
7782     case KEY_AUTOLOAD:
7783     case KEY_DESTROY:
7784     case KEY_BEGIN:
7785     case KEY_UNITCHECK:
7786     case KEY_CHECK:
7787     case KEY_INIT:
7788     case KEY_END:
7789         if (PL_expect == XSTATE)
7790             return yyl_sub(aTHX_ PL_bufptr, key);
7791         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7792
7793     case KEY_abs:
7794         UNI(OP_ABS);
7795
7796     case KEY_alarm:
7797         UNI(OP_ALARM);
7798
7799     case KEY_accept:
7800         LOP(OP_ACCEPT,XTERM);
7801
7802     case KEY_and:
7803         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7804             return REPORT(0);
7805         OPERATOR(ANDOP);
7806
7807     case KEY_atan2:
7808         LOP(OP_ATAN2,XTERM);
7809
7810     case KEY_bind:
7811         LOP(OP_BIND,XTERM);
7812
7813     case KEY_binmode:
7814         LOP(OP_BINMODE,XTERM);
7815
7816     case KEY_bless:
7817         LOP(OP_BLESS,XTERM);
7818
7819     case KEY_break:
7820         FUN0(OP_BREAK);
7821
7822     case KEY_catch:
7823         Perl_ck_warner_d(aTHX_
7824             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
7825         PREBLOCK(CATCH);
7826
7827     case KEY_chop:
7828         UNI(OP_CHOP);
7829
7830     case KEY_continue:
7831         /* We have to disambiguate the two senses of
7832           "continue". If the next token is a '{' then
7833           treat it as the start of a continue block;
7834           otherwise treat it as a control operator.
7835          */
7836         s = skipspace(s);
7837         if (*s == '{')
7838             PREBLOCK(CONTINUE);
7839         else
7840             FUN0(OP_CONTINUE);
7841
7842     case KEY_chdir:
7843         /* may use HOME */
7844         (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7845         UNI(OP_CHDIR);
7846
7847     case KEY_close:
7848         UNI(OP_CLOSE);
7849
7850     case KEY_closedir:
7851         UNI(OP_CLOSEDIR);
7852
7853     case KEY_cmp:
7854         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7855             return REPORT(0);
7856         NCEop(OP_SCMP);
7857
7858     case KEY_caller:
7859         UNI(OP_CALLER);
7860
7861     case KEY_crypt:
7862
7863         LOP(OP_CRYPT,XTERM);
7864
7865     case KEY_chmod:
7866         LOP(OP_CHMOD,XTERM);
7867
7868     case KEY_chown:
7869         LOP(OP_CHOWN,XTERM);
7870
7871     case KEY_connect:
7872         LOP(OP_CONNECT,XTERM);
7873
7874     case KEY_chr:
7875         UNI(OP_CHR);
7876
7877     case KEY_cos:
7878         UNI(OP_COS);
7879
7880     case KEY_chroot:
7881         UNI(OP_CHROOT);
7882
7883     case KEY_default:
7884         PREBLOCK(DEFAULT);
7885
7886     case KEY_defer:
7887         Perl_ck_warner_d(aTHX_
7888             packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental");
7889         PREBLOCK(DEFER);
7890
7891     case KEY_do:
7892         return yyl_do(aTHX_ s, orig_keyword);
7893
7894     case KEY_die:
7895         PL_hints |= HINT_BLOCK_SCOPE;
7896         LOP(OP_DIE,XTERM);
7897
7898     case KEY_defined:
7899         UNI(OP_DEFINED);
7900
7901     case KEY_delete:
7902         UNI(OP_DELETE);
7903
7904     case KEY_dbmopen:
7905         Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7906                           STR_WITH_LEN("NDBM_File::"),
7907                           STR_WITH_LEN("DB_File::"),
7908                           STR_WITH_LEN("GDBM_File::"),
7909                           STR_WITH_LEN("SDBM_File::"),
7910                           STR_WITH_LEN("ODBM_File::"),
7911                           NULL);
7912         LOP(OP_DBMOPEN,XTERM);
7913
7914     case KEY_dbmclose:
7915         UNI(OP_DBMCLOSE);
7916
7917     case KEY_dump:
7918         LOOPX(OP_DUMP);
7919
7920     case KEY_else:
7921         PREBLOCK(ELSE);
7922
7923     case KEY_elsif:
7924         pl_yylval.ival = CopLINE(PL_curcop);
7925         OPERATOR(ELSIF);
7926
7927     case KEY_eq:
7928         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7929             return REPORT(0);
7930         ChEop(OP_SEQ);
7931
7932     case KEY_exists:
7933         UNI(OP_EXISTS);
7934
7935     case KEY_exit:
7936         UNI(OP_EXIT);
7937
7938     case KEY_eval:
7939         s = skipspace(s);
7940         if (*s == '{') { /* block eval */
7941             PL_expect = XTERMBLOCK;
7942             UNIBRACK(OP_ENTERTRY);
7943         }
7944         else { /* string eval */
7945             PL_expect = XTERM;
7946             UNIBRACK(OP_ENTEREVAL);
7947         }
7948
7949     case KEY_evalbytes:
7950         PL_expect = XTERM;
7951         UNIBRACK(-OP_ENTEREVAL);
7952
7953     case KEY_eof:
7954         UNI(OP_EOF);
7955
7956     case KEY_exp:
7957         UNI(OP_EXP);
7958
7959     case KEY_each:
7960         UNI(OP_EACH);
7961
7962     case KEY_exec:
7963         LOP(OP_EXEC,XREF);
7964
7965     case KEY_endhostent:
7966         FUN0(OP_EHOSTENT);
7967
7968     case KEY_endnetent:
7969         FUN0(OP_ENETENT);
7970
7971     case KEY_endservent:
7972         FUN0(OP_ESERVENT);
7973
7974     case KEY_endprotoent:
7975         FUN0(OP_EPROTOENT);
7976
7977     case KEY_endpwent:
7978         FUN0(OP_EPWENT);
7979
7980     case KEY_endgrent:
7981         FUN0(OP_EGRENT);
7982
7983     case KEY_finally:
7984         Perl_ck_warner_d(aTHX_
7985             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental");
7986         PREBLOCK(FINALLY);
7987
7988     case KEY_for:
7989     case KEY_foreach:
7990         return yyl_foreach(aTHX_ s);
7991
7992     case KEY_formline:
7993         LOP(OP_FORMLINE,XTERM);
7994
7995     case KEY_fork:
7996         FUN0(OP_FORK);
7997
7998     case KEY_fc:
7999         UNI(OP_FC);
8000
8001     case KEY_fcntl:
8002         LOP(OP_FCNTL,XTERM);
8003
8004     case KEY_fileno:
8005         UNI(OP_FILENO);
8006
8007     case KEY_flock:
8008         LOP(OP_FLOCK,XTERM);
8009
8010     case KEY_gt:
8011         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8012             return REPORT(0);
8013         ChRop(OP_SGT);
8014
8015     case KEY_ge:
8016         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8017             return REPORT(0);
8018         ChRop(OP_SGE);
8019
8020     case KEY_grep:
8021         LOP(OP_GREPSTART, XREF);
8022
8023     case KEY_goto:
8024         LOOPX(OP_GOTO);
8025
8026     case KEY_gmtime:
8027         UNI(OP_GMTIME);
8028
8029     case KEY_getc:
8030         UNIDOR(OP_GETC);
8031
8032     case KEY_getppid:
8033         FUN0(OP_GETPPID);
8034
8035     case KEY_getpgrp:
8036         UNI(OP_GETPGRP);
8037
8038     case KEY_getpriority:
8039         LOP(OP_GETPRIORITY,XTERM);
8040
8041     case KEY_getprotobyname:
8042         UNI(OP_GPBYNAME);
8043
8044     case KEY_getprotobynumber:
8045         LOP(OP_GPBYNUMBER,XTERM);
8046
8047     case KEY_getprotoent:
8048         FUN0(OP_GPROTOENT);
8049
8050     case KEY_getpwent:
8051         FUN0(OP_GPWENT);
8052
8053     case KEY_getpwnam:
8054         UNI(OP_GPWNAM);
8055
8056     case KEY_getpwuid:
8057         UNI(OP_GPWUID);
8058
8059     case KEY_getpeername:
8060         UNI(OP_GETPEERNAME);
8061
8062     case KEY_gethostbyname:
8063         UNI(OP_GHBYNAME);
8064
8065     case KEY_gethostbyaddr:
8066         LOP(OP_GHBYADDR,XTERM);
8067
8068     case KEY_gethostent:
8069         FUN0(OP_GHOSTENT);
8070
8071     case KEY_getnetbyname:
8072         UNI(OP_GNBYNAME);
8073
8074     case KEY_getnetbyaddr:
8075         LOP(OP_GNBYADDR,XTERM);
8076
8077     case KEY_getnetent:
8078         FUN0(OP_GNETENT);
8079
8080     case KEY_getservbyname:
8081         LOP(OP_GSBYNAME,XTERM);
8082
8083     case KEY_getservbyport:
8084         LOP(OP_GSBYPORT,XTERM);
8085
8086     case KEY_getservent:
8087         FUN0(OP_GSERVENT);
8088
8089     case KEY_getsockname:
8090         UNI(OP_GETSOCKNAME);
8091
8092     case KEY_getsockopt:
8093         LOP(OP_GSOCKOPT,XTERM);
8094
8095     case KEY_getgrent:
8096         FUN0(OP_GGRENT);
8097
8098     case KEY_getgrnam:
8099         UNI(OP_GGRNAM);
8100
8101     case KEY_getgrgid:
8102         UNI(OP_GGRGID);
8103
8104     case KEY_getlogin:
8105         FUN0(OP_GETLOGIN);
8106
8107     case KEY_given:
8108         pl_yylval.ival = CopLINE(PL_curcop);
8109         Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8110                          "given is experimental");
8111         OPERATOR(GIVEN);
8112
8113     case KEY_glob:
8114         LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
8115
8116     case KEY_hex:
8117         UNI(OP_HEX);
8118
8119     case KEY_if:
8120         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8121             return REPORT(0);
8122         pl_yylval.ival = CopLINE(PL_curcop);
8123         OPERATOR(IF);
8124
8125     case KEY_index:
8126         LOP(OP_INDEX,XTERM);
8127
8128     case KEY_int:
8129         UNI(OP_INT);
8130
8131     case KEY_ioctl:
8132         LOP(OP_IOCTL,XTERM);
8133
8134     case KEY_isa:
8135         Perl_ck_warner_d(aTHX_
8136             packWARN(WARN_EXPERIMENTAL__ISA), "isa is experimental");
8137         NCRop(OP_ISA);
8138
8139     case KEY_join:
8140         LOP(OP_JOIN,XTERM);
8141
8142     case KEY_keys:
8143         UNI(OP_KEYS);
8144
8145     case KEY_kill:
8146         LOP(OP_KILL,XTERM);
8147
8148     case KEY_last:
8149         LOOPX(OP_LAST);
8150
8151     case KEY_lc:
8152         UNI(OP_LC);
8153
8154     case KEY_lcfirst:
8155         UNI(OP_LCFIRST);
8156
8157     case KEY_local:
8158         OPERATOR(LOCAL);
8159
8160     case KEY_length:
8161         UNI(OP_LENGTH);
8162
8163     case KEY_lt:
8164         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8165             return REPORT(0);
8166         ChRop(OP_SLT);
8167
8168     case KEY_le:
8169         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8170             return REPORT(0);
8171         ChRop(OP_SLE);
8172
8173     case KEY_localtime:
8174         UNI(OP_LOCALTIME);
8175
8176     case KEY_log:
8177         UNI(OP_LOG);
8178
8179     case KEY_link:
8180         LOP(OP_LINK,XTERM);
8181
8182     case KEY_listen:
8183         LOP(OP_LISTEN,XTERM);
8184
8185     case KEY_lock:
8186         UNI(OP_LOCK);
8187
8188     case KEY_lstat:
8189         UNI(OP_LSTAT);
8190
8191     case KEY_m:
8192         s = scan_pat(s,OP_MATCH);
8193         TERM(sublex_start());
8194
8195     case KEY_map:
8196         LOP(OP_MAPSTART, XREF);
8197
8198     case KEY_mkdir:
8199         LOP(OP_MKDIR,XTERM);
8200
8201     case KEY_msgctl:
8202         LOP(OP_MSGCTL,XTERM);
8203
8204     case KEY_msgget:
8205         LOP(OP_MSGGET,XTERM);
8206
8207     case KEY_msgrcv:
8208         LOP(OP_MSGRCV,XTERM);
8209
8210     case KEY_msgsnd:
8211         LOP(OP_MSGSND,XTERM);
8212
8213     case KEY_our:
8214     case KEY_my:
8215     case KEY_state:
8216         return yyl_my(aTHX_ s, key);
8217
8218     case KEY_next:
8219         LOOPX(OP_NEXT);
8220
8221     case KEY_ne:
8222         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8223             return REPORT(0);
8224         ChEop(OP_SNE);
8225
8226     case KEY_no:
8227         s = tokenize_use(0, s);
8228         TOKEN(USE);
8229
8230     case KEY_not:
8231         if (*s == '(' || (s = skipspace(s), *s == '('))
8232             FUN1(OP_NOT);
8233         else {
8234             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8235                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8236             OPERATOR(NOTOP);
8237         }
8238
8239     case KEY_open:
8240         s = skipspace(s);
8241         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8242             const char *t;
8243             char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8244             for (t=d; isSPACE(*t);)
8245                 t++;
8246             if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8247                 /* [perl #16184] */
8248                 && !(t[0] == '=' && t[1] == '>')
8249                 && !(t[0] == ':' && t[1] == ':')
8250                 && !keyword(s, d-s, 0)
8251             ) {
8252                 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8253                    "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8254                     UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8255             }
8256         }
8257         LOP(OP_OPEN,XTERM);
8258
8259     case KEY_or:
8260         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8261             return REPORT(0);
8262         pl_yylval.ival = OP_OR;
8263         OPERATOR(OROP);
8264
8265     case KEY_ord:
8266         UNI(OP_ORD);
8267
8268     case KEY_oct:
8269         UNI(OP_OCT);
8270
8271     case KEY_opendir:
8272         LOP(OP_OPEN_DIR,XTERM);
8273
8274     case KEY_print:
8275         checkcomma(s,PL_tokenbuf,"filehandle");
8276         LOP(OP_PRINT,XREF);
8277
8278     case KEY_printf:
8279         checkcomma(s,PL_tokenbuf,"filehandle");
8280         LOP(OP_PRTF,XREF);
8281
8282     case KEY_prototype:
8283         UNI(OP_PROTOTYPE);
8284
8285     case KEY_push:
8286         LOP(OP_PUSH,XTERM);
8287
8288     case KEY_pop:
8289         UNIDOR(OP_POP);
8290
8291     case KEY_pos:
8292         UNIDOR(OP_POS);
8293
8294     case KEY_pack:
8295         LOP(OP_PACK,XTERM);
8296
8297     case KEY_package:
8298         s = force_word(s,BAREWORD,FALSE,TRUE);
8299         s = skipspace(s);
8300         s = force_strict_version(s);
8301         PREBLOCK(PACKAGE);
8302
8303     case KEY_pipe:
8304         LOP(OP_PIPE_OP,XTERM);
8305
8306     case KEY_q:
8307         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8308         if (!s)
8309             missingterm(NULL, 0);
8310         COPLINE_SET_FROM_MULTI_END;
8311         pl_yylval.ival = OP_CONST;
8312         TERM(sublex_start());
8313
8314     case KEY_quotemeta:
8315         UNI(OP_QUOTEMETA);
8316
8317     case KEY_qw:
8318         return yyl_qw(aTHX_ s, len);
8319
8320     case KEY_qq:
8321         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8322         if (!s)
8323             missingterm(NULL, 0);
8324         pl_yylval.ival = OP_STRINGIFY;
8325         if (SvIVX(PL_lex_stuff) == '\'')
8326             SvIV_set(PL_lex_stuff, 0);  /* qq'$foo' should interpolate */
8327         TERM(sublex_start());
8328
8329     case KEY_qr:
8330         s = scan_pat(s,OP_QR);
8331         TERM(sublex_start());
8332
8333     case KEY_qx:
8334         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8335         if (!s)
8336             missingterm(NULL, 0);
8337         pl_yylval.ival = OP_BACKTICK;
8338         TERM(sublex_start());
8339
8340     case KEY_return:
8341         OLDLOP(OP_RETURN);
8342
8343     case KEY_require:
8344         return yyl_require(aTHX_ s, orig_keyword);
8345
8346     case KEY_reset:
8347         UNI(OP_RESET);
8348
8349     case KEY_redo:
8350         LOOPX(OP_REDO);
8351
8352     case KEY_rename:
8353         LOP(OP_RENAME,XTERM);
8354
8355     case KEY_rand:
8356         UNI(OP_RAND);
8357
8358     case KEY_rmdir:
8359         UNI(OP_RMDIR);
8360
8361     case KEY_rindex:
8362         LOP(OP_RINDEX,XTERM);
8363
8364     case KEY_read:
8365         LOP(OP_READ,XTERM);
8366
8367     case KEY_readdir:
8368         UNI(OP_READDIR);
8369
8370     case KEY_readline:
8371         UNIDOR(OP_READLINE);
8372
8373     case KEY_readpipe:
8374         UNIDOR(OP_BACKTICK);
8375
8376     case KEY_rewinddir:
8377         UNI(OP_REWINDDIR);
8378
8379     case KEY_recv:
8380         LOP(OP_RECV,XTERM);
8381
8382     case KEY_reverse:
8383         LOP(OP_REVERSE,XTERM);
8384
8385     case KEY_readlink:
8386         UNIDOR(OP_READLINK);
8387
8388     case KEY_ref:
8389         UNI(OP_REF);
8390
8391     case KEY_s:
8392         s = scan_subst(s);
8393         if (pl_yylval.opval)
8394             TERM(sublex_start());
8395         else
8396             TOKEN(1);   /* force error */
8397
8398     case KEY_say:
8399         checkcomma(s,PL_tokenbuf,"filehandle");
8400         LOP(OP_SAY,XREF);
8401
8402     case KEY_chomp:
8403         UNI(OP_CHOMP);
8404
8405     case KEY_scalar:
8406         UNI(OP_SCALAR);
8407
8408     case KEY_select:
8409         LOP(OP_SELECT,XTERM);
8410
8411     case KEY_seek:
8412         LOP(OP_SEEK,XTERM);
8413
8414     case KEY_semctl:
8415         LOP(OP_SEMCTL,XTERM);
8416
8417     case KEY_semget:
8418         LOP(OP_SEMGET,XTERM);
8419
8420     case KEY_semop:
8421         LOP(OP_SEMOP,XTERM);
8422
8423     case KEY_send:
8424         LOP(OP_SEND,XTERM);
8425
8426     case KEY_setpgrp:
8427         LOP(OP_SETPGRP,XTERM);
8428
8429     case KEY_setpriority:
8430         LOP(OP_SETPRIORITY,XTERM);
8431
8432     case KEY_sethostent:
8433         UNI(OP_SHOSTENT);
8434
8435     case KEY_setnetent:
8436         UNI(OP_SNETENT);
8437
8438     case KEY_setservent:
8439         UNI(OP_SSERVENT);
8440
8441     case KEY_setprotoent:
8442         UNI(OP_SPROTOENT);
8443
8444     case KEY_setpwent:
8445         FUN0(OP_SPWENT);
8446
8447     case KEY_setgrent:
8448         FUN0(OP_SGRENT);
8449
8450     case KEY_seekdir:
8451         LOP(OP_SEEKDIR,XTERM);
8452
8453     case KEY_setsockopt:
8454         LOP(OP_SSOCKOPT,XTERM);
8455
8456     case KEY_shift:
8457         UNIDOR(OP_SHIFT);
8458
8459     case KEY_shmctl:
8460         LOP(OP_SHMCTL,XTERM);
8461
8462     case KEY_shmget:
8463         LOP(OP_SHMGET,XTERM);
8464
8465     case KEY_shmread:
8466         LOP(OP_SHMREAD,XTERM);
8467
8468     case KEY_shmwrite:
8469         LOP(OP_SHMWRITE,XTERM);
8470
8471     case KEY_shutdown:
8472         LOP(OP_SHUTDOWN,XTERM);
8473
8474     case KEY_sin:
8475         UNI(OP_SIN);
8476
8477     case KEY_sleep:
8478         UNI(OP_SLEEP);
8479
8480     case KEY_socket:
8481         LOP(OP_SOCKET,XTERM);
8482
8483     case KEY_socketpair:
8484         LOP(OP_SOCKPAIR,XTERM);
8485
8486     case KEY_sort:
8487         checkcomma(s,PL_tokenbuf,"subroutine name");
8488         s = skipspace(s);
8489         PL_expect = XTERM;
8490         s = force_word(s,BAREWORD,TRUE,TRUE);
8491         LOP(OP_SORT,XREF);
8492
8493     case KEY_split:
8494         LOP(OP_SPLIT,XTERM);
8495
8496     case KEY_sprintf:
8497         LOP(OP_SPRINTF,XTERM);
8498
8499     case KEY_splice:
8500         LOP(OP_SPLICE,XTERM);
8501
8502     case KEY_sqrt:
8503         UNI(OP_SQRT);
8504
8505     case KEY_srand:
8506         UNI(OP_SRAND);
8507
8508     case KEY_stat:
8509         UNI(OP_STAT);
8510
8511     case KEY_study:
8512         UNI(OP_STUDY);
8513
8514     case KEY_substr:
8515         LOP(OP_SUBSTR,XTERM);
8516
8517     case KEY_format:
8518     case KEY_sub:
8519         return yyl_sub(aTHX_ s, key);
8520
8521     case KEY_system:
8522         LOP(OP_SYSTEM,XREF);
8523
8524     case KEY_symlink:
8525         LOP(OP_SYMLINK,XTERM);
8526
8527     case KEY_syscall:
8528         LOP(OP_SYSCALL,XTERM);
8529
8530     case KEY_sysopen:
8531         LOP(OP_SYSOPEN,XTERM);
8532
8533     case KEY_sysseek:
8534         LOP(OP_SYSSEEK,XTERM);
8535
8536     case KEY_sysread:
8537         LOP(OP_SYSREAD,XTERM);
8538
8539     case KEY_syswrite:
8540         LOP(OP_SYSWRITE,XTERM);
8541
8542     case KEY_tr:
8543     case KEY_y:
8544         s = scan_trans(s);
8545         TERM(sublex_start());
8546
8547     case KEY_tell:
8548         UNI(OP_TELL);
8549
8550     case KEY_telldir:
8551         UNI(OP_TELLDIR);
8552
8553     case KEY_tie:
8554         LOP(OP_TIE,XTERM);
8555
8556     case KEY_tied:
8557         UNI(OP_TIED);
8558
8559     case KEY_time:
8560         FUN0(OP_TIME);
8561
8562     case KEY_times:
8563         FUN0(OP_TMS);
8564
8565     case KEY_truncate:
8566         LOP(OP_TRUNCATE,XTERM);
8567
8568     case KEY_try:
8569         pl_yylval.ival = CopLINE(PL_curcop);
8570         Perl_ck_warner_d(aTHX_
8571             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
8572         PREBLOCK(TRY);
8573
8574     case KEY_uc:
8575         UNI(OP_UC);
8576
8577     case KEY_ucfirst:
8578         UNI(OP_UCFIRST);
8579
8580     case KEY_untie:
8581         UNI(OP_UNTIE);
8582
8583     case KEY_until:
8584         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8585             return REPORT(0);
8586         pl_yylval.ival = CopLINE(PL_curcop);
8587         OPERATOR(UNTIL);
8588
8589     case KEY_unless:
8590         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8591             return REPORT(0);
8592         pl_yylval.ival = CopLINE(PL_curcop);
8593         OPERATOR(UNLESS);
8594
8595     case KEY_unlink:
8596         LOP(OP_UNLINK,XTERM);
8597
8598     case KEY_undef:
8599         UNIDOR(OP_UNDEF);
8600
8601     case KEY_unpack:
8602         LOP(OP_UNPACK,XTERM);
8603
8604     case KEY_utime:
8605         LOP(OP_UTIME,XTERM);
8606
8607     case KEY_umask:
8608         UNIDOR(OP_UMASK);
8609
8610     case KEY_unshift:
8611         LOP(OP_UNSHIFT,XTERM);
8612
8613     case KEY_use:
8614         s = tokenize_use(1, s);
8615         TOKEN(USE);
8616
8617     case KEY_values:
8618         UNI(OP_VALUES);
8619
8620     case KEY_vec:
8621         LOP(OP_VEC,XTERM);
8622
8623     case KEY_when:
8624         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8625             return REPORT(0);
8626         pl_yylval.ival = CopLINE(PL_curcop);
8627         Perl_ck_warner_d(aTHX_
8628             packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8629             "when is experimental");
8630         OPERATOR(WHEN);
8631
8632     case KEY_while:
8633         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8634             return REPORT(0);
8635         pl_yylval.ival = CopLINE(PL_curcop);
8636         OPERATOR(WHILE);
8637
8638     case KEY_warn:
8639         PL_hints |= HINT_BLOCK_SCOPE;
8640         LOP(OP_WARN,XTERM);
8641
8642     case KEY_wait:
8643         FUN0(OP_WAIT);
8644
8645     case KEY_waitpid:
8646         LOP(OP_WAITPID,XTERM);
8647
8648     case KEY_wantarray:
8649         FUN0(OP_WANTARRAY);
8650
8651     case KEY_write:
8652         /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8653          * we use the same number on EBCDIC */
8654         gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8655         UNI(OP_ENTERWRITE);
8656
8657     case KEY_x:
8658         if (PL_expect == XOPERATOR) {
8659             if (*s == '=' && !PL_lex_allbrackets
8660                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8661             {
8662                 return REPORT(0);
8663             }
8664             Mop(OP_REPEAT);
8665         }
8666         check_uni();
8667         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8668
8669     case KEY_xor:
8670         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8671             return REPORT(0);
8672         pl_yylval.ival = OP_XOR;
8673         OPERATOR(OROP);
8674     }
8675 }
8676
8677 static int
8678 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8679 {
8680     I32 key = 0;
8681     I32 orig_keyword = 0;
8682     STRLEN olen = len;
8683     char *d = s;
8684     s += 2;
8685     s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8686     if ((*s == ':' && s[1] == ':')
8687         || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8688     {
8689         Copy(PL_bufptr, PL_tokenbuf, olen, char);
8690         return yyl_just_a_word(aTHX_ d, olen, 0, c);
8691     }
8692     if (!key)
8693         Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8694                           UTF8fARG(UTF, len, PL_tokenbuf));
8695     if (key < 0)
8696         key = -key;
8697     else if (key == KEY_require || key == KEY_do
8698           || key == KEY_glob)
8699         /* that's a way to remember we saw "CORE::" */
8700         orig_keyword = key;
8701
8702     /* Known to be a reserved word at this point */
8703     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8704 }
8705
8706 static int
8707 yyl_keylookup(pTHX_ char *s, GV *gv)
8708 {
8709     STRLEN len;
8710     bool anydelim;
8711     I32 key;
8712     struct code c = no_code;
8713     I32 orig_keyword = 0;
8714     char *d;
8715
8716     c.gv = gv;
8717
8718     PL_bufptr = s;
8719     s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8720
8721     /* Some keywords can be followed by any delimiter, including ':' */
8722     anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8723
8724     /* x::* is just a word, unless x is "CORE" */
8725     if (!anydelim && *s == ':' && s[1] == ':') {
8726         if (memEQs(PL_tokenbuf, len, "CORE"))
8727             return yyl_key_core(aTHX_ s, len, c);
8728         return yyl_just_a_word(aTHX_ s, len, 0, c);
8729     }
8730
8731     d = s;
8732     while (d < PL_bufend && isSPACE(*d))
8733             d++;        /* no comments skipped here, or s### is misparsed */
8734
8735     /* Is this a word before a => operator? */
8736     if (*d == '=' && d[1] == '>') {
8737         return yyl_fatcomma(aTHX_ s, len);
8738     }
8739
8740     /* Check for plugged-in keyword */
8741     {
8742         OP *o;
8743         int result;
8744         char *saved_bufptr = PL_bufptr;
8745         PL_bufptr = s;
8746         result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8747         s = PL_bufptr;
8748         if (result == KEYWORD_PLUGIN_DECLINE) {
8749             /* not a plugged-in keyword */
8750             PL_bufptr = saved_bufptr;
8751         } else if (result == KEYWORD_PLUGIN_STMT) {
8752             pl_yylval.opval = o;
8753             CLINE;
8754             if (!PL_nexttoke) PL_expect = XSTATE;
8755             return REPORT(PLUGSTMT);
8756         } else if (result == KEYWORD_PLUGIN_EXPR) {
8757             pl_yylval.opval = o;
8758             CLINE;
8759             if (!PL_nexttoke) PL_expect = XOPERATOR;
8760             return REPORT(PLUGEXPR);
8761         } else {
8762             Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8763         }
8764     }
8765
8766     /* Is this a label? */
8767     if (!anydelim && PL_expect == XSTATE
8768           && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8769         s = d + 1;
8770         pl_yylval.opval =
8771             newSVOP(OP_CONST, 0,
8772                 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8773         CLINE;
8774         TOKEN(LABEL);
8775     }
8776
8777     /* Check for lexical sub */
8778     if (PL_expect != XOPERATOR) {
8779         char tmpbuf[sizeof PL_tokenbuf + 1];
8780         *tmpbuf = '&';
8781         Copy(PL_tokenbuf, tmpbuf+1, len, char);
8782         c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8783         if (c.off != NOT_IN_PAD) {
8784             assert(c.off); /* we assume this is boolean-true below */
8785             if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8786                 HV *  const stash = PAD_COMPNAME_OURSTASH(c.off);
8787                 HEK * const stashname = HvNAME_HEK(stash);
8788                 c.sv = newSVhek(stashname);
8789                 sv_catpvs(c.sv, "::");
8790                 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8791                                 (UTF ? SV_CATUTF8 : SV_CATBYTES));
8792                 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8793                                   SVt_PVCV);
8794                 c.off = 0;
8795                 if (!c.gv) {
8796                     sv_free(c.sv);
8797                     c.sv = NULL;
8798                     return yyl_just_a_word(aTHX_ s, len, 0, c);
8799                 }
8800             }
8801             else {
8802                 c.rv2cv_op = newOP(OP_PADANY, 0);
8803                 c.rv2cv_op->op_targ = c.off;
8804                 c.cv = find_lexical_cv(c.off);
8805             }
8806             c.lex = TRUE;
8807             return yyl_just_a_word(aTHX_ s, len, 0, c);
8808         }
8809         c.off = 0;
8810     }
8811
8812     /* Check for built-in keyword */
8813     key = keyword(PL_tokenbuf, len, 0);
8814
8815     if (key < 0)
8816         key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8817
8818     if (key && key != KEY___DATA__ && key != KEY___END__
8819      && (!anydelim || *s != '#')) {
8820         /* no override, and not s### either; skipspace is safe here
8821          * check for => on following line */
8822         bool arrow;
8823         STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8824         STRLEN   soff = s         - SvPVX(PL_linestr);
8825         s = peekspace(s);
8826         arrow = *s == '=' && s[1] == '>';
8827         PL_bufptr = SvPVX(PL_linestr) + bufoff;
8828         s         = SvPVX(PL_linestr) +   soff;
8829         if (arrow)
8830             return yyl_fatcomma(aTHX_ s, len);
8831     }
8832
8833     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8834 }
8835
8836 static int
8837 yyl_try(pTHX_ char *s)
8838 {
8839     char *d;
8840     GV *gv = NULL;
8841     int tok;
8842
8843   retry:
8844     switch (*s) {
8845     default:
8846         if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
8847             if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
8848                 return tok;
8849             goto retry_bufptr;
8850         }
8851         yyl_croak_unrecognised(aTHX_ s);
8852
8853     case 4:
8854     case 26:
8855         /* emulate EOF on ^D or ^Z */
8856         if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
8857             return tok;
8858     retry_bufptr:
8859         s = PL_bufptr;
8860         goto retry;
8861
8862     case 0:
8863         if ((!PL_rsfp || PL_lex_inwhat)
8864          && (!PL_parser->filtered || s+1 < PL_bufend)) {
8865             PL_last_uni = 0;
8866             PL_last_lop = 0;
8867             if (PL_lex_brackets
8868                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
8869             {
8870                 yyerror((const char *)
8871                         (PL_lex_formbrack
8872                          ? "Format not terminated"
8873                          : "Missing right curly or square bracket"));
8874             }
8875             DEBUG_T({
8876                 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
8877             });
8878             TOKEN(0);
8879         }
8880         if (s++ < PL_bufend)
8881             goto retry;  /* ignore stray nulls */
8882         PL_last_uni = 0;
8883         PL_last_lop = 0;
8884         if (!PL_in_eval && !PL_preambled) {
8885             PL_preambled = TRUE;
8886             if (PL_perldb) {
8887                 /* Generate a string of Perl code to load the debugger.
8888                  * If PERL5DB is set, it will return the contents of that,
8889                  * otherwise a compile-time require of perl5db.pl.  */
8890
8891                 const char * const pdb = PerlEnv_getenv("PERL5DB");
8892
8893                 if (pdb) {
8894                     sv_setpv(PL_linestr, pdb);
8895                     sv_catpvs(PL_linestr,";");
8896                 } else {
8897                     SETERRNO(0,SS_NORMAL);
8898                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
8899                 }
8900                 PL_parser->preambling = CopLINE(PL_curcop);
8901             } else
8902                 SvPVCLEAR(PL_linestr);
8903             if (PL_preambleav) {
8904                 SV **svp = AvARRAY(PL_preambleav);
8905                 SV **const end = svp + AvFILLp(PL_preambleav);
8906                 while(svp <= end) {
8907                     sv_catsv(PL_linestr, *svp);
8908                     ++svp;
8909                     sv_catpvs(PL_linestr, ";");
8910                 }
8911                 sv_free(MUTABLE_SV(PL_preambleav));
8912                 PL_preambleav = NULL;
8913             }
8914             if (PL_minus_E)
8915                 sv_catpvs(PL_linestr,
8916                           "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
8917             if (PL_minus_n || PL_minus_p) {
8918                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
8919                 if (PL_minus_l)
8920                     sv_catpvs(PL_linestr,"chomp;");
8921                 if (PL_minus_a) {
8922                     if (PL_minus_F) {
8923                         if (   (   *PL_splitstr == '/'
8924                                 || *PL_splitstr == '\''
8925                                 || *PL_splitstr == '"')
8926                             && strchr(PL_splitstr + 1, *PL_splitstr))
8927                         {
8928                             /* strchr is ok, because -F pattern can't contain
8929                              * embeddded NULs */
8930                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
8931                         }
8932                         else {
8933                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
8934                                bytes can be used as quoting characters.  :-) */
8935                             const char *splits = PL_splitstr;
8936                             sv_catpvs(PL_linestr, "our @F=split(q\0");
8937                             do {
8938                                 /* Need to \ \s  */
8939                                 if (*splits == '\\')
8940                                     sv_catpvn(PL_linestr, splits, 1);
8941                                 sv_catpvn(PL_linestr, splits, 1);
8942                             } while (*splits++);
8943                             /* This loop will embed the trailing NUL of
8944                                PL_linestr as the last thing it does before
8945                                terminating.  */
8946                             sv_catpvs(PL_linestr, ");");
8947                         }
8948                     }
8949                     else
8950                         sv_catpvs(PL_linestr,"our @F=split(' ');");
8951                 }
8952             }
8953             sv_catpvs(PL_linestr, "\n");
8954             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
8955             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8956             PL_last_lop = PL_last_uni = NULL;
8957             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
8958                 update_debugger_info(PL_linestr, NULL, 0);
8959             goto retry;
8960         }
8961         if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
8962             return tok;
8963         goto retry_bufptr;
8964
8965     case '\r':
8966 #ifdef PERL_STRICT_CR
8967         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
8968         Perl_croak(aTHX_
8969       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
8970 #endif
8971     case ' ': case '\t': case '\f': case '\v':
8972         s++;
8973         goto retry;
8974
8975     case '#':
8976     case '\n': {
8977         const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
8978         if (needs_semicolon)
8979             TOKEN(PERLY_SEMICOLON);
8980         else
8981             goto retry;
8982     }
8983
8984     case '-':
8985         return yyl_hyphen(aTHX_ s);
8986
8987     case '+':
8988         return yyl_plus(aTHX_ s);
8989
8990     case '*':
8991         return yyl_star(aTHX_ s);
8992
8993     case '%':
8994         return yyl_percent(aTHX_ s);
8995
8996     case '^':
8997         return yyl_caret(aTHX_ s);
8998
8999     case '[':
9000         return yyl_leftsquare(aTHX_ s);
9001
9002     case '~':
9003         return yyl_tilde(aTHX_ s);
9004
9005     case ',':
9006         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9007             TOKEN(0);
9008         s++;
9009         OPERATOR(PERLY_COMMA);
9010     case ':':
9011         if (s[1] == ':')
9012             return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
9013         return yyl_colon(aTHX_ s + 1);
9014
9015     case '(':
9016         return yyl_leftparen(aTHX_ s + 1);
9017
9018     case ';':
9019         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
9020             TOKEN(0);
9021         CLINE;
9022         s++;
9023         PL_expect = XSTATE;
9024         TOKEN(PERLY_SEMICOLON);
9025
9026     case ')':
9027         return yyl_rightparen(aTHX_ s);
9028
9029     case ']':
9030         return yyl_rightsquare(aTHX_ s);
9031
9032     case '{':
9033         return yyl_leftcurly(aTHX_ s + 1, 0);
9034
9035     case '}':
9036         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
9037             TOKEN(0);
9038         return yyl_rightcurly(aTHX_ s, 0);
9039
9040     case '&':
9041         return yyl_ampersand(aTHX_ s);
9042
9043     case '|':
9044         return yyl_verticalbar(aTHX_ s);
9045
9046     case '=':
9047         if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
9048             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
9049         {
9050             s = vcs_conflict_marker(s + 7);
9051             goto retry;
9052         }
9053
9054         s++;
9055         {
9056             const char tmp = *s++;
9057             if (tmp == '=') {
9058                 if (!PL_lex_allbrackets
9059                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
9060                 {
9061                     s -= 2;
9062                     TOKEN(0);
9063                 }
9064                 ChEop(OP_EQ);
9065             }
9066             if (tmp == '>') {
9067                 if (!PL_lex_allbrackets
9068                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9069                 {
9070                     s -= 2;
9071                     TOKEN(0);
9072                 }
9073                 OPERATOR(PERLY_COMMA);
9074             }
9075             if (tmp == '~')
9076                 PMop(OP_MATCH);
9077             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
9078                 && memCHRs("+-*/%.^&|<",tmp))
9079                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9080                             "Reversed %c= operator",(int)tmp);
9081             s--;
9082             if (PL_expect == XSTATE
9083                 && isALPHA(tmp)
9084                 && (s == PL_linestart+1 || s[-2] == '\n') )
9085             {
9086                 if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
9087                     || PL_lex_state != LEX_NORMAL)
9088                 {
9089                     d = PL_bufend;
9090                     while (s < d) {
9091                         if (*s++ == '\n') {
9092                             incline(s, PL_bufend);
9093                             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
9094                             {
9095                                 s = (char *) memchr(s,'\n', d - s);
9096                                 if (s)
9097                                     s++;
9098                                 else
9099                                     s = d;
9100                                 incline(s, PL_bufend);
9101                                 goto retry;
9102                             }
9103                         }
9104                     }
9105                     goto retry;
9106                 }
9107                 s = PL_bufend;
9108                 PL_parser->in_pod = 1;
9109                 goto retry;
9110             }
9111         }
9112         if (PL_expect == XBLOCK) {
9113             const char *t = s;
9114 #ifdef PERL_STRICT_CR
9115             while (SPACE_OR_TAB(*t))
9116 #else
9117             while (SPACE_OR_TAB(*t) || *t == '\r')
9118 #endif
9119                 t++;
9120             if (*t == '\n' || *t == '#') {
9121                 ENTER_with_name("lex_format");
9122                 SAVEI8(PL_parser->form_lex_state);
9123                 SAVEI32(PL_lex_formbrack);
9124                 PL_parser->form_lex_state = PL_lex_state;
9125                 PL_lex_formbrack = PL_lex_brackets + 1;
9126                 PL_parser->sub_error_count = PL_error_count;
9127                 return yyl_leftcurly(aTHX_ s, 1);
9128             }
9129         }
9130         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
9131             s--;
9132             TOKEN(0);
9133         }
9134         pl_yylval.ival = 0;
9135         OPERATOR(ASSIGNOP);
9136
9137         case '!':
9138         return yyl_bang(aTHX_ s + 1);
9139
9140     case '<':
9141         if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
9142             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
9143         {
9144             s = vcs_conflict_marker(s + 7);
9145             goto retry;
9146         }
9147         return yyl_leftpointy(aTHX_ s);
9148
9149     case '>':
9150         if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
9151             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
9152         {
9153             s = vcs_conflict_marker(s + 7);
9154             goto retry;
9155         }
9156         return yyl_rightpointy(aTHX_ s + 1);
9157
9158     case '$':
9159         return yyl_dollar(aTHX_ s);
9160
9161     case '@':
9162         return yyl_snail(aTHX_ s);
9163
9164     case '/':                   /* may be division, defined-or, or pattern */
9165         return yyl_slash(aTHX_ s);
9166
9167      case '?':                  /* conditional */
9168         s++;
9169         if (!PL_lex_allbrackets
9170             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
9171         {
9172             s--;
9173             TOKEN(0);
9174         }
9175         PL_lex_allbrackets++;
9176         OPERATOR(PERLY_QUESTION_MARK);
9177
9178     case '.':
9179         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
9180 #ifdef PERL_STRICT_CR
9181             && s[1] == '\n'
9182 #else
9183             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
9184 #endif
9185             && (s == PL_linestart || s[-1] == '\n') )
9186         {
9187             PL_expect = XSTATE;
9188             /* formbrack==2 means dot seen where arguments expected */
9189             return yyl_rightcurly(aTHX_ s, 2);
9190         }
9191         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9192             s += 3;
9193             OPERATOR(YADAYADA);
9194         }
9195         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9196             char tmp = *s++;
9197             if (*s == tmp) {
9198                 if (!PL_lex_allbrackets
9199                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9200                 {
9201                     s--;
9202                     TOKEN(0);
9203                 }
9204                 s++;
9205                 if (*s == tmp) {
9206                     s++;
9207                     pl_yylval.ival = OPf_SPECIAL;
9208                 }
9209                 else
9210                     pl_yylval.ival = 0;
9211                 OPERATOR(DOTDOT);
9212             }
9213             if (*s == '=' && !PL_lex_allbrackets
9214                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9215             {
9216                 s--;
9217                 TOKEN(0);
9218             }
9219             Aop(OP_CONCAT);
9220         }
9221         /* FALLTHROUGH */
9222     case '0': case '1': case '2': case '3': case '4':
9223     case '5': case '6': case '7': case '8': case '9':
9224         s = scan_num(s, &pl_yylval);
9225         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9226         if (PL_expect == XOPERATOR)
9227             no_op("Number",s);
9228         TERM(THING);
9229
9230     case '\'':
9231         return yyl_sglquote(aTHX_ s);
9232
9233     case '"':
9234         return yyl_dblquote(aTHX_ s);
9235
9236     case '`':
9237         return yyl_backtick(aTHX_ s);
9238
9239     case '\\':
9240         return yyl_backslash(aTHX_ s + 1);
9241
9242     case 'v':
9243         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9244             char *start = s + 2;
9245             while (isDIGIT(*start) || *start == '_')
9246                 start++;
9247             if (*start == '.' && isDIGIT(start[1])) {
9248                 s = scan_num(s, &pl_yylval);
9249                 TERM(THING);
9250             }
9251             else if ((*start == ':' && start[1] == ':')
9252                      || (PL_expect == XSTATE && *start == ':')) {
9253                 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9254                     return tok;
9255                 goto retry_bufptr;
9256             }
9257             else if (PL_expect == XSTATE) {
9258                 d = start;
9259                 while (d < PL_bufend && isSPACE(*d)) d++;
9260                 if (*d == ':') {
9261                     if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9262                         return tok;
9263                     goto retry_bufptr;
9264                 }
9265             }
9266             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9267             if (!isALPHA(*start) && (PL_expect == XTERM
9268                         || PL_expect == XREF || PL_expect == XSTATE
9269                         || PL_expect == XTERMORDORDOR)) {
9270                 GV *const gv = gv_fetchpvn_flags(s, start - s,
9271                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
9272                 if (!gv) {
9273                     s = scan_num(s, &pl_yylval);
9274                     TERM(THING);
9275                 }
9276             }
9277         }
9278         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9279             return tok;
9280         goto retry_bufptr;
9281
9282     case 'x':
9283         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9284             s++;
9285             Mop(OP_REPEAT);
9286         }
9287         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9288             return tok;
9289         goto retry_bufptr;
9290
9291     case '_':
9292     case 'a': case 'A':
9293     case 'b': case 'B':
9294     case 'c': case 'C':
9295     case 'd': case 'D':
9296     case 'e': case 'E':
9297     case 'f': case 'F':
9298     case 'g': case 'G':
9299     case 'h': case 'H':
9300     case 'i': case 'I':
9301     case 'j': case 'J':
9302     case 'k': case 'K':
9303     case 'l': case 'L':
9304     case 'm': case 'M':
9305     case 'n': case 'N':
9306     case 'o': case 'O':
9307     case 'p': case 'P':
9308     case 'q': case 'Q':
9309     case 'r': case 'R':
9310     case 's': case 'S':
9311     case 't': case 'T':
9312     case 'u': case 'U':
9313               case 'V':
9314     case 'w': case 'W':
9315               case 'X':
9316     case 'y': case 'Y':
9317     case 'z': case 'Z':
9318         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9319             return tok;
9320         goto retry_bufptr;
9321     }
9322 }
9323
9324
9325 /*
9326   yylex
9327
9328   Works out what to call the token just pulled out of the input
9329   stream.  The yacc parser takes care of taking the ops we return and
9330   stitching them into a tree.
9331
9332   Returns:
9333     The type of the next token
9334
9335   Structure:
9336       Check if we have already built the token; if so, use it.
9337       Switch based on the current state:
9338           - if we have a case modifier in a string, deal with that
9339           - handle other cases of interpolation inside a string
9340           - scan the next line if we are inside a format
9341       In the normal state, switch on the next character:
9342           - default:
9343             if alphabetic, go to key lookup
9344             unrecognized character - croak
9345           - 0/4/26: handle end-of-line or EOF
9346           - cases for whitespace
9347           - \n and #: handle comments and line numbers
9348           - various operators, brackets and sigils
9349           - numbers
9350           - quotes
9351           - 'v': vstrings (or go to key lookup)
9352           - 'x' repetition operator (or go to key lookup)
9353           - other ASCII alphanumerics (key lookup begins here):
9354               word before => ?
9355               keyword plugin
9356               scan built-in keyword (but do nothing with it yet)
9357               check for statement label
9358               check for lexical subs
9359                   return yyl_just_a_word if there is one
9360               see whether built-in keyword is overridden
9361               switch on keyword number:
9362                   - default: return yyl_just_a_word:
9363                       not a built-in keyword; handle bareword lookup
9364                       disambiguate between method and sub call
9365                       fall back to bareword
9366                   - cases for built-in keywords
9367 */
9368
9369 int
9370 Perl_yylex(pTHX)
9371 {
9372     char *s = PL_bufptr;
9373
9374     if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9375         const U8* first_bad_char_loc;
9376         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9377                                                         PL_bufend - PL_bufptr,
9378                                                         &first_bad_char_loc)))
9379         {
9380             _force_out_malformed_utf8_message(first_bad_char_loc,
9381                                               (U8 *) PL_bufend,
9382                                               0,
9383                                               1 /* 1 means die */ );
9384             NOT_REACHED; /* NOTREACHED */
9385         }
9386         PL_parser->recheck_utf8_validity = FALSE;
9387     }
9388     DEBUG_T( {
9389         SV* tmp = newSVpvs("");
9390         PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
9391             (IV)CopLINE(PL_curcop),
9392             lex_state_names[PL_lex_state],
9393             exp_name[PL_expect],
9394             pv_display(tmp, s, strlen(s), 0, 60));
9395         SvREFCNT_dec(tmp);
9396     } );
9397
9398     /* when we've already built the next token, just pull it out of the queue */
9399     if (PL_nexttoke) {
9400         PL_nexttoke--;
9401         pl_yylval = PL_nextval[PL_nexttoke];
9402         {
9403             I32 next_type;
9404             next_type = PL_nexttype[PL_nexttoke];
9405             if (next_type & (7<<24)) {
9406                 if (next_type & (1<<24)) {
9407                     if (PL_lex_brackets > 100)
9408                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9409                     PL_lex_brackstack[PL_lex_brackets++] =
9410                         (char) ((U8) (next_type >> 16));
9411                 }
9412                 if (next_type & (2<<24))
9413                     PL_lex_allbrackets++;
9414                 if (next_type & (4<<24))
9415                     PL_lex_allbrackets--;
9416                 next_type &= 0xffff;
9417             }
9418             return REPORT(next_type == 'p' ? pending_ident() : next_type);
9419         }
9420     }
9421
9422     switch (PL_lex_state) {
9423     case LEX_NORMAL:
9424     case LEX_INTERPNORMAL:
9425         break;
9426
9427     /* interpolated case modifiers like \L \U, including \Q and \E.
9428        when we get here, PL_bufptr is at the \
9429     */
9430     case LEX_INTERPCASEMOD:
9431         /* handle \E or end of string */
9432         return yyl_interpcasemod(aTHX_ s);
9433
9434     case LEX_INTERPPUSH:
9435         return REPORT(sublex_push());
9436
9437     case LEX_INTERPSTART:
9438         if (PL_bufptr == PL_bufend)
9439             return REPORT(sublex_done());
9440         DEBUG_T({
9441             if(*PL_bufptr != '(')
9442                 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9443         });
9444         PL_expect = XTERM;
9445         /* for /@a/, we leave the joining for the regex engine to do
9446          * (unless we're within \Q etc) */
9447         PL_lex_dojoin = (*PL_bufptr == '@'
9448                             && (!PL_lex_inpat || PL_lex_casemods));
9449         PL_lex_state = LEX_INTERPNORMAL;
9450         if (PL_lex_dojoin) {
9451             NEXTVAL_NEXTTOKE.ival = 0;
9452             force_next(PERLY_COMMA);
9453             force_ident("\"", PERLY_DOLLAR);
9454             NEXTVAL_NEXTTOKE.ival = 0;
9455             force_next(PERLY_DOLLAR);
9456             NEXTVAL_NEXTTOKE.ival = 0;
9457             force_next((2<<24)|PERLY_PAREN_OPEN);
9458             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
9459             force_next(FUNC);
9460         }
9461         /* Convert (?{...}) and friends to 'do {...}' */
9462         if (PL_lex_inpat && *PL_bufptr == '(') {
9463             PL_parser->lex_shared->re_eval_start = PL_bufptr;
9464             PL_bufptr += 2;
9465             if (*PL_bufptr != '{')
9466                 PL_bufptr++;
9467             PL_expect = XTERMBLOCK;
9468             force_next(DO);
9469         }
9470
9471         if (PL_lex_starts++) {
9472             s = PL_bufptr;
9473             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9474             if (!PL_lex_casemods && PL_lex_inpat)
9475                 TOKEN(PERLY_COMMA);
9476             else
9477                 AopNOASSIGN(OP_CONCAT);
9478         }
9479         return yylex();
9480
9481     case LEX_INTERPENDMAYBE:
9482         if (intuit_more(PL_bufptr, PL_bufend)) {
9483             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
9484             break;
9485         }
9486         /* FALLTHROUGH */
9487
9488     case LEX_INTERPEND:
9489         if (PL_lex_dojoin) {
9490             const U8 dojoin_was = PL_lex_dojoin;
9491             PL_lex_dojoin = FALSE;
9492             PL_lex_state = LEX_INTERPCONCAT;
9493             PL_lex_allbrackets--;
9494             return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN);
9495         }
9496         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9497             && SvEVALED(PL_lex_repl))
9498         {
9499             if (PL_bufptr != PL_bufend)
9500                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9501             PL_lex_repl = NULL;
9502         }
9503         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
9504            re_eval_str.  If the here-doc body’s length equals the previous
9505            value of re_eval_start, re_eval_start will now be null.  So
9506            check re_eval_str as well. */
9507         if (PL_parser->lex_shared->re_eval_start
9508          || PL_parser->lex_shared->re_eval_str) {
9509             SV *sv;
9510             if (*PL_bufptr != ')')
9511                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9512             PL_bufptr++;
9513             /* having compiled a (?{..}) expression, return the original
9514              * text too, as a const */
9515             if (PL_parser->lex_shared->re_eval_str) {
9516                 sv = PL_parser->lex_shared->re_eval_str;
9517                 PL_parser->lex_shared->re_eval_str = NULL;
9518                 SvCUR_set(sv,
9519                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9520                 SvPV_shrink_to_cur(sv);
9521             }
9522             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9523                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9524             NEXTVAL_NEXTTOKE.opval =
9525                     newSVOP(OP_CONST, 0,
9526                                  sv);
9527             force_next(THING);
9528             PL_parser->lex_shared->re_eval_start = NULL;
9529             PL_expect = XTERM;
9530             return REPORT(PERLY_COMMA);
9531         }
9532
9533         /* FALLTHROUGH */
9534     case LEX_INTERPCONCAT:
9535 #ifdef DEBUGGING
9536         if (PL_lex_brackets)
9537             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9538                        (long) PL_lex_brackets);
9539 #endif
9540         if (PL_bufptr == PL_bufend)
9541             return REPORT(sublex_done());
9542
9543         /* m'foo' still needs to be parsed for possible (?{...}) */
9544         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9545             SV *sv = newSVsv(PL_linestr);
9546             sv = tokeq(sv);
9547             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9548             s = PL_bufend;
9549         }
9550         else {
9551             int save_error_count = PL_error_count;
9552
9553             s = scan_const(PL_bufptr);
9554
9555             /* Set flag if this was a pattern and there were errors.  op.c will
9556              * refuse to compile a pattern with this flag set.  Otherwise, we
9557              * could get segfaults, etc. */
9558             if (PL_lex_inpat && PL_error_count > save_error_count) {
9559                 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9560             }
9561             if (*s == '\\')
9562                 PL_lex_state = LEX_INTERPCASEMOD;
9563             else
9564                 PL_lex_state = LEX_INTERPSTART;
9565         }
9566
9567         if (s != PL_bufptr) {
9568             NEXTVAL_NEXTTOKE = pl_yylval;
9569             PL_expect = XTERM;
9570             force_next(THING);
9571             if (PL_lex_starts++) {
9572                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9573                 if (!PL_lex_casemods && PL_lex_inpat)
9574                     TOKEN(PERLY_COMMA);
9575                 else
9576                     AopNOASSIGN(OP_CONCAT);
9577             }
9578             else {
9579                 PL_bufptr = s;
9580                 return yylex();
9581             }
9582         }
9583
9584         return yylex();
9585     case LEX_FORMLINE:
9586         if (PL_parser->sub_error_count != PL_error_count) {
9587             /* There was an error parsing a formline, which tends to
9588                mess up the parser.
9589                Unlike interpolated sub-parsing, we can't treat any of
9590                these as recoverable, so no need to check sub_no_recover.
9591             */
9592             yyquit();
9593         }
9594         assert(PL_lex_formbrack);
9595         s = scan_formline(PL_bufptr);
9596         if (!PL_lex_formbrack)
9597             return yyl_rightcurly(aTHX_ s, 1);
9598         PL_bufptr = s;
9599         return yylex();
9600     }
9601
9602     /* We really do *not* want PL_linestr ever becoming a COW. */
9603     assert (!SvIsCOW(PL_linestr));
9604     s = PL_bufptr;
9605     PL_oldoldbufptr = PL_oldbufptr;
9606     PL_oldbufptr = s;
9607
9608     if (PL_in_my == KEY_sigvar) {
9609         PL_parser->saw_infix_sigil = 0;
9610         return yyl_sigvar(aTHX_ s);
9611     }
9612
9613     {
9614         /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9615            On its return, we then need to set it to indicate whether the token
9616            we just encountered was an infix operator that (if we hadn't been
9617            expecting an operator) have been a sigil.
9618         */
9619         bool expected_operator = (PL_expect == XOPERATOR);
9620         int ret = yyl_try(aTHX_ s);
9621         switch (pl_yylval.ival) {
9622         case OP_BIT_AND:
9623         case OP_MODULO:
9624         case OP_MULTIPLY:
9625         case OP_NBIT_AND:
9626             if (expected_operator) {
9627                 PL_parser->saw_infix_sigil = 1;
9628                 break;
9629             }
9630             /* FALLTHROUGH */
9631         default:
9632             PL_parser->saw_infix_sigil = 0;
9633         }
9634         return ret;
9635     }
9636 }
9637
9638
9639 /*
9640   S_pending_ident
9641
9642   Looks up an identifier in the pad or in a package
9643
9644   PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9645   rather than a plain pad var.
9646
9647   Returns:
9648     PRIVATEREF if this is a lexical name.
9649     BAREWORD   if this belongs to a package.
9650
9651   Structure:
9652       if we're in a my declaration
9653           croak if they tried to say my($foo::bar)
9654           build the ops for a my() declaration
9655       if it's an access to a my() variable
9656           build ops for access to a my() variable
9657       if in a dq string, and they've said @foo and we can't find @foo
9658           warn
9659       build ops for a bareword
9660 */
9661
9662 static int
9663 S_pending_ident(pTHX)
9664 {
9665     PADOFFSET tmp = 0;
9666     const char pit = (char)pl_yylval.ival;
9667     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9668     /* All routes through this function want to know if there is a colon.  */
9669     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9670
9671     DEBUG_T({ PerlIO_printf(Perl_debug_log,
9672           "### Pending identifier '%s'\n", PL_tokenbuf); });
9673     assert(tokenbuf_len >= 2);
9674
9675     /* if we're in a my(), we can't allow dynamics here.
9676        $foo'bar has already been turned into $foo::bar, so
9677        just check for colons.
9678
9679        if it's a legal name, the OP is a PADANY.
9680     */
9681     if (PL_in_my) {
9682         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
9683             if (has_colon)
9684                 /* diag_listed_as: No package name allowed for variable %s
9685                                    in "our" */
9686                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9687                                   "%s %s in \"our\"",
9688                                   *PL_tokenbuf=='&' ? "subroutine" : "variable",
9689                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9690             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9691         }
9692         else {
9693             OP *o;
9694             if (has_colon) {
9695                 /* "my" variable %s can't be in a package */
9696                 /* PL_no_myglob is constant */
9697                 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9698                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9699                             PL_in_my == KEY_my ? "my" : "state",
9700                             *PL_tokenbuf == '&' ? "subroutine" : "variable",
9701                             PL_tokenbuf),
9702                             UTF ? SVf_UTF8 : 0);
9703                 GCC_DIAG_RESTORE_STMT;
9704             }
9705
9706             if (PL_in_my == KEY_sigvar) {
9707                 /* A signature 'padop' needs in addition, an op_first to
9708                  * point to a child sigdefelem, and an extra field to hold
9709                  * the signature index. We can achieve both by using an
9710                  * UNOP_AUX and (ab)using the op_aux field to hold the
9711                  * index. If we ever need more fields, use a real malloced
9712                  * aux strut instead.
9713                  */
9714                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9715                                     INT2PTR(UNOP_AUX_item *,
9716                                         (PL_parser->sig_elems)));
9717                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9718                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9719                                   :                         OPpARGELEM_HV);
9720             }
9721             else
9722                 o = newOP(OP_PADANY, 0);
9723             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9724                                                         UTF ? SVf_UTF8 : 0);
9725             if (PL_in_my == KEY_sigvar)
9726                 PL_in_my = 0;
9727
9728             pl_yylval.opval = o;
9729             return PRIVATEREF;
9730         }
9731     }
9732
9733     /*
9734        build the ops for accesses to a my() variable.
9735     */
9736
9737     if (!has_colon) {
9738         if (!PL_in_my)
9739             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9740                                  0);
9741         if (tmp != NOT_IN_PAD) {
9742             /* might be an "our" variable" */
9743             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9744                 /* build ops for a bareword */
9745                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9746                 HEK * const stashname = HvNAME_HEK(stash);
9747                 SV *  const sym = newSVhek(stashname);
9748                 sv_catpvs(sym, "::");
9749                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9750                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9751                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9752                 if (pit != '&')
9753                   gv_fetchsv(sym,
9754                     GV_ADDMULTI,
9755                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9756                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9757                      : SVt_PVHV));
9758                 return BAREWORD;
9759             }
9760
9761             pl_yylval.opval = newOP(OP_PADANY, 0);
9762             pl_yylval.opval->op_targ = tmp;
9763             return PRIVATEREF;
9764         }
9765     }
9766
9767     /*
9768        Whine if they've said @foo or @foo{key} in a doublequoted string,
9769        and @foo (or %foo) isn't a variable we can find in the symbol
9770        table.
9771     */
9772     if (ckWARN(WARN_AMBIGUOUS)
9773         && pit == '@'
9774         && PL_lex_state != LEX_NORMAL
9775         && !PL_lex_brackets)
9776     {
9777         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9778                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9779                                          SVt_PVAV);
9780         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9781            )
9782         {
9783             /* Downgraded from fatal to warning 20000522 mjd */
9784             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9785                         "Possible unintended interpolation of %" UTF8f
9786                         " in string",
9787                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9788         }
9789     }
9790
9791     /* build ops for a bareword */
9792     pl_yylval.opval = newSVOP(OP_CONST, 0,
9793                                    newSVpvn_flags(PL_tokenbuf + 1,
9794                                                       tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9795                                                       UTF ? SVf_UTF8 : 0 ));
9796     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9797     if (pit != '&')
9798         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9799                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9800                      | ( UTF ? SVf_UTF8 : 0 ),
9801                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9802                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9803                       : SVt_PVHV));
9804     return BAREWORD;
9805 }
9806
9807 STATIC void
9808 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9809 {
9810     PERL_ARGS_ASSERT_CHECKCOMMA;
9811
9812     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
9813         if (ckWARN(WARN_SYNTAX)) {
9814             int level = 1;
9815             const char *w;
9816             for (w = s+2; *w && level; w++) {
9817                 if (*w == '(')
9818                     ++level;
9819                 else if (*w == ')')
9820                     --level;
9821             }
9822             while (isSPACE(*w))
9823                 ++w;
9824             /* the list of chars below is for end of statements or
9825              * block / parens, boolean operators (&&, ||, //) and branch
9826              * constructs (or, and, if, until, unless, while, err, for).
9827              * Not a very solid hack... */
9828             if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
9829                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9830                             "%s (...) interpreted as function",name);
9831         }
9832     }
9833     while (s < PL_bufend && isSPACE(*s))
9834         s++;
9835     if (*s == '(')
9836         s++;
9837     while (s < PL_bufend && isSPACE(*s))
9838         s++;
9839     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9840         const char * const w = s;
9841         s += UTF ? UTF8SKIP(s) : 1;
9842         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9843             s += UTF ? UTF8SKIP(s) : 1;
9844         while (s < PL_bufend && isSPACE(*s))
9845             s++;
9846         if (*s == ',') {
9847             GV* gv;
9848             if (keyword(w, s - w, 0))
9849                 return;
9850
9851             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9852             if (gv && GvCVu(gv))
9853                 return;
9854             if (s - w <= 254) {
9855                 PADOFFSET off;
9856                 char tmpbuf[256];
9857                 Copy(w, tmpbuf+1, s - w, char);
9858                 *tmpbuf = '&';
9859                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9860                 if (off != NOT_IN_PAD) return;
9861             }
9862             Perl_croak(aTHX_ "No comma allowed after %s", what);
9863         }
9864     }
9865 }
9866
9867 /* S_new_constant(): do any overload::constant lookup.
9868
9869    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9870    Best used as sv=new_constant(..., sv, ...).
9871    If s, pv are NULL, calls subroutine with one argument,
9872    and <type> is used with error messages only.
9873    <type> is assumed to be well formed UTF-8.
9874
9875    If error_msg is not NULL, *error_msg will be set to any error encountered.
9876    Otherwise yyerror() will be used to output it */
9877
9878 STATIC SV *
9879 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9880                SV *sv, SV *pv, const char *type, STRLEN typelen,
9881                const char ** error_msg)
9882 {
9883     dSP;
9884     HV * table = GvHV(PL_hintgv);                /* ^H */
9885     SV *res;
9886     SV *errsv = NULL;
9887     SV **cvp;
9888     SV *cv, *typesv;
9889     const char *why1 = "", *why2 = "", *why3 = "";
9890     const char * optional_colon = ":";  /* Only some messages have a colon */
9891     char *msg;
9892
9893     PERL_ARGS_ASSERT_NEW_CONSTANT;
9894     /* We assume that this is true: */
9895     assert(type || s);
9896
9897     sv_2mortal(sv);                     /* Parent created it permanently */
9898
9899     if (   ! table
9900         || ! (PL_hints & HINT_LOCALIZE_HH))
9901     {
9902         why1 = "unknown";
9903         optional_colon = "";
9904         goto report;
9905     }
9906
9907     cvp = hv_fetch(table, key, keylen, FALSE);
9908     if (!cvp || !SvOK(*cvp)) {
9909         why1 = "$^H{";
9910         why2 = key;
9911         why3 = "} is not defined";
9912         goto report;
9913     }
9914
9915     cv = *cvp;
9916     if (!pv && s)
9917         pv = newSVpvn_flags(s, len, SVs_TEMP);
9918     if (type && pv)
9919         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9920     else
9921         typesv = &PL_sv_undef;
9922
9923     PUSHSTACKi(PERLSI_OVERLOAD);
9924     ENTER ;
9925     SAVETMPS;
9926
9927     PUSHMARK(SP) ;
9928     EXTEND(sp, 3);
9929     if (pv)
9930         PUSHs(pv);
9931     PUSHs(sv);
9932     if (pv)
9933         PUSHs(typesv);
9934     PUTBACK;
9935     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9936
9937     SPAGAIN ;
9938
9939     /* Check the eval first */
9940     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9941         STRLEN errlen;
9942         const char * errstr;
9943         sv_catpvs(errsv, "Propagated");
9944         errstr = SvPV_const(errsv, errlen);
9945         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9946         (void)POPs;
9947         res = SvREFCNT_inc_simple_NN(sv);
9948     }
9949     else {
9950         res = POPs;
9951         SvREFCNT_inc_simple_void_NN(res);
9952     }
9953
9954     PUTBACK ;
9955     FREETMPS ;
9956     LEAVE ;
9957     POPSTACK;
9958
9959     if (SvOK(res)) {
9960         return res;
9961     }
9962
9963     sv = res;
9964     (void)sv_2mortal(sv);
9965
9966     why1 = "Call to &{$^H{";
9967     why2 = key;
9968     why3 = "}} did not return a defined value";
9969
9970   report:
9971
9972     msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
9973                         (int)(type ? typelen : len),
9974                         (type ? type: s),
9975                         optional_colon,
9976                         why1, why2, why3);
9977     if (error_msg) {
9978         *error_msg = msg;
9979     }
9980     else {
9981         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9982     }
9983     return SvREFCNT_inc_simple_NN(sv);
9984 }
9985
9986 PERL_STATIC_INLINE void
9987 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9988                     bool is_utf8, bool check_dollar, bool tick_warn)
9989 {
9990     int saw_tick = 0;
9991     const char *olds = *s;
9992     PERL_ARGS_ASSERT_PARSE_IDENT;
9993
9994     while (*s < PL_bufend) {
9995         if (*d >= e)
9996             Perl_croak(aTHX_ "%s", ident_too_long);
9997         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9998              /* The UTF-8 case must come first, otherwise things
9999              * like c\N{COMBINING TILDE} would start failing, as the
10000              * isWORDCHAR_A case below would gobble the 'c' up.
10001              */
10002
10003             char *t = *s + UTF8SKIP(*s);
10004             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
10005                 t += UTF8SKIP(t);
10006             }
10007             if (*d + (t - *s) > e)
10008                 Perl_croak(aTHX_ "%s", ident_too_long);
10009             Copy(*s, *d, t - *s, char);
10010             *d += t - *s;
10011             *s = t;
10012         }
10013         else if ( isWORDCHAR_A(**s) ) {
10014             do {
10015                 *(*d)++ = *(*s)++;
10016             } while (isWORDCHAR_A(**s) && *d < e);
10017         }
10018         else if (   allow_package
10019                  && **s == '\''
10020                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
10021         {
10022             *(*d)++ = ':';
10023             *(*d)++ = ':';
10024             (*s)++;
10025             saw_tick++;
10026         }
10027         else if (allow_package && **s == ':' && (*s)[1] == ':'
10028            /* Disallow things like Foo::$bar. For the curious, this is
10029             * the code path that triggers the "Bad name after" warning
10030             * when looking for barewords.
10031             */
10032            && !(check_dollar && (*s)[2] == '$')) {
10033             *(*d)++ = *(*s)++;
10034             *(*d)++ = *(*s)++;
10035         }
10036         else
10037             break;
10038     }
10039     if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
10040               && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
10041         char *this_d;
10042         char *d2;
10043         Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
10044         d2 = this_d;
10045         SAVEFREEPV(this_d);
10046         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10047                          "Old package separator used in string");
10048         if (olds[-1] == '#')
10049             *d2++ = olds[-2];
10050         *d2++ = olds[-1];
10051         while (olds < *s) {
10052             if (*olds == '\'') {
10053                 *d2++ = '\\';
10054                 *d2++ = *olds++;
10055             }
10056             else
10057                 *d2++ = *olds++;
10058         }
10059         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10060                          "\t(Did you mean \"%" UTF8f "\" instead?)\n",
10061                           UTF8fARG(is_utf8, d2-this_d, this_d));
10062     }
10063     return;
10064 }
10065
10066 /* Returns a NUL terminated string, with the length of the string written to
10067    *slp
10068    */
10069 char *
10070 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10071 {
10072     char *d = dest;
10073     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
10074     bool is_utf8 = cBOOL(UTF);
10075
10076     PERL_ARGS_ASSERT_SCAN_WORD;
10077
10078     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
10079     *d = '\0';
10080     *slp = d - dest;
10081     return s;
10082 }
10083
10084 /* Is the byte 'd' a legal single character identifier name?  'u' is true
10085  * iff Unicode semantics are to be used.  The legal ones are any of:
10086  *  a) all ASCII characters except:
10087  *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
10088  *          2) '{'
10089  *     The final case currently doesn't get this far in the program, so we
10090  *     don't test for it.  If that were to change, it would be ok to allow it.
10091  *  b) When not under Unicode rules, any upper Latin1 character
10092  *  c) Otherwise, when unicode rules are used, all XIDS characters.
10093  *
10094  *      Because all ASCII characters have the same representation whether
10095  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
10096  *      '{' without knowing if is UTF-8 or not. */
10097 #define VALID_LEN_ONE_IDENT(s, e, is_utf8)                                  \
10098     (isGRAPH_A(*(s)) || ((is_utf8)                                          \
10099                          ? isIDFIRST_utf8_safe(s, e)                        \
10100                          : (isGRAPH_L1(*s)                                  \
10101                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
10102
10103 STATIC char *
10104 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
10105 {
10106     I32 herelines = PL_parser->herelines;
10107     SSize_t bracket = -1;
10108     char funny = *s++;
10109     char *d = dest;
10110     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
10111     bool is_utf8 = cBOOL(UTF);
10112     I32 orig_copline = 0, tmp_copline = 0;
10113
10114     PERL_ARGS_ASSERT_SCAN_IDENT;
10115
10116     if (isSPACE(*s) || !*s)
10117         s = skipspace(s);
10118     if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
10119         bool is_zero= *s == '0' ? TRUE : FALSE;
10120         char *digit_start= d;
10121         *d++ = *s++;
10122         while (s < PL_bufend && isDIGIT(*s)) {
10123             if (d >= e)
10124                 Perl_croak(aTHX_ "%s", ident_too_long);
10125             *d++ = *s++;
10126         }
10127         if (is_zero && d - digit_start > 1)
10128             Perl_croak(aTHX_ ident_var_zero_multi_digit);
10129     }
10130     else {  /* See if it is a "normal" identifier */
10131         parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
10132     }
10133     *d = '\0';
10134     d = dest;
10135     if (*d) {
10136         /* Either a digit variable, or parse_ident() found an identifier
10137            (anything valid as a bareword), so job done and return.  */
10138         if (PL_lex_state != LEX_NORMAL)
10139             PL_lex_state = LEX_INTERPENDMAYBE;
10140         return s;
10141     }
10142
10143     /* Here, it is not a run-of-the-mill identifier name */
10144
10145     if (*s == '$' && s[1]
10146         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
10147             || isDIGIT_A((U8)s[1])
10148             || s[1] == '$'
10149             || s[1] == '{'
10150             || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
10151     {
10152         /* Dereferencing a value in a scalar variable.
10153            The alternatives are different syntaxes for a scalar variable.
10154            Using ' as a leading package separator isn't allowed. :: is.   */
10155         return s;
10156     }
10157     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
10158     if (*s == '{') {
10159         bracket = s - SvPVX(PL_linestr);
10160         s++;
10161         orig_copline = CopLINE(PL_curcop);
10162         if (s < PL_bufend && isSPACE(*s)) {
10163             s = skipspace(s);
10164         }
10165     }
10166     if ((s <= PL_bufend - ((is_utf8)
10167                           ? UTF8SKIP(s)
10168                           : 1))
10169         && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
10170     {
10171         if (is_utf8) {
10172             const STRLEN skip = UTF8SKIP(s);
10173             STRLEN i;
10174             d[skip] = '\0';
10175             for ( i = 0; i < skip; i++ )
10176                 d[i] = *s++;
10177         }
10178         else {
10179             *d = *s++;
10180             /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
10181             if (isDIGIT(*d)) {
10182                 bool is_zero= *d == '0' ? TRUE : FALSE;
10183                 char *digit_start= d;
10184                 while (s < PL_bufend && isDIGIT(*s)) {
10185                     d++;
10186                     if (d >= e)
10187                         Perl_croak(aTHX_ "%s", ident_too_long);
10188                     *d= *s++;
10189                 }
10190                 if (is_zero && d - digit_start > 1)
10191                     Perl_croak(aTHX_ ident_var_zero_multi_digit);
10192             }
10193             d[1] = '\0';
10194         }
10195     }
10196     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10197     if (*d == '^' && *s && isCONTROLVAR(*s)) {
10198         *d = toCTRL(*s);
10199         s++;
10200     }
10201     /* Warn about ambiguous code after unary operators if {...} notation isn't
10202        used.  There's no difference in ambiguity; it's merely a heuristic
10203        about when not to warn.  */
10204     else if (ck_uni && bracket == -1)
10205         check_uni();
10206     if (bracket != -1) {
10207         bool skip;
10208         char *s2;
10209         /* If we were processing {...} notation then...  */
10210         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10211             || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10212                  && isWORDCHAR(*s))
10213         ) {
10214             /* note we have to check for a normal identifier first,
10215              * as it handles utf8 symbols, and only after that has
10216              * been ruled out can we look at the caret words */
10217             if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10218                 /* if it starts as a valid identifier, assume that it is one.
10219                    (the later check for } being at the expected point will trap
10220                    cases where this doesn't pan out.)  */
10221                 d += is_utf8 ? UTF8SKIP(d) : 1;
10222                 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10223                 *d = '\0';
10224             }
10225             else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10226                 d++;
10227                 while (isWORDCHAR(*s) && d < e) {
10228                     *d++ = *s++;
10229                 }
10230                 if (d >= e)
10231                     Perl_croak(aTHX_ "%s", ident_too_long);
10232                 *d = '\0';
10233             }
10234             tmp_copline = CopLINE(PL_curcop);
10235             if (s < PL_bufend && isSPACE(*s)) {
10236                 s = skipspace(s);
10237             }
10238             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10239                 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
10240                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10241                     const char * const brack =
10242                         (const char *)
10243                         ((*s == '[') ? "[...]" : "{...}");
10244                     orig_copline = CopLINE(PL_curcop);
10245                     CopLINE_set(PL_curcop, tmp_copline);
10246    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10247                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10248                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10249                         funny, dest, brack, funny, dest, brack);
10250                     CopLINE_set(PL_curcop, orig_copline);
10251                 }
10252                 bracket++;
10253                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10254                 PL_lex_allbrackets++;
10255                 return s;
10256             }
10257         }
10258
10259         if ( !tmp_copline )
10260             tmp_copline = CopLINE(PL_curcop);
10261         if ((skip = s < PL_bufend && isSPACE(*s))) {
10262             /* Avoid incrementing line numbers or resetting PL_linestart,
10263                in case we have to back up.  */
10264             STRLEN s_off = s - SvPVX(PL_linestr);
10265             s2 = peekspace(s);
10266             s = SvPVX(PL_linestr) + s_off;
10267         }
10268         else
10269             s2 = s;
10270
10271         /* Expect to find a closing } after consuming any trailing whitespace.
10272          */
10273         if (*s2 == '}') {
10274             /* Now increment line numbers if applicable.  */
10275             if (skip)
10276                 s = skipspace(s);
10277             s++;
10278             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10279                 PL_lex_state = LEX_INTERPEND;
10280                 PL_expect = XREF;
10281             }
10282             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10283                 if (ckWARN(WARN_AMBIGUOUS)
10284                     && (keyword(dest, d - dest, 0)
10285                         || get_cvn_flags(dest, d - dest, is_utf8
10286                            ? SVf_UTF8
10287                            : 0)))
10288                 {
10289                     SV *tmp = newSVpvn_flags( dest, d - dest,
10290                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10291                     if (funny == '#')
10292                         funny = '@';
10293                     orig_copline = CopLINE(PL_curcop);
10294                     CopLINE_set(PL_curcop, tmp_copline);
10295                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10296                         "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10297                         funny, SVfARG(tmp), funny, SVfARG(tmp));
10298                     CopLINE_set(PL_curcop, orig_copline);
10299                 }
10300             }
10301         }
10302         else {
10303             /* Didn't find the closing } at the point we expected, so restore
10304                state such that the next thing to process is the opening { and */
10305             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10306             CopLINE_set(PL_curcop, orig_copline);
10307             PL_parser->herelines = herelines;
10308             *dest = '\0';
10309             PL_parser->sub_no_recover = TRUE;
10310         }
10311     }
10312     else if (   PL_lex_state == LEX_INTERPNORMAL
10313              && !PL_lex_brackets
10314              && !intuit_more(s, PL_bufend))
10315         PL_lex_state = LEX_INTERPEND;
10316     return s;
10317 }
10318
10319 static bool
10320 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10321
10322     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10323      * found in the parse starting at 's', based on the subset that are valid
10324      * in this context input to this routine in 'valid_flags'. Advances s.
10325      * Returns TRUE if the input should be treated as a valid flag, so the next
10326      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10327      * upon first call on the current regex.  This routine will set it to any
10328      * charset modifier found.  The caller shouldn't change it.  This way,
10329      * another charset modifier encountered in the parse can be detected as an
10330      * error, as we have decided to allow only one */
10331
10332     const char c = **s;
10333     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10334
10335     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10336         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10337             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10338                        UTF ? SVf_UTF8 : 0);
10339             (*s) += charlen;
10340             /* Pretend that it worked, so will continue processing before
10341              * dieing */
10342             return TRUE;
10343         }
10344         return FALSE;
10345     }
10346
10347     switch (c) {
10348
10349         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10350         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
10351         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
10352         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
10353         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
10354         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10355         case LOCALE_PAT_MOD:
10356             if (*charset) {
10357                 goto multiple_charsets;
10358             }
10359             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10360             *charset = c;
10361             break;
10362         case UNICODE_PAT_MOD:
10363             if (*charset) {
10364                 goto multiple_charsets;
10365             }
10366             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10367             *charset = c;
10368             break;
10369         case ASCII_RESTRICT_PAT_MOD:
10370             if (! *charset) {
10371                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10372             }
10373             else {
10374
10375                 /* Error if previous modifier wasn't an 'a', but if it was, see
10376                  * if, and accept, a second occurrence (only) */
10377                 if (*charset != 'a'
10378                     || get_regex_charset(*pmfl)
10379                         != REGEX_ASCII_RESTRICTED_CHARSET)
10380                 {
10381                         goto multiple_charsets;
10382                 }
10383                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10384             }
10385             *charset = c;
10386             break;
10387         case DEPENDS_PAT_MOD:
10388             if (*charset) {
10389                 goto multiple_charsets;
10390             }
10391             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10392             *charset = c;
10393             break;
10394     }
10395
10396     (*s)++;
10397     return TRUE;
10398
10399     multiple_charsets:
10400         if (*charset != c) {
10401             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10402         }
10403         else if (c == 'a') {
10404   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10405             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10406         }
10407         else {
10408             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10409         }
10410
10411         /* Pretend that it worked, so will continue processing before dieing */
10412         (*s)++;
10413         return TRUE;
10414 }
10415
10416 STATIC char *
10417 S_scan_pat(pTHX_ char *start, I32 type)
10418 {
10419     PMOP *pm;
10420     char *s;
10421     const char * const valid_flags =
10422         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10423     char charset = '\0';    /* character set modifier */
10424     unsigned int x_mod_count = 0;
10425
10426     PERL_ARGS_ASSERT_SCAN_PAT;
10427
10428     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10429     if (!s)
10430         Perl_croak(aTHX_ "Search pattern not terminated");
10431
10432     pm = (PMOP*)newPMOP(type, 0);
10433     if (PL_multi_open == '?') {
10434         /* This is the only point in the code that sets PMf_ONCE:  */
10435         pm->op_pmflags |= PMf_ONCE;
10436
10437         /* Hence it's safe to do this bit of PMOP book-keeping here, which
10438            allows us to restrict the list needed by reset to just the ??
10439            matches.  */
10440         assert(type != OP_TRANS);
10441         if (PL_curstash) {
10442             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10443             U32 elements;
10444             if (!mg) {
10445                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10446                                  0);
10447             }
10448             elements = mg->mg_len / sizeof(PMOP**);
10449             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10450             ((PMOP**)mg->mg_ptr) [elements++] = pm;
10451             mg->mg_len = elements * sizeof(PMOP**);
10452             PmopSTASH_set(pm,PL_curstash);
10453         }
10454     }
10455
10456     /* if qr/...(?{..}).../, then need to parse the pattern within a new
10457      * anon CV. False positives like qr/[(?{]/ are harmless */
10458
10459     if (type == OP_QR) {
10460         STRLEN len;
10461         char *e, *p = SvPV(PL_lex_stuff, len);
10462         e = p + len;
10463         for (; p < e; p++) {
10464             if (p[0] == '(' && p[1] == '?'
10465                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
10466             {
10467                 pm->op_pmflags |= PMf_HAS_CV;
10468                 break;
10469             }
10470         }
10471         pm->op_pmflags |= PMf_IS_QR;
10472     }
10473
10474     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10475                                 &s, &charset, &x_mod_count))
10476     {};
10477     /* issue a warning if /c is specified,but /g is not */
10478     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10479     {
10480         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10481                        "Use of /c modifier is meaningless without /g" );
10482     }
10483
10484     PL_lex_op = (OP*)pm;
10485     pl_yylval.ival = OP_MATCH;
10486     return s;
10487 }
10488
10489 STATIC char *
10490 S_scan_subst(pTHX_ char *start)
10491 {
10492     char *s;
10493     PMOP *pm;
10494     I32 first_start;
10495     line_t first_line;
10496     line_t linediff = 0;
10497     I32 es = 0;
10498     char charset = '\0';    /* character set modifier */
10499     unsigned int x_mod_count = 0;
10500     char *t;
10501
10502     PERL_ARGS_ASSERT_SCAN_SUBST;
10503
10504     pl_yylval.ival = OP_NULL;
10505
10506     s = scan_str(start, TRUE, FALSE, FALSE, &t);
10507
10508     if (!s)
10509         Perl_croak(aTHX_ "Substitution pattern not terminated");
10510
10511     s = t;
10512
10513     first_start = PL_multi_start;
10514     first_line = CopLINE(PL_curcop);
10515     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10516     if (!s) {
10517         SvREFCNT_dec_NN(PL_lex_stuff);
10518         PL_lex_stuff = NULL;
10519         Perl_croak(aTHX_ "Substitution replacement not terminated");
10520     }
10521     PL_multi_start = first_start;       /* so whole substitution is taken together */
10522
10523     pm = (PMOP*)newPMOP(OP_SUBST, 0);
10524
10525
10526     while (*s) {
10527         if (*s == EXEC_PAT_MOD) {
10528             s++;
10529             es++;
10530         }
10531         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10532                                   &s, &charset, &x_mod_count))
10533         {
10534             break;
10535         }
10536     }
10537
10538     if ((pm->op_pmflags & PMf_CONTINUE)) {
10539         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10540     }
10541
10542     if (es) {
10543         SV * const repl = newSVpvs("");
10544
10545         PL_multi_end = 0;
10546         pm->op_pmflags |= PMf_EVAL;
10547         for (; es > 1; es--) {
10548             sv_catpvs(repl, "eval ");
10549         }
10550         sv_catpvs(repl, "do {");
10551         sv_catsv(repl, PL_parser->lex_sub_repl);
10552         sv_catpvs(repl, "}");
10553         SvREFCNT_dec(PL_parser->lex_sub_repl);
10554         PL_parser->lex_sub_repl = repl;
10555     }
10556
10557
10558     linediff = CopLINE(PL_curcop) - first_line;
10559     if (linediff)
10560         CopLINE_set(PL_curcop, first_line);
10561
10562     if (linediff || es) {
10563         /* the IVX field indicates that the replacement string is a s///e;
10564          * the NVX field indicates how many src code lines the replacement
10565          * spreads over */
10566         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10567         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10568         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10569                                                                     cBOOL(es);
10570     }
10571
10572     PL_lex_op = (OP*)pm;
10573     pl_yylval.ival = OP_SUBST;
10574     return s;
10575 }
10576
10577 STATIC char *
10578 S_scan_trans(pTHX_ char *start)
10579 {
10580     char* s;
10581     OP *o;
10582     U8 squash;
10583     U8 del;
10584     U8 complement;
10585     bool nondestruct = 0;
10586     char *t;
10587
10588     PERL_ARGS_ASSERT_SCAN_TRANS;
10589
10590     pl_yylval.ival = OP_NULL;
10591
10592     s = scan_str(start,FALSE,FALSE,FALSE,&t);
10593     if (!s)
10594         Perl_croak(aTHX_ "Transliteration pattern not terminated");
10595
10596     s = t;
10597
10598     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10599     if (!s) {
10600         SvREFCNT_dec_NN(PL_lex_stuff);
10601         PL_lex_stuff = NULL;
10602         Perl_croak(aTHX_ "Transliteration replacement not terminated");
10603     }
10604
10605     complement = del = squash = 0;
10606     while (1) {
10607         switch (*s) {
10608         case 'c':
10609             complement = OPpTRANS_COMPLEMENT;
10610             break;
10611         case 'd':
10612             del = OPpTRANS_DELETE;
10613             break;
10614         case 's':
10615             squash = OPpTRANS_SQUASH;
10616             break;
10617         case 'r':
10618             nondestruct = 1;
10619             break;
10620         default:
10621             goto no_more;
10622         }
10623         s++;
10624     }
10625   no_more:
10626
10627     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10628     o->op_private &= ~OPpTRANS_ALL;
10629     o->op_private |= del|squash|complement;
10630
10631     PL_lex_op = o;
10632     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10633
10634
10635     return s;
10636 }
10637
10638 /* scan_heredoc
10639    Takes a pointer to the first < in <<FOO.
10640    Returns a pointer to the byte following <<FOO.
10641
10642    This function scans a heredoc, which involves different methods
10643    depending on whether we are in a string eval, quoted construct, etc.
10644    This is because PL_linestr could containing a single line of input, or
10645    a whole string being evalled, or the contents of the current quote-
10646    like operator.
10647
10648    The two basic methods are:
10649     - Steal lines from the input stream
10650     - Scan the heredoc in PL_linestr and remove it therefrom
10651
10652    In a file scope or filtered eval, the first method is used; in a
10653    string eval, the second.
10654
10655    In a quote-like operator, we have to choose between the two,
10656    depending on where we can find a newline.  We peek into outer lex-
10657    ing scopes until we find one with a newline in it.  If we reach the
10658    outermost lexing scope and it is a file, we use the stream method.
10659    Otherwise it is treated as an eval.
10660 */
10661
10662 STATIC char *
10663 S_scan_heredoc(pTHX_ char *s)
10664 {
10665     I32 op_type = OP_SCALAR;
10666     I32 len;
10667     SV *tmpstr;
10668     char term;
10669     char *d;
10670     char *e;
10671     char *peek;
10672     char *indent = 0;
10673     I32 indent_len = 0;
10674     bool indented = FALSE;
10675     const bool infile = PL_rsfp || PL_parser->filtered;
10676     const line_t origline = CopLINE(PL_curcop);
10677     LEXSHARED *shared = PL_parser->lex_shared;
10678
10679     PERL_ARGS_ASSERT_SCAN_HEREDOC;
10680
10681     s += 2;
10682     d = PL_tokenbuf + 1;
10683     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10684     *PL_tokenbuf = '\n';
10685     peek = s;
10686
10687     if (*peek == '~') {
10688         indented = TRUE;
10689         peek++; s++;
10690     }
10691
10692     while (SPACE_OR_TAB(*peek))
10693         peek++;
10694
10695     if (*peek == '`' || *peek == '\'' || *peek =='"') {
10696         s = peek;
10697         term = *s++;
10698         s = delimcpy(d, e, s, PL_bufend, term, &len);
10699         if (s == PL_bufend)
10700             Perl_croak(aTHX_ "Unterminated delimiter for here document");
10701         d += len;
10702         s++;
10703     }
10704     else {
10705         if (*s == '\\')
10706             /* <<\FOO is equivalent to <<'FOO' */
10707             s++, term = '\'';
10708         else
10709             term = '"';
10710
10711         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10712             Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10713
10714         peek = s;
10715
10716         while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10717             peek += UTF ? UTF8SKIP(peek) : 1;
10718         }
10719
10720         len = (peek - s >= e - d) ? (e - d) : (peek - s);
10721         Copy(s, d, len, char);
10722         s += len;
10723         d += len;
10724     }
10725
10726     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10727         Perl_croak(aTHX_ "Delimiter for here document is too long");
10728
10729     *d++ = '\n';
10730     *d = '\0';
10731     len = d - PL_tokenbuf;
10732
10733 #ifndef PERL_STRICT_CR
10734     d = (char *) memchr(s, '\r', PL_bufend - s);
10735     if (d) {
10736         char * const olds = s;
10737         s = d;
10738         while (s < PL_bufend) {
10739             if (*s == '\r') {
10740                 *d++ = '\n';
10741                 if (*++s == '\n')
10742                     s++;
10743             }
10744             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
10745                 *d++ = *s++;
10746                 s++;
10747             }
10748             else
10749                 *d++ = *s++;
10750         }
10751         *d = '\0';
10752         PL_bufend = d;
10753         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10754         s = olds;
10755     }
10756 #endif
10757
10758     tmpstr = newSV_type(SVt_PVIV);
10759     SvGROW(tmpstr, 80);
10760     if (term == '\'') {
10761         op_type = OP_CONST;
10762         SvIV_set(tmpstr, -1);
10763     }
10764     else if (term == '`') {
10765         op_type = OP_BACKTICK;
10766         SvIV_set(tmpstr, '\\');
10767     }
10768
10769     PL_multi_start = origline + 1 + PL_parser->herelines;
10770     PL_multi_open = PL_multi_close = '<';
10771
10772     /* inside a string eval or quote-like operator */
10773     if (!infile || PL_lex_inwhat) {
10774         SV *linestr;
10775         char *bufend;
10776         char * const olds = s;
10777         PERL_CONTEXT * const cx = CX_CUR();
10778         /* These two fields are not set until an inner lexing scope is
10779            entered.  But we need them set here. */
10780         shared->ls_bufptr  = s;
10781         shared->ls_linestr = PL_linestr;
10782
10783         if (PL_lex_inwhat) {
10784             /* Look for a newline.  If the current buffer does not have one,
10785              peek into the line buffer of the parent lexing scope, going
10786              up as many levels as necessary to find one with a newline
10787              after bufptr.
10788             */
10789             while (!(s = (char *)memchr(
10790                                 (void *)shared->ls_bufptr, '\n',
10791                                 SvEND(shared->ls_linestr)-shared->ls_bufptr
10792                 )))
10793             {
10794                 shared = shared->ls_prev;
10795                 /* shared is only null if we have gone beyond the outermost
10796                    lexing scope.  In a file, we will have broken out of the
10797                    loop in the previous iteration.  In an eval, the string buf-
10798                    fer ends with "\n;", so the while condition above will have
10799                    evaluated to false.  So shared can never be null.  Or so you
10800                    might think.  Odd syntax errors like s;@{<<; can gobble up
10801                    the implicit semicolon at the end of a flie, causing the
10802                    file handle to be closed even when we are not in a string
10803                    eval.  So shared may be null in that case.
10804                    (Closing '>>}' here to balance the earlier open brace for
10805                    editors that look for matched pairs.) */
10806                 if (UNLIKELY(!shared))
10807                     goto interminable;
10808                 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10809                    most lexing scope.  In a file, shared->ls_linestr at that
10810                    level is just one line, so there is no body to steal. */
10811                 if (infile && !shared->ls_prev) {
10812                     s = olds;
10813                     goto streaming;
10814                 }
10815             }
10816         }
10817         else {  /* eval or we've already hit EOF */
10818             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10819             if (!s)
10820                 goto interminable;
10821         }
10822
10823         linestr = shared->ls_linestr;
10824         bufend = SvEND(linestr);
10825         d = s;
10826         if (indented) {
10827             char *myolds = s;
10828
10829             while (s < bufend - len + 1) {
10830                 if (*s++ == '\n')
10831                     ++PL_parser->herelines;
10832
10833                 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10834                     char *backup = s;
10835                     indent_len = 0;
10836
10837                     /* Only valid if it's preceded by whitespace only */
10838                     while (backup != myolds && --backup >= myolds) {
10839                         if (! SPACE_OR_TAB(*backup)) {
10840                             break;
10841                         }
10842                         indent_len++;
10843                     }
10844
10845                     /* No whitespace or all! */
10846                     if (backup == s || *backup == '\n') {
10847                         Newx(indent, indent_len + 1, char);
10848                         memcpy(indent, backup + 1, indent_len);
10849                         indent[indent_len] = 0;
10850                         s--; /* before our delimiter */
10851                         PL_parser->herelines--; /* this line doesn't count */
10852                         break;
10853                     }
10854                 }
10855             }
10856         }
10857         else {
10858             while (s < bufend - len + 1
10859                    && memNE(s,PL_tokenbuf,len) )
10860             {
10861                 if (*s++ == '\n')
10862                     ++PL_parser->herelines;
10863             }
10864         }
10865
10866         if (s >= bufend - len + 1) {
10867             goto interminable;
10868         }
10869
10870         sv_setpvn(tmpstr,d+1,s-d);
10871         s += len - 1;
10872         /* the preceding stmt passes a newline */
10873         PL_parser->herelines++;
10874
10875         /* s now points to the newline after the heredoc terminator.
10876            d points to the newline before the body of the heredoc.
10877          */
10878
10879         /* We are going to modify linestr in place here, so set
10880            aside copies of the string if necessary for re-evals or
10881            (caller $n)[6]. */
10882         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10883            check shared->re_eval_str. */
10884         if (shared->re_eval_start || shared->re_eval_str) {
10885             /* Set aside the rest of the regexp */
10886             if (!shared->re_eval_str)
10887                 shared->re_eval_str =
10888                        newSVpvn(shared->re_eval_start,
10889                                 bufend - shared->re_eval_start);
10890             shared->re_eval_start -= s-d;
10891         }
10892
10893         if (cxstack_ix >= 0
10894             && CxTYPE(cx) == CXt_EVAL
10895             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10896             && cx->blk_eval.cur_text == linestr)
10897         {
10898             cx->blk_eval.cur_text = newSVsv(linestr);
10899             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10900         }
10901
10902         /* Copy everything from s onwards back to d. */
10903         Move(s,d,bufend-s + 1,char);
10904         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10905         /* Setting PL_bufend only applies when we have not dug deeper
10906            into other scopes, because sublex_done sets PL_bufend to
10907            SvEND(PL_linestr). */
10908         if (shared == PL_parser->lex_shared)
10909             PL_bufend = SvEND(linestr);
10910         s = olds;
10911     }
10912     else {
10913         SV *linestr_save;
10914         char *oldbufptr_save;
10915         char *oldoldbufptr_save;
10916       streaming:
10917         SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
10918         term = PL_tokenbuf[1];
10919         len--;
10920         linestr_save = PL_linestr; /* must restore this afterwards */
10921         d = s;                   /* and this */
10922         oldbufptr_save = PL_oldbufptr;
10923         oldoldbufptr_save = PL_oldoldbufptr;
10924         PL_linestr = newSVpvs("");
10925         PL_bufend = SvPVX(PL_linestr);
10926
10927         while (1) {
10928             PL_bufptr = PL_bufend;
10929             CopLINE_set(PL_curcop,
10930                         origline + 1 + PL_parser->herelines);
10931
10932             if (   !lex_next_chunk(LEX_NO_TERM)
10933                 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10934             {
10935                 /* Simply freeing linestr_save might seem simpler here, as it
10936                    does not matter what PL_linestr points to, since we are
10937                    about to croak; but in a quote-like op, linestr_save
10938                    will have been prospectively freed already, via
10939                    SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10940                    restore PL_linestr. */
10941                 SvREFCNT_dec_NN(PL_linestr);
10942                 PL_linestr = linestr_save;
10943                 PL_oldbufptr = oldbufptr_save;
10944                 PL_oldoldbufptr = oldoldbufptr_save;
10945                 goto interminable;
10946             }
10947
10948             CopLINE_set(PL_curcop, origline);
10949
10950             if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10951                 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10952                 /* ^That should be enough to avoid this needing to grow:  */
10953                 sv_catpvs(PL_linestr, "\n\0");
10954                 assert(s == SvPVX(PL_linestr));
10955                 PL_bufend = SvEND(PL_linestr);
10956             }
10957
10958             s = PL_bufptr;
10959             PL_parser->herelines++;
10960             PL_last_lop = PL_last_uni = NULL;
10961
10962 #ifndef PERL_STRICT_CR
10963             if (PL_bufend - PL_linestart >= 2) {
10964                 if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10965                     || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10966                 {
10967                     PL_bufend[-2] = '\n';
10968                     PL_bufend--;
10969                     SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10970                 }
10971                 else if (PL_bufend[-1] == '\r')
10972                     PL_bufend[-1] = '\n';
10973             }
10974             else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10975                 PL_bufend[-1] = '\n';
10976 #endif
10977
10978             if (indented && (PL_bufend-s) >= len) {
10979                 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10980
10981                 if (found) {
10982                     char *backup = found;
10983                     indent_len = 0;
10984
10985                     /* Only valid if it's preceded by whitespace only */
10986                     while (backup != s && --backup >= s) {
10987                         if (! SPACE_OR_TAB(*backup)) {
10988                             break;
10989                         }
10990                         indent_len++;
10991                     }
10992
10993                     /* All whitespace or none! */
10994                     if (backup == found || SPACE_OR_TAB(*backup)) {
10995                         Newx(indent, indent_len + 1, char);
10996                         memcpy(indent, backup, indent_len);
10997                         indent[indent_len] = 0;
10998                         SvREFCNT_dec(PL_linestr);
10999                         PL_linestr = linestr_save;
11000                         PL_linestart = SvPVX(linestr_save);
11001                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11002                         PL_oldbufptr = oldbufptr_save;
11003                         PL_oldoldbufptr = oldoldbufptr_save;
11004                         s = d;
11005                         break;
11006                     }
11007                 }
11008
11009                 /* Didn't find it */
11010                 sv_catsv(tmpstr,PL_linestr);
11011             }
11012             else {
11013                 if (*s == term && PL_bufend-s >= len
11014                     && memEQ(s,PL_tokenbuf + 1,len))
11015                 {
11016                     SvREFCNT_dec(PL_linestr);
11017                     PL_linestr = linestr_save;
11018                     PL_linestart = SvPVX(linestr_save);
11019                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11020                     PL_oldbufptr = oldbufptr_save;
11021                     PL_oldoldbufptr = oldoldbufptr_save;
11022                     s = d;
11023                     break;
11024                 }
11025                 else {
11026                     sv_catsv(tmpstr,PL_linestr);
11027                 }
11028             }
11029         } /* while (1) */
11030     }
11031
11032     PL_multi_end = origline + PL_parser->herelines;
11033
11034     if (indented && indent) {
11035         STRLEN linecount = 1;
11036         STRLEN herelen = SvCUR(tmpstr);
11037         char *ss = SvPVX(tmpstr);
11038         char *se = ss + herelen;
11039         SV *newstr = newSV(herelen+1);
11040         SvPOK_on(newstr);
11041
11042         /* Trim leading whitespace */
11043         while (ss < se) {
11044             /* newline only? Copy and move on */
11045             if (*ss == '\n') {
11046                 sv_catpvs(newstr,"\n");
11047                 ss++;
11048                 linecount++;
11049
11050             /* Found our indentation? Strip it */
11051             }
11052             else if (se - ss >= indent_len
11053                        && memEQ(ss, indent, indent_len))
11054             {
11055                 STRLEN le = 0;
11056                 ss += indent_len;
11057
11058                 while ((ss + le) < se && *(ss + le) != '\n')
11059                     le++;
11060
11061                 sv_catpvn(newstr, ss, le);
11062                 ss += le;
11063
11064             /* Line doesn't begin with our indentation? Croak */
11065             }
11066             else {
11067                 Safefree(indent);
11068                 Perl_croak(aTHX_
11069                     "Indentation on line %d of here-doc doesn't match delimiter",
11070                     (int)linecount
11071                 );
11072             }
11073         } /* while */
11074
11075         /* avoid sv_setsv() as we dont wan't to COW here */
11076         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
11077         Safefree(indent);
11078         SvREFCNT_dec_NN(newstr);
11079     }
11080
11081     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11082         SvPV_shrink_to_cur(tmpstr);
11083     }
11084
11085     if (!IN_BYTES) {
11086         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11087             SvUTF8_on(tmpstr);
11088     }
11089
11090     PL_lex_stuff = tmpstr;
11091     pl_yylval.ival = op_type;
11092     return s;
11093
11094   interminable:
11095     if (indent)
11096         Safefree(indent);
11097     SvREFCNT_dec(tmpstr);
11098     CopLINE_set(PL_curcop, origline);
11099     missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
11100 }
11101
11102
11103 /* scan_inputsymbol
11104    takes: position of first '<' in input buffer
11105    returns: position of first char following the matching '>' in
11106             input buffer
11107    side-effects: pl_yylval and lex_op are set.
11108
11109    This code handles:
11110
11111    <>           read from ARGV
11112    <<>>         read from ARGV without magic open
11113    <FH>         read from filehandle
11114    <pkg::FH>    read from package qualified filehandle
11115    <pkg'FH>     read from package qualified filehandle
11116    <$fh>        read from filehandle in $fh
11117    <*.h>        filename glob
11118
11119 */
11120
11121 STATIC char *
11122 S_scan_inputsymbol(pTHX_ char *start)
11123 {
11124     char *s = start;            /* current position in buffer */
11125     char *end;
11126     I32 len;
11127     bool nomagicopen = FALSE;
11128     char *d = PL_tokenbuf;                                      /* start of temp holding space */
11129     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
11130
11131     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11132
11133     end = (char *) memchr(s, '\n', PL_bufend - s);
11134     if (!end)
11135         end = PL_bufend;
11136     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
11137         nomagicopen = TRUE;
11138         *d = '\0';
11139         len = 0;
11140         s += 3;
11141     }
11142     else
11143         s = delimcpy(d, e, s + 1, end, '>', &len);      /* extract until > */
11144
11145     /* die if we didn't have space for the contents of the <>,
11146        or if it didn't end, or if we see a newline
11147     */
11148
11149     if (len >= (I32)sizeof PL_tokenbuf)
11150         Perl_croak(aTHX_ "Excessively long <> operator");
11151     if (s >= end)
11152         Perl_croak(aTHX_ "Unterminated <> operator");
11153
11154     s++;
11155
11156     /* check for <$fh>
11157        Remember, only scalar variables are interpreted as filehandles by
11158        this code.  Anything more complex (e.g., <$fh{$num}>) will be
11159        treated as a glob() call.
11160        This code makes use of the fact that except for the $ at the front,
11161        a scalar variable and a filehandle look the same.
11162     */
11163     if (*d == '$' && d[1]) d++;
11164
11165     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11166     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
11167         d += UTF ? UTF8SKIP(d) : 1;
11168     }
11169
11170     /* If we've tried to read what we allow filehandles to look like, and
11171        there's still text left, then it must be a glob() and not a getline.
11172        Use scan_str to pull out the stuff between the <> and treat it
11173        as nothing more than a string.
11174     */
11175
11176     if (d - PL_tokenbuf != len) {
11177         pl_yylval.ival = OP_GLOB;
11178         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
11179         if (!s)
11180            Perl_croak(aTHX_ "Glob not terminated");
11181         return s;
11182     }
11183     else {
11184         bool readline_overriden = FALSE;
11185         GV *gv_readline;
11186         /* we're in a filehandle read situation */
11187         d = PL_tokenbuf;
11188
11189         /* turn <> into <ARGV> */
11190         if (!len)
11191             Copy("ARGV",d,5,char);
11192
11193         /* Check whether readline() is overriden */
11194         if ((gv_readline = gv_override("readline",8)))
11195             readline_overriden = TRUE;
11196
11197         /* if <$fh>, create the ops to turn the variable into a
11198            filehandle
11199         */
11200         if (*d == '$') {
11201             /* try to find it in the pad for this block, otherwise find
11202                add symbol table ops
11203             */
11204             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11205             if (tmp != NOT_IN_PAD) {
11206                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11207                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11208                     HEK * const stashname = HvNAME_HEK(stash);
11209                     SV * const sym = sv_2mortal(newSVhek(stashname));
11210                     sv_catpvs(sym, "::");
11211                     sv_catpv(sym, d+1);
11212                     d = SvPVX(sym);
11213                     goto intro_sym;
11214                 }
11215                 else {
11216                     OP * const o = newOP(OP_PADSV, 0);
11217                     o->op_targ = tmp;
11218                     PL_lex_op = readline_overriden
11219                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11220                                 op_append_elem(OP_LIST, o,
11221                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11222                         : newUNOP(OP_READLINE, 0, o);
11223                 }
11224             }
11225             else {
11226                 GV *gv;
11227                 ++d;
11228               intro_sym:
11229                 gv = gv_fetchpv(d,
11230                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11231                                 SVt_PV);
11232                 PL_lex_op = readline_overriden
11233                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11234                             op_append_elem(OP_LIST,
11235                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11236                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11237                     : newUNOP(OP_READLINE, 0,
11238                             newUNOP(OP_RV2SV, 0,
11239                                 newGVOP(OP_GV, 0, gv)));
11240             }
11241             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11242             pl_yylval.ival = OP_NULL;
11243         }
11244
11245         /* If it's none of the above, it must be a literal filehandle
11246            (<Foo::BAR> or <FOO>) so build a simple readline OP */
11247         else {
11248             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11249             PL_lex_op = readline_overriden
11250                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11251                         op_append_elem(OP_LIST,
11252                             newGVOP(OP_GV, 0, gv),
11253                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11254                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11255             pl_yylval.ival = OP_NULL;
11256
11257             /* leave the token generation above to avoid confusing the parser */
11258             if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
11259                 no_bareword_filehandle(d);
11260             }
11261         }
11262     }
11263
11264     return s;
11265 }
11266
11267
11268 /* scan_str
11269    takes:
11270         start                   position in buffer
11271         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
11272                                 only if they are of the open/close form
11273         keep_delims             preserve the delimiters around the string
11274         re_reparse              compiling a run-time /(?{})/:
11275                                    collapse // to /,  and skip encoding src
11276         delimp                  if non-null, this is set to the position of
11277                                 the closing delimiter, or just after it if
11278                                 the closing and opening delimiters differ
11279                                 (i.e., the opening delimiter of a substitu-
11280                                 tion replacement)
11281    returns: position to continue reading from buffer
11282    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11283         updates the read buffer.
11284
11285    This subroutine pulls a string out of the input.  It is called for:
11286         q               single quotes           q(literal text)
11287         '               single quotes           'literal text'
11288         qq              double quotes           qq(interpolate $here please)
11289         "               double quotes           "interpolate $here please"
11290         qx              backticks               qx(/bin/ls -l)
11291         `               backticks               `/bin/ls -l`
11292         qw              quote words             @EXPORT_OK = qw( func() $spam )
11293         m//             regexp match            m/this/
11294         s///            regexp substitute       s/this/that/
11295         tr///           string transliterate    tr/this/that/
11296         y///            string transliterate    y/this/that/
11297         ($*@)           sub prototypes          sub foo ($)
11298         (stuff)         sub attr parameters     sub foo : attr(stuff)
11299         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
11300
11301    In most of these cases (all but <>, patterns and transliterate)
11302    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
11303    calls scan_str().  s/// makes yylex() call scan_subst() which calls
11304    scan_str().  tr/// and y/// make yylex() call scan_trans() which
11305    calls scan_str().
11306
11307    It skips whitespace before the string starts, and treats the first
11308    character as the delimiter.  If the delimiter is one of ([{< then
11309    the corresponding "close" character )]}> is used as the closing
11310    delimiter.  It allows quoting of delimiters, and if the string has
11311    balanced delimiters ([{<>}]) it allows nesting.
11312
11313    On success, the SV with the resulting string is put into lex_stuff or,
11314    if that is already non-NULL, into lex_repl. The second case occurs only
11315    when parsing the RHS of the special constructs s/// and tr/// (y///).
11316    For convenience, the terminating delimiter character is stuffed into
11317    SvIVX of the SV.
11318 */
11319
11320 char *
11321 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11322                  char **delimp
11323     )
11324 {
11325     SV *sv;                     /* scalar value: string */
11326     const char *tmps;           /* temp string, used for delimiter matching */
11327     char *s = start;            /* current position in the buffer */
11328     char term;                  /* terminating character */
11329     char *to;                   /* current position in the sv's data */
11330     I32 brackets = 1;           /* bracket nesting level */
11331     bool d_is_utf8 = FALSE;     /* is there any utf8 content? */
11332     IV termcode;                /* terminating char. code */
11333     U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
11334     STRLEN termlen;             /* length of terminating string */
11335     line_t herelines;
11336
11337     /* The delimiters that have a mirror-image closing one */
11338     const char * opening_delims = "([{<";
11339     const char * closing_delims = ")]}>";
11340
11341     /* The only non-UTF character that isn't a stand alone grapheme is
11342      * white-space, hence can't be a delimiter. */
11343     const char * non_grapheme_msg = "Use of unassigned code point or"
11344                                     " non-standalone grapheme for a delimiter"
11345                                     " is not allowed";
11346     PERL_ARGS_ASSERT_SCAN_STR;
11347
11348     /* skip space before the delimiter */
11349     if (isSPACE(*s)) {
11350         s = skipspace(s);
11351     }
11352
11353     /* mark where we are, in case we need to report errors */
11354     CLINE;
11355
11356     /* after skipping whitespace, the next character is the terminator */
11357     term = *s;
11358     if (!UTF || UTF8_IS_INVARIANT(term)) {
11359         termcode = termstr[0] = term;
11360         termlen = 1;
11361     }
11362     else {
11363         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
11364         if (UTF && UNLIKELY(! is_grapheme((U8 *) start,
11365                                            (U8 *) s,
11366                                            (U8 *) PL_bufend,
11367                                                   termcode)))
11368         {
11369             yyerror(non_grapheme_msg);
11370         }
11371
11372         Copy(s, termstr, termlen, U8);
11373     }
11374
11375     /* mark where we are */
11376     PL_multi_start = CopLINE(PL_curcop);
11377     PL_multi_open = termcode;
11378     herelines = PL_parser->herelines;
11379
11380     /* If the delimiter has a mirror-image closing one, get it */
11381     if (term && (tmps = strchr(opening_delims, term))) {
11382         termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
11383     }
11384
11385     PL_multi_close = termcode;
11386
11387     if (PL_multi_open == PL_multi_close) {
11388         keep_bracketed_quoted = FALSE;
11389     }
11390
11391     /* create a new SV to hold the contents.  79 is the SV's initial length.
11392        What a random number. */
11393     sv = newSV_type(SVt_PVIV);
11394     SvGROW(sv, 80);
11395     SvIV_set(sv, termcode);
11396     (void)SvPOK_only(sv);               /* validate pointer */
11397
11398     /* move past delimiter and try to read a complete string */
11399     if (keep_delims)
11400         sv_catpvn(sv, s, termlen);
11401     s += termlen;
11402     for (;;) {
11403         /* extend sv if need be */
11404         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11405         /* set 'to' to the next character in the sv's string */
11406         to = SvPVX(sv)+SvCUR(sv);
11407
11408         /* if open delimiter is the close delimiter read unbridle */
11409         if (PL_multi_open == PL_multi_close) {
11410             for (; s < PL_bufend; s++,to++) {
11411                 /* embedded newlines increment the current line number */
11412                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11413                     COPLINE_INC_WITH_HERELINES;
11414                 /* handle quoted delimiters */
11415                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11416                     if (!keep_bracketed_quoted
11417                         && (s[1] == term
11418                             || (re_reparse && s[1] == '\\'))
11419                     )
11420                         s++;
11421                     else /* any other quotes are simply copied straight through */
11422                         *to++ = *s++;
11423                 }
11424                 /* terminate when run out of buffer (the for() condition), or
11425                    have found the terminator */
11426                 else if (*s == term) {  /* First byte of terminator matches */
11427                     if (termlen == 1)   /* If is the only byte, are done */
11428                         break;
11429
11430                     /* If the remainder of the terminator matches, also are
11431                      * done, after checking that is a separate grapheme */
11432                     if (   s + termlen <= PL_bufend
11433                         && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
11434                     {
11435                         if (   UTF
11436                             && UNLIKELY(! is_grapheme((U8 *) start,
11437                                                        (U8 *) s,
11438                                                        (U8 *) PL_bufend,
11439                                                               termcode)))
11440                         {
11441                             yyerror(non_grapheme_msg);
11442                         }
11443                         break;
11444                     }
11445                 }
11446                 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
11447                     d_is_utf8 = TRUE;
11448                 }
11449
11450                 *to = *s;
11451             }
11452         }
11453
11454         /* if the terminator isn't the same as the start character (e.g.,
11455            matched brackets), we have to allow more in the quoting, and
11456            be prepared for nested brackets.
11457         */
11458         else {
11459             /* read until we run out of string, or we find the terminator */
11460             for (; s < PL_bufend; s++,to++) {
11461                 /* embedded newlines increment the line count */
11462                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11463                     COPLINE_INC_WITH_HERELINES;
11464                 /* backslashes can escape the open or closing characters */
11465                 if (*s == '\\' && s+1 < PL_bufend) {
11466                     if (!keep_bracketed_quoted
11467                        && ( ((UV)s[1] == PL_multi_open)
11468                          || ((UV)s[1] == PL_multi_close) ))
11469                     {
11470                         s++;
11471                     }
11472                     else
11473                         *to++ = *s++;
11474                 }
11475                 /* allow nested opens and closes */
11476                 else if ((UV)*s == PL_multi_close && --brackets <= 0)
11477                     break;
11478                 else if ((UV)*s == PL_multi_open)
11479                     brackets++;
11480                 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11481                     d_is_utf8 = TRUE;
11482                 *to = *s;
11483             }
11484         }
11485         /* terminate the copied string and update the sv's end-of-string */
11486         *to = '\0';
11487         SvCUR_set(sv, to - SvPVX_const(sv));
11488
11489         /*
11490          * this next chunk reads more into the buffer if we're not done yet
11491          */
11492
11493         if (s < PL_bufend)
11494             break;              /* handle case where we are done yet :-) */
11495
11496 #ifndef PERL_STRICT_CR
11497         if (to - SvPVX_const(sv) >= 2) {
11498             if (   (to[-2] == '\r' && to[-1] == '\n')
11499                 || (to[-2] == '\n' && to[-1] == '\r'))
11500             {
11501                 to[-2] = '\n';
11502                 to--;
11503                 SvCUR_set(sv, to - SvPVX_const(sv));
11504             }
11505             else if (to[-1] == '\r')
11506                 to[-1] = '\n';
11507         }
11508         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11509             to[-1] = '\n';
11510 #endif
11511
11512         /* if we're out of file, or a read fails, bail and reset the current
11513            line marker so we can report where the unterminated string began
11514         */
11515         COPLINE_INC_WITH_HERELINES;
11516         PL_bufptr = PL_bufend;
11517         if (!lex_next_chunk(0)) {
11518             sv_free(sv);
11519             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11520             return NULL;
11521         }
11522         s = start = PL_bufptr;
11523     }
11524
11525     /* at this point, we have successfully read the delimited string */
11526
11527     if (keep_delims)
11528             sv_catpvn(sv, s, termlen);
11529     s += termlen;
11530
11531     if (d_is_utf8)
11532         SvUTF8_on(sv);
11533
11534     PL_multi_end = CopLINE(PL_curcop);
11535     CopLINE_set(PL_curcop, PL_multi_start);
11536     PL_parser->herelines = herelines;
11537
11538     /* if we allocated too much space, give some back */
11539     if (SvCUR(sv) + 5 < SvLEN(sv)) {
11540         SvLEN_set(sv, SvCUR(sv) + 1);
11541         SvPV_shrink_to_cur(sv);
11542     }
11543
11544     /* decide whether this is the first or second quoted string we've read
11545        for this op
11546     */
11547
11548     if (PL_lex_stuff)
11549         PL_parser->lex_sub_repl = sv;
11550     else
11551         PL_lex_stuff = sv;
11552     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
11553     return s;
11554 }
11555
11556 /*
11557   scan_num
11558   takes: pointer to position in buffer
11559   returns: pointer to new position in buffer
11560   side-effects: builds ops for the constant in pl_yylval.op
11561
11562   Read a number in any of the formats that Perl accepts:
11563
11564   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
11565   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
11566   0b[01](_?[01])*                                       binary integers
11567   0o?[0-7](_?[0-7])*                                    octal integers
11568   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
11569   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
11570
11571   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11572   thing it reads.
11573
11574   If it reads a number without a decimal point or an exponent, it will
11575   try converting the number to an integer and see if it can do so
11576   without loss of precision.
11577 */
11578
11579 char *
11580 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11581 {
11582     const char *s = start;      /* current position in buffer */
11583     char *d;                    /* destination in temp buffer */
11584     char *e;                    /* end of temp buffer */
11585     NV nv;                              /* number read, as a double */
11586     SV *sv = NULL;                      /* place to put the converted number */
11587     bool floatit;                       /* boolean: int or float? */
11588     const char *lastub = NULL;          /* position of last underbar */
11589     static const char* const number_too_long = "Number too long";
11590     bool warned_about_underscore = 0;
11591     I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11592 #define WARN_ABOUT_UNDERSCORE() \
11593         do { \
11594             if (!warned_about_underscore) { \
11595                 warned_about_underscore = 1; \
11596                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11597                                "Misplaced _ in number"); \
11598             } \
11599         } while(0)
11600     /* Hexadecimal floating point.
11601      *
11602      * In many places (where we have quads and NV is IEEE 754 double)
11603      * we can fit the mantissa bits of a NV into an unsigned quad.
11604      * (Note that UVs might not be quads even when we have quads.)
11605      * This will not work everywhere, though (either no quads, or
11606      * using long doubles), in which case we have to resort to NV,
11607      * which will probably mean horrible loss of precision due to
11608      * multiple fp operations. */
11609     bool hexfp = FALSE;
11610     int total_bits = 0;
11611     int significant_bits = 0;
11612 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11613 #  define HEXFP_UQUAD
11614     Uquad_t hexfp_uquad = 0;
11615     int hexfp_frac_bits = 0;
11616 #else
11617 #  define HEXFP_NV
11618     NV hexfp_nv = 0.0;
11619 #endif
11620     NV hexfp_mult = 1.0;
11621     UV high_non_zero = 0; /* highest digit */
11622     int non_zero_integer_digits = 0;
11623     bool new_octal = FALSE;     /* octal with "0o" prefix */
11624
11625     PERL_ARGS_ASSERT_SCAN_NUM;
11626
11627     /* We use the first character to decide what type of number this is */
11628
11629     switch (*s) {
11630     default:
11631         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11632
11633     /* if it starts with a 0, it could be an octal number, a decimal in
11634        0.13 disguise, or a hexadecimal number, or a binary number. */
11635     case '0':
11636         {
11637           /* variables:
11638              u          holds the "number so far"
11639              overflowed was the number more than we can hold?
11640
11641              Shift is used when we add a digit.  It also serves as an "are
11642              we in octal/hex/binary?" indicator to disallow hex characters
11643              when in octal mode.
11644            */
11645             NV n = 0.0;
11646             UV u = 0;
11647             bool overflowed = FALSE;
11648             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
11649             bool has_digs = FALSE;
11650             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11651             static const char* const bases[5] =
11652               { "", "binary", "", "octal", "hexadecimal" };
11653             static const char* const Bases[5] =
11654               { "", "Binary", "", "Octal", "Hexadecimal" };
11655             static const char* const maxima[5] =
11656               { "",
11657                 "0b11111111111111111111111111111111",
11658                 "",
11659                 "037777777777",
11660                 "0xffffffff" };
11661
11662             /* check for hex */
11663             if (isALPHA_FOLD_EQ(s[1], 'x')) {
11664                 shift = 4;
11665                 s += 2;
11666                 just_zero = FALSE;
11667             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11668                 shift = 1;
11669                 s += 2;
11670                 just_zero = FALSE;
11671             }
11672             /* check for a decimal in disguise */
11673             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11674                 goto decimal;
11675             /* so it must be octal */
11676             else {
11677                 shift = 3;
11678                 s++;
11679                 if (isALPHA_FOLD_EQ(*s, 'o')) {
11680                     s++;
11681                     just_zero = FALSE;
11682                     new_octal = TRUE;
11683                 }
11684             }
11685
11686             if (*s == '_') {
11687                 WARN_ABOUT_UNDERSCORE();
11688                lastub = s++;
11689             }
11690
11691             /* read the rest of the number */
11692             for (;;) {
11693                 /* x is used in the overflow test,
11694                    b is the digit we're adding on. */
11695                 UV x, b;
11696
11697                 switch (*s) {
11698
11699                 /* if we don't mention it, we're done */
11700                 default:
11701                     goto out;
11702
11703                 /* _ are ignored -- but warned about if consecutive */
11704                 case '_':
11705                     if (lastub && s == lastub + 1)
11706                         WARN_ABOUT_UNDERSCORE();
11707                     lastub = s++;
11708                     break;
11709
11710                 /* 8 and 9 are not octal */
11711                 case '8': case '9':
11712                     if (shift == 3)
11713                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11714                     /* FALLTHROUGH */
11715
11716                 /* octal digits */
11717                 case '2': case '3': case '4':
11718                 case '5': case '6': case '7':
11719                     if (shift == 1)
11720                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11721                     /* FALLTHROUGH */
11722
11723                 case '0': case '1':
11724                     b = *s++ & 15;              /* ASCII digit -> value of digit */
11725                     goto digit;
11726
11727                 /* hex digits */
11728                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11729                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11730                     /* make sure they said 0x */
11731                     if (shift != 4)
11732                         goto out;
11733                     b = (*s++ & 7) + 9;
11734
11735                     /* Prepare to put the digit we have onto the end
11736                        of the number so far.  We check for overflows.
11737                     */
11738
11739                   digit:
11740                     just_zero = FALSE;
11741                     has_digs = TRUE;
11742                     if (!overflowed) {
11743                         assert(shift >= 0);
11744                         x = u << shift; /* make room for the digit */
11745
11746                         total_bits += shift;
11747
11748                         if ((x >> shift) != u
11749                             && !(PL_hints & HINT_NEW_BINARY)) {
11750                             overflowed = TRUE;
11751                             n = (NV) u;
11752                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11753                                              "Integer overflow in %s number",
11754                                              bases[shift]);
11755                         } else
11756                             u = x | b;          /* add the digit to the end */
11757                     }
11758                     if (overflowed) {
11759                         n *= nvshift[shift];
11760                         /* If an NV has not enough bits in its
11761                          * mantissa to represent an UV this summing of
11762                          * small low-order numbers is a waste of time
11763                          * (because the NV cannot preserve the
11764                          * low-order bits anyway): we could just
11765                          * remember when did we overflow and in the
11766                          * end just multiply n by the right
11767                          * amount. */
11768                         n += (NV) b;
11769                     }
11770
11771                     if (high_non_zero == 0 && b > 0)
11772                         high_non_zero = b;
11773
11774                     if (high_non_zero)
11775                         non_zero_integer_digits++;
11776
11777                     /* this could be hexfp, but peek ahead
11778                      * to avoid matching ".." */
11779                     if (UNLIKELY(HEXFP_PEEK(s))) {
11780                         goto out;
11781                     }
11782
11783                     break;
11784                 }
11785             }
11786
11787           /* if we get here, we had success: make a scalar value from
11788              the number.
11789           */
11790           out:
11791
11792             /* final misplaced underbar check */
11793             if (s[-1] == '_')
11794                 WARN_ABOUT_UNDERSCORE();
11795
11796             if (UNLIKELY(HEXFP_PEEK(s))) {
11797                 /* Do sloppy (on the underbars) but quick detection
11798                  * (and value construction) for hexfp, the decimal
11799                  * detection will shortly be more thorough with the
11800                  * underbar checks. */
11801                 const char* h = s;
11802                 significant_bits = non_zero_integer_digits * shift;
11803 #ifdef HEXFP_UQUAD
11804                 hexfp_uquad = u;
11805 #else /* HEXFP_NV */
11806                 hexfp_nv = u;
11807 #endif
11808                 /* Ignore the leading zero bits of
11809                  * the high (first) non-zero digit. */
11810                 if (high_non_zero) {
11811                     if (high_non_zero < 0x8)
11812                         significant_bits--;
11813                     if (high_non_zero < 0x4)
11814                         significant_bits--;
11815                     if (high_non_zero < 0x2)
11816                         significant_bits--;
11817                 }
11818
11819                 if (*h == '.') {
11820 #ifdef HEXFP_NV
11821                     NV nv_mult = 1.0;
11822 #endif
11823                     bool accumulate = TRUE;
11824                     U8 b;
11825                     int lim = 1 << shift;
11826                     for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11827                                *h == '_'); h++) {
11828                         if (isXDIGIT(*h)) {
11829                             significant_bits += shift;
11830 #ifdef HEXFP_UQUAD
11831                             if (accumulate) {
11832                                 if (significant_bits < NV_MANT_DIG) {
11833                                     /* We are in the long "run" of xdigits,
11834                                      * accumulate the full four bits. */
11835                                     assert(shift >= 0);
11836                                     hexfp_uquad <<= shift;
11837                                     hexfp_uquad |= b;
11838                                     hexfp_frac_bits += shift;
11839                                 } else if (significant_bits - shift < NV_MANT_DIG) {
11840                                     /* We are at a hexdigit either at,
11841                                      * or straddling, the edge of mantissa.
11842                                      * We will try grabbing as many as
11843                                      * possible bits. */
11844                                     int tail =
11845                                       significant_bits - NV_MANT_DIG;
11846                                     if (tail <= 0)
11847                                        tail += shift;
11848                                     assert(tail >= 0);
11849                                     hexfp_uquad <<= tail;
11850                                     assert((shift - tail) >= 0);
11851                                     hexfp_uquad |= b >> (shift - tail);
11852                                     hexfp_frac_bits += tail;
11853
11854                                     /* Ignore the trailing zero bits
11855                                      * of the last non-zero xdigit.
11856                                      *
11857                                      * The assumption here is that if
11858                                      * one has input of e.g. the xdigit
11859                                      * eight (0x8), there is only one
11860                                      * bit being input, not the full
11861                                      * four bits.  Conversely, if one
11862                                      * specifies a zero xdigit, the
11863                                      * assumption is that one really
11864                                      * wants all those bits to be zero. */
11865                                     if (b) {
11866                                         if ((b & 0x1) == 0x0) {
11867                                             significant_bits--;
11868                                             if ((b & 0x2) == 0x0) {
11869                                                 significant_bits--;
11870                                                 if ((b & 0x4) == 0x0) {
11871                                                     significant_bits--;
11872                                                 }
11873                                             }
11874                                         }
11875                                     }
11876
11877                                     accumulate = FALSE;
11878                                 }
11879                             } else {
11880                                 /* Keep skipping the xdigits, and
11881                                  * accumulating the significant bits,
11882                                  * but do not shift the uquad
11883                                  * (which would catastrophically drop
11884                                  * high-order bits) or accumulate the
11885                                  * xdigits anymore. */
11886                             }
11887 #else /* HEXFP_NV */
11888                             if (accumulate) {
11889                                 nv_mult /= nvshift[shift];
11890                                 if (nv_mult > 0.0)
11891                                     hexfp_nv += b * nv_mult;
11892                                 else
11893                                     accumulate = FALSE;
11894                             }
11895 #endif
11896                         }
11897                         if (significant_bits >= NV_MANT_DIG)
11898                             accumulate = FALSE;
11899                     }
11900                 }
11901
11902                 if ((total_bits > 0 || significant_bits > 0) &&
11903                     isALPHA_FOLD_EQ(*h, 'p')) {
11904                     bool negexp = FALSE;
11905                     h++;
11906                     if (*h == '+')
11907                         h++;
11908                     else if (*h == '-') {
11909                         negexp = TRUE;
11910                         h++;
11911                     }
11912                     if (isDIGIT(*h)) {
11913                         I32 hexfp_exp = 0;
11914                         while (isDIGIT(*h) || *h == '_') {
11915                             if (isDIGIT(*h)) {
11916                                 hexfp_exp *= 10;
11917                                 hexfp_exp += *h - '0';
11918 #ifdef NV_MIN_EXP
11919                                 if (negexp
11920                                     && -hexfp_exp < NV_MIN_EXP - 1) {
11921                                     /* NOTE: this means that the exponent
11922                                      * underflow warning happens for
11923                                      * the IEEE 754 subnormals (denormals),
11924                                      * because DBL_MIN_EXP etc are the lowest
11925                                      * possible binary (or, rather, DBL_RADIX-base)
11926                                      * exponent for normals, not subnormals.
11927                                      *
11928                                      * This may or may not be a good thing. */
11929                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11930                                                    "Hexadecimal float: exponent underflow");
11931                                     break;
11932                                 }
11933 #endif
11934 #ifdef NV_MAX_EXP
11935                                 if (!negexp
11936                                     && hexfp_exp > NV_MAX_EXP - 1) {
11937                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11938                                                    "Hexadecimal float: exponent overflow");
11939                                     break;
11940                                 }
11941 #endif
11942                             }
11943                             h++;
11944                         }
11945                         if (negexp)
11946                             hexfp_exp = -hexfp_exp;
11947 #ifdef HEXFP_UQUAD
11948                         hexfp_exp -= hexfp_frac_bits;
11949 #endif
11950                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
11951                         hexfp = TRUE;
11952                         goto decimal;
11953                     }
11954                 }
11955             }
11956
11957             if (!just_zero && !has_digs) {
11958                 /* 0x, 0o or 0b with no digits, treat it as an error.
11959                    Originally this backed up the parse before the b or
11960                    x, but that has the potential for silent changes in
11961                    behaviour, like for: "0x.3" and "0x+$foo".
11962                 */
11963                 const char *d = s;
11964                 char *oldbp = PL_bufptr;
11965                 if (*d) ++d; /* so the user sees the bad non-digit */
11966                 PL_bufptr = (char *)d; /* so yyerror reports the context */
11967                 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
11968                                   bases[shift]));
11969                 PL_bufptr = oldbp;
11970             }
11971
11972             if (overflowed) {
11973                 if (n > 4294967295.0)
11974                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11975                                    "%s number > %s non-portable",
11976                                    Bases[shift],
11977                                    new_octal ? "0o37777777777" : maxima[shift]);
11978                 sv = newSVnv(n);
11979             }
11980             else {
11981 #if UVSIZE > 4
11982                 if (u > 0xffffffff)
11983                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11984                                    "%s number > %s non-portable",
11985                                    Bases[shift],
11986                                    new_octal ? "0o37777777777" : maxima[shift]);
11987 #endif
11988                 sv = newSVuv(u);
11989             }
11990             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11991                 sv = new_constant(start, s - start, "integer",
11992                                   sv, NULL, NULL, 0, NULL);
11993             else if (PL_hints & HINT_NEW_BINARY)
11994                 sv = new_constant(start, s - start, "binary",
11995                                   sv, NULL, NULL, 0, NULL);
11996         }
11997         break;
11998
11999     /*
12000       handle decimal numbers.
12001       we're also sent here when we read a 0 as the first digit
12002     */
12003     case '1': case '2': case '3': case '4': case '5':
12004     case '6': case '7': case '8': case '9': case '.':
12005       decimal:
12006         d = PL_tokenbuf;
12007         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12008         floatit = FALSE;
12009         if (hexfp) {
12010             floatit = TRUE;
12011             *d++ = '0';
12012             switch (shift) {
12013             case 4:
12014                 *d++ = 'x';
12015                 s = start + 2;
12016                 break;
12017             case 3:
12018                 if (new_octal) {
12019                     *d++ = 'o';
12020                     s = start + 2;
12021                     break;
12022                 }
12023                 s = start + 1;
12024                 break;
12025             case 1:
12026                 *d++ = 'b';
12027                 s = start + 2;
12028                 break;
12029             default:
12030                 NOT_REACHED; /* NOTREACHED */
12031             }
12032         }
12033
12034         /* read next group of digits and _ and copy into d */
12035         while (isDIGIT(*s)
12036                || *s == '_'
12037                || UNLIKELY(hexfp && isXDIGIT(*s)))
12038         {
12039             /* skip underscores, checking for misplaced ones
12040                if -w is on
12041             */
12042             if (*s == '_') {
12043                 if (lastub && s == lastub + 1)
12044                     WARN_ABOUT_UNDERSCORE();
12045                 lastub = s++;
12046             }
12047             else {
12048                 /* check for end of fixed-length buffer */
12049                 if (d >= e)
12050                     Perl_croak(aTHX_ "%s", number_too_long);
12051                 /* if we're ok, copy the character */
12052                 *d++ = *s++;
12053             }
12054         }
12055
12056         /* final misplaced underbar check */
12057         if (lastub && s == lastub + 1)
12058             WARN_ABOUT_UNDERSCORE();
12059
12060         /* read a decimal portion if there is one.  avoid
12061            3..5 being interpreted as the number 3. followed
12062            by .5
12063         */
12064         if (*s == '.' && s[1] != '.') {
12065             floatit = TRUE;
12066             *d++ = *s++;
12067
12068             if (*s == '_') {
12069                 WARN_ABOUT_UNDERSCORE();
12070                 lastub = s;
12071             }
12072
12073             /* copy, ignoring underbars, until we run out of digits.
12074             */
12075             for (; isDIGIT(*s)
12076                    || *s == '_'
12077                    || UNLIKELY(hexfp && isXDIGIT(*s));
12078                  s++)
12079             {
12080                 /* fixed length buffer check */
12081                 if (d >= e)
12082                     Perl_croak(aTHX_ "%s", number_too_long);
12083                 if (*s == '_') {
12084                    if (lastub && s == lastub + 1)
12085                         WARN_ABOUT_UNDERSCORE();
12086                    lastub = s;
12087                 }
12088                 else
12089                     *d++ = *s;
12090             }
12091             /* fractional part ending in underbar? */
12092             if (s[-1] == '_')
12093                 WARN_ABOUT_UNDERSCORE();
12094             if (*s == '.' && isDIGIT(s[1])) {
12095                 /* oops, it's really a v-string, but without the "v" */
12096                 s = start;
12097                 goto vstring;
12098             }
12099         }
12100
12101         /* read exponent part, if present */
12102         if ((isALPHA_FOLD_EQ(*s, 'e')
12103               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
12104             && memCHRs("+-0123456789_", s[1]))
12105         {
12106             int exp_digits = 0;
12107             const char *save_s = s;
12108             char * save_d = d;
12109
12110             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
12111                ditto for p (hexfloats) */
12112             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
12113                 /* At least some Mach atof()s don't grok 'E' */
12114                 *d++ = 'e';
12115             }
12116             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
12117                 *d++ = 'p';
12118             }
12119
12120             s++;
12121
12122
12123             /* stray preinitial _ */
12124             if (*s == '_') {
12125                 WARN_ABOUT_UNDERSCORE();
12126                 lastub = s++;
12127             }
12128
12129             /* allow positive or negative exponent */
12130             if (*s == '+' || *s == '-')
12131                 *d++ = *s++;
12132
12133             /* stray initial _ */
12134             if (*s == '_') {
12135                 WARN_ABOUT_UNDERSCORE();
12136                 lastub = s++;
12137             }
12138
12139             /* read digits of exponent */
12140             while (isDIGIT(*s) || *s == '_') {
12141                 if (isDIGIT(*s)) {
12142                     ++exp_digits;
12143                     if (d >= e)
12144                         Perl_croak(aTHX_ "%s", number_too_long);
12145                     *d++ = *s++;
12146                 }
12147                 else {
12148                    if (((lastub && s == lastub + 1)
12149                         || (!isDIGIT(s[1]) && s[1] != '_')))
12150                         WARN_ABOUT_UNDERSCORE();
12151                    lastub = s++;
12152                 }
12153             }
12154
12155             if (!exp_digits) {
12156                 /* no exponent digits, the [eEpP] could be for something else,
12157                  * though in practice we don't get here for p since that's preparsed
12158                  * earlier, and results in only the 0xX being consumed, so behave similarly
12159                  * for decimal floats and consume only the D.DD, leaving the [eE] to the
12160                  * next token.
12161                  */
12162                 s = save_s;
12163                 d = save_d;
12164             }
12165             else {
12166                 floatit = TRUE;
12167             }
12168         }
12169
12170
12171         /*
12172            We try to do an integer conversion first if no characters
12173            indicating "float" have been found.
12174          */
12175
12176         if (!floatit) {
12177             UV uv;
12178             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12179
12180             if (flags == IS_NUMBER_IN_UV) {
12181               if (uv <= IV_MAX)
12182                 sv = newSViv(uv); /* Prefer IVs over UVs. */
12183               else
12184                 sv = newSVuv(uv);
12185             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12186               if (uv <= (UV) IV_MIN)
12187                 sv = newSViv(-(IV)uv);
12188               else
12189                 floatit = TRUE;
12190             } else
12191               floatit = TRUE;
12192         }
12193         if (floatit) {
12194             /* terminate the string */
12195             *d = '\0';
12196             if (UNLIKELY(hexfp)) {
12197 #  ifdef NV_MANT_DIG
12198                 if (significant_bits > NV_MANT_DIG)
12199                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12200                                    "Hexadecimal float: mantissa overflow");
12201 #  endif
12202 #ifdef HEXFP_UQUAD
12203                 nv = hexfp_uquad * hexfp_mult;
12204 #else /* HEXFP_NV */
12205                 nv = hexfp_nv * hexfp_mult;
12206 #endif
12207             } else {
12208                 nv = Atof(PL_tokenbuf);
12209             }
12210             sv = newSVnv(nv);
12211         }
12212
12213         if ( floatit
12214              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12215             const char *const key = floatit ? "float" : "integer";
12216             const STRLEN keylen = floatit ? 5 : 7;
12217             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12218                                 key, keylen, sv, NULL, NULL, 0, NULL);
12219         }
12220         break;
12221
12222     /* if it starts with a v, it could be a v-string */
12223     case 'v':
12224     vstring:
12225                 sv = newSV(5); /* preallocate storage space */
12226                 ENTER_with_name("scan_vstring");
12227                 SAVEFREESV(sv);
12228                 s = scan_vstring(s, PL_bufend, sv);
12229                 SvREFCNT_inc_simple_void_NN(sv);
12230                 LEAVE_with_name("scan_vstring");
12231         break;
12232     }
12233
12234     /* make the op for the constant and return */
12235
12236     if (sv)
12237         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12238     else
12239         lvalp->opval = NULL;
12240
12241     return (char *)s;
12242 }
12243
12244 STATIC char *
12245 S_scan_formline(pTHX_ char *s)
12246 {
12247     SV * const stuff = newSVpvs("");
12248     bool needargs = FALSE;
12249     bool eofmt = FALSE;
12250
12251     PERL_ARGS_ASSERT_SCAN_FORMLINE;
12252
12253     while (!needargs) {
12254         char *eol;
12255         if (*s == '.') {
12256             char *t = s+1;
12257 #ifdef PERL_STRICT_CR
12258             while (SPACE_OR_TAB(*t))
12259                 t++;
12260 #else
12261             while (SPACE_OR_TAB(*t) || *t == '\r')
12262                 t++;
12263 #endif
12264             if (*t == '\n' || t == PL_bufend) {
12265                 eofmt = TRUE;
12266                 break;
12267             }
12268         }
12269         eol = (char *) memchr(s,'\n',PL_bufend-s);
12270         if (!eol++)
12271                 eol = PL_bufend;
12272         if (*s != '#') {
12273             char *t;
12274             for (t = s; t < eol; t++) {
12275                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12276                     needargs = FALSE;
12277                     goto enough;        /* ~~ must be first line in formline */
12278                 }
12279                 if (*t == '@' || *t == '^')
12280                     needargs = TRUE;
12281             }
12282             if (eol > s) {
12283                 sv_catpvn(stuff, s, eol-s);
12284 #ifndef PERL_STRICT_CR
12285                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12286                     char *end = SvPVX(stuff) + SvCUR(stuff);
12287                     end[-2] = '\n';
12288                     end[-1] = '\0';
12289                     SvCUR_set(stuff, SvCUR(stuff) - 1);
12290                 }
12291 #endif
12292             }
12293             else
12294               break;
12295         }
12296         s = (char*)eol;
12297         if ((PL_rsfp || PL_parser->filtered)
12298          && PL_parser->form_lex_state == LEX_NORMAL) {
12299             bool got_some;
12300             PL_bufptr = PL_bufend;
12301             COPLINE_INC_WITH_HERELINES;
12302             got_some = lex_next_chunk(0);
12303             CopLINE_dec(PL_curcop);
12304             s = PL_bufptr;
12305             if (!got_some)
12306                 break;
12307         }
12308         incline(s, PL_bufend);
12309     }
12310   enough:
12311     if (!SvCUR(stuff) || needargs)
12312         PL_lex_state = PL_parser->form_lex_state;
12313     if (SvCUR(stuff)) {
12314         PL_expect = XSTATE;
12315         if (needargs) {
12316             const char *s2 = s;
12317             while (isSPACE(*s2) && *s2 != '\n')
12318                 s2++;
12319             if (*s2 == '{') {
12320                 PL_expect = XTERMBLOCK;
12321                 NEXTVAL_NEXTTOKE.ival = 0;
12322                 force_next(DO);
12323             }
12324             NEXTVAL_NEXTTOKE.ival = 0;
12325             force_next(FORMLBRACK);
12326         }
12327         if (!IN_BYTES) {
12328             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12329                 SvUTF8_on(stuff);
12330         }
12331         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12332         force_next(THING);
12333     }
12334     else {
12335         SvREFCNT_dec(stuff);
12336         if (eofmt)
12337             PL_lex_formbrack = 0;
12338     }
12339     return s;
12340 }
12341
12342 I32
12343 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12344 {
12345     const I32 oldsavestack_ix = PL_savestack_ix;
12346     CV* const outsidecv = PL_compcv;
12347
12348     SAVEI32(PL_subline);
12349     save_item(PL_subname);
12350     SAVESPTR(PL_compcv);
12351
12352     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12353     CvFLAGS(PL_compcv) |= flags;
12354
12355     PL_subline = CopLINE(PL_curcop);
12356     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12357     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12358     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12359     if (outsidecv && CvPADLIST(outsidecv))
12360         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12361
12362     return oldsavestack_ix;
12363 }
12364
12365
12366 /* Do extra initialisation of a CV (typically one just created by
12367  * start_subparse()) if that CV is for a named sub
12368  */
12369
12370 void
12371 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12372 {
12373     PERL_ARGS_ASSERT_INIT_NAMED_CV;
12374
12375     if (nameop->op_type == OP_CONST) {
12376         const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12377         if (   strEQ(name, "BEGIN")
12378             || strEQ(name, "END")
12379             || strEQ(name, "INIT")
12380             || strEQ(name, "CHECK")
12381             || strEQ(name, "UNITCHECK")
12382         )
12383           CvSPECIAL_on(cv);
12384     }
12385     else
12386     /* State subs inside anonymous subs need to be
12387      clonable themselves. */
12388     if (   CvANON(CvOUTSIDE(cv))
12389         || CvCLONE(CvOUTSIDE(cv))
12390         || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12391                         CvOUTSIDE(cv)
12392                      ))[nameop->op_targ])
12393     )
12394       CvCLONE_on(cv);
12395 }
12396
12397
12398 static int
12399 S_yywarn(pTHX_ const char *const s, U32 flags)
12400 {
12401     PERL_ARGS_ASSERT_YYWARN;
12402
12403     PL_in_eval |= EVAL_WARNONLY;
12404     yyerror_pv(s, flags);
12405     return 0;
12406 }
12407
12408 void
12409 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
12410 {
12411     PERL_ARGS_ASSERT_ABORT_EXECUTION;
12412
12413     if (PL_minus_c)
12414         Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
12415     else {
12416         Perl_croak(aTHX_
12417                 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
12418     }
12419     NOT_REACHED; /* NOTREACHED */
12420 }
12421
12422 void
12423 Perl_yyquit(pTHX)
12424 {
12425     /* Called, after at least one error has been found, to abort the parse now,
12426      * instead of trying to forge ahead */
12427
12428     yyerror_pvn(NULL, 0, 0);
12429 }
12430
12431 int
12432 Perl_yyerror(pTHX_ const char *const s)
12433 {
12434     PERL_ARGS_ASSERT_YYERROR;
12435     return yyerror_pvn(s, strlen(s), 0);
12436 }
12437
12438 int
12439 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12440 {
12441     PERL_ARGS_ASSERT_YYERROR_PV;
12442     return yyerror_pvn(s, strlen(s), flags);
12443 }
12444
12445 int
12446 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12447 {
12448     const char *context = NULL;
12449     int contlen = -1;
12450     SV *msg;
12451     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12452     int yychar  = PL_parser->yychar;
12453
12454     /* Output error message 's' with length 'len'.  'flags' are SV flags that
12455      * apply.  If the number of errors found is large enough, it abandons
12456      * parsing.  If 's' is NULL, there is no message, and it abandons
12457      * processing unconditionally */
12458
12459     if (s != NULL) {
12460         if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp))
12461             sv_catpvs(where_sv, "at EOF");
12462         else if (   PL_oldoldbufptr
12463                  && PL_bufptr > PL_oldoldbufptr
12464                  && PL_bufptr - PL_oldoldbufptr < 200
12465                  && PL_oldoldbufptr != PL_oldbufptr
12466                  && PL_oldbufptr != PL_bufptr)
12467         {
12468             while (isSPACE(*PL_oldoldbufptr))
12469                 PL_oldoldbufptr++;
12470             context = PL_oldoldbufptr;
12471             contlen = PL_bufptr - PL_oldoldbufptr;
12472         }
12473         else if (  PL_oldbufptr
12474                 && PL_bufptr > PL_oldbufptr
12475                 && PL_bufptr - PL_oldbufptr < 200
12476                 && PL_oldbufptr != PL_bufptr)
12477         {
12478             while (isSPACE(*PL_oldbufptr))
12479                 PL_oldbufptr++;
12480             context = PL_oldbufptr;
12481             contlen = PL_bufptr - PL_oldbufptr;
12482         }
12483         else if (yychar > 255)
12484             sv_catpvs(where_sv, "next token ???");
12485         else if (yychar == YYEMPTY) {
12486             if (PL_lex_state == LEX_NORMAL)
12487                 sv_catpvs(where_sv, "at end of line");
12488             else if (PL_lex_inpat)
12489                 sv_catpvs(where_sv, "within pattern");
12490             else
12491                 sv_catpvs(where_sv, "within string");
12492         }
12493         else {
12494             sv_catpvs(where_sv, "next char ");
12495             if (yychar < 32)
12496                 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12497             else if (isPRINT_LC(yychar)) {
12498                 const char string = yychar;
12499                 sv_catpvn(where_sv, &string, 1);
12500             }
12501             else
12502                 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12503         }
12504         msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12505         Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
12506             OutCopFILE(PL_curcop),
12507             (IV)(PL_parser->preambling == NOLINE
12508                    ? CopLINE(PL_curcop)
12509                    : PL_parser->preambling));
12510         if (context)
12511             Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12512                                  UTF8fARG(UTF, contlen, context));
12513         else
12514             Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12515         if (   PL_multi_start < PL_multi_end
12516             && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12517         {
12518             Perl_sv_catpvf(aTHX_ msg,
12519             "  (Might be a runaway multi-line %c%c string starting on"
12520             " line %" IVdf ")\n",
12521                     (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12522             PL_multi_end = 0;
12523         }
12524         if (PL_in_eval & EVAL_WARNONLY) {
12525             PL_in_eval &= ~EVAL_WARNONLY;
12526             Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12527         }
12528         else {
12529             qerror(msg);
12530         }
12531     }
12532     if (s == NULL || PL_error_count >= 10) {
12533         const char * msg = "";
12534         const char * const name = OutCopFILE(PL_curcop);
12535
12536         if (PL_in_eval) {
12537             SV * errsv = ERRSV;
12538             if (SvCUR(errsv)) {
12539                 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
12540             }
12541         }
12542
12543         if (s == NULL) {
12544             abort_execution(msg, name);
12545         }
12546         else {
12547             Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
12548         }
12549     }
12550     PL_in_my = 0;
12551     PL_in_my_stash = NULL;
12552     return 0;
12553 }
12554
12555 STATIC char*
12556 S_swallow_bom(pTHX_ U8 *s)
12557 {
12558     const STRLEN slen = SvCUR(PL_linestr);
12559
12560     PERL_ARGS_ASSERT_SWALLOW_BOM;
12561
12562     switch (s[0]) {
12563     case 0xFF:
12564         if (s[1] == 0xFE) {
12565             /* UTF-16 little-endian? (or UTF-32LE?) */
12566             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
12567                 /* diag_listed_as: Unsupported script encoding %s */
12568                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12569 #ifndef PERL_NO_UTF16_FILTER
12570 #ifdef DEBUGGING
12571             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12572 #endif
12573             s += 2;
12574             if (PL_bufend > (char*)s) {
12575                 s = add_utf16_textfilter(s, TRUE);
12576             }
12577 #else
12578             /* diag_listed_as: Unsupported script encoding %s */
12579             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12580 #endif
12581         }
12582         break;
12583     case 0xFE:
12584         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
12585 #ifndef PERL_NO_UTF16_FILTER
12586 #ifdef DEBUGGING
12587             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12588 #endif
12589             s += 2;
12590             if (PL_bufend > (char *)s) {
12591                 s = add_utf16_textfilter(s, FALSE);
12592             }
12593 #else
12594             /* diag_listed_as: Unsupported script encoding %s */
12595             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12596 #endif
12597         }
12598         break;
12599     case BOM_UTF8_FIRST_BYTE: {
12600         if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
12601 #ifdef DEBUGGING
12602             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12603 #endif
12604             s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
12605         }
12606         break;
12607     }
12608     case 0:
12609         if (slen > 3) {
12610              if (s[1] == 0) {
12611                   if (s[2] == 0xFE && s[3] == 0xFF) {
12612                        /* UTF-32 big-endian */
12613                        /* diag_listed_as: Unsupported script encoding %s */
12614                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
12615                   }
12616              }
12617              else if (s[2] == 0 && s[3] != 0) {
12618                   /* Leading bytes
12619                    * 00 xx 00 xx
12620                    * are a good indicator of UTF-16BE. */
12621 #ifndef PERL_NO_UTF16_FILTER
12622 #ifdef DEBUGGING
12623                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12624 #endif
12625                   s = add_utf16_textfilter(s, FALSE);
12626 #else
12627                   /* diag_listed_as: Unsupported script encoding %s */
12628                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12629 #endif
12630              }
12631         }
12632         break;
12633
12634     default:
12635          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12636                   /* Leading bytes
12637                    * xx 00 xx 00
12638                    * are a good indicator of UTF-16LE. */
12639 #ifndef PERL_NO_UTF16_FILTER
12640 #ifdef DEBUGGING
12641               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12642 #endif
12643               s = add_utf16_textfilter(s, TRUE);
12644 #else
12645               /* diag_listed_as: Unsupported script encoding %s */
12646               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12647 #endif
12648          }
12649     }
12650     return (char*)s;
12651 }
12652
12653
12654 #ifndef PERL_NO_UTF16_FILTER
12655 static I32
12656 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12657 {
12658     SV *const filter = FILTER_DATA(idx);
12659     /* We re-use this each time round, throwing the contents away before we
12660        return.  */
12661     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12662     SV *const utf8_buffer = filter;
12663     IV status = IoPAGE(filter);
12664     const bool reverse = cBOOL(IoLINES(filter));
12665     I32 retval;
12666
12667     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
12668
12669     /* As we're automatically added, at the lowest level, and hence only called
12670        from this file, we can be sure that we're not called in block mode. Hence
12671        don't bother writing code to deal with block mode.  */
12672     if (maxlen) {
12673         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12674     }
12675     if (status < 0) {
12676         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12677     }
12678     DEBUG_P(PerlIO_printf(Perl_debug_log,
12679                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12680                           FPTR2DPTR(void *, S_utf16_textfilter),
12681                           reverse ? 'l' : 'b', idx, maxlen, status,
12682                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12683
12684     while (1) {
12685         STRLEN chars;
12686         STRLEN have;
12687         Size_t newlen;
12688         U8 *end;
12689         /* First, look in our buffer of existing UTF-8 data:  */
12690         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12691
12692         if (nl) {
12693             ++nl;
12694         } else if (status == 0) {
12695             /* EOF */
12696             IoPAGE(filter) = 0;
12697             nl = SvEND(utf8_buffer);
12698         }
12699         if (nl) {
12700             STRLEN got = nl - SvPVX(utf8_buffer);
12701             /* Did we have anything to append?  */
12702             retval = got != 0;
12703             sv_catpvn(sv, SvPVX(utf8_buffer), got);
12704             /* Everything else in this code works just fine if SVp_POK isn't
12705                set.  This, however, needs it, and we need it to work, else
12706                we loop infinitely because the buffer is never consumed.  */
12707             sv_chop(utf8_buffer, nl);
12708             break;
12709         }
12710
12711         /* OK, not a complete line there, so need to read some more UTF-16.
12712            Read an extra octect if the buffer currently has an odd number. */
12713         while (1) {
12714             if (status <= 0)
12715                 break;
12716             if (SvCUR(utf16_buffer) >= 2) {
12717                 /* Location of the high octet of the last complete code point.
12718                    Gosh, UTF-16 is a pain. All the benefits of variable length,
12719                    *coupled* with all the benefits of partial reads and
12720                    endianness.  */
12721                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12722                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12723
12724                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12725                     break;
12726                 }
12727
12728                 /* We have the first half of a surrogate. Read more.  */
12729                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12730             }
12731
12732             status = FILTER_READ(idx + 1, utf16_buffer,
12733                                  160 + (SvCUR(utf16_buffer) & 1));
12734             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12735             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12736             if (status < 0) {
12737                 /* Error */
12738                 IoPAGE(filter) = status;
12739                 return status;
12740             }
12741         }
12742
12743         /* 'chars' isn't quite the right name, as code points above 0xFFFF
12744          * require 4 bytes per char */
12745         chars = SvCUR(utf16_buffer) >> 1;
12746         have = SvCUR(utf8_buffer);
12747
12748         /* Assume the worst case size as noted by the functions: twice the
12749          * number of input bytes */
12750         SvGROW(utf8_buffer, have + chars * 4 + 1);
12751
12752         if (reverse) {
12753             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12754                                          (U8*)SvPVX_const(utf8_buffer) + have,
12755                                          chars * 2, &newlen);
12756         } else {
12757             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12758                                 (U8*)SvPVX_const(utf8_buffer) + have,
12759                                 chars * 2, &newlen);
12760         }
12761         SvCUR_set(utf8_buffer, have + newlen);
12762         *end = '\0';
12763
12764         /* No need to keep this SV "well-formed" with a '\0' after the end, as
12765            it's private to us, and utf16_to_utf8{,reversed} take a
12766            (pointer,length) pair, rather than a NUL-terminated string.  */
12767         if(SvCUR(utf16_buffer) & 1) {
12768             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12769             SvCUR_set(utf16_buffer, 1);
12770         } else {
12771             SvCUR_set(utf16_buffer, 0);
12772         }
12773     }
12774     DEBUG_P(PerlIO_printf(Perl_debug_log,
12775                           "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12776                           status,
12777                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12778     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12779     return retval;
12780 }
12781
12782 static U8 *
12783 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12784 {
12785     SV *filter = filter_add(S_utf16_textfilter, NULL);
12786
12787     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12788
12789     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12790     SvPVCLEAR(filter);
12791     IoLINES(filter) = reversed;
12792     IoPAGE(filter) = 1; /* Not EOF */
12793
12794     /* Sadly, we have to return a valid pointer, come what may, so we have to
12795        ignore any error return from this.  */
12796     SvCUR_set(PL_linestr, 0);
12797     if (FILTER_READ(0, PL_linestr, 0)) {
12798         SvUTF8_on(PL_linestr);
12799     } else {
12800         SvUTF8_on(PL_linestr);
12801     }
12802     PL_bufend = SvEND(PL_linestr);
12803     return (U8*)SvPVX(PL_linestr);
12804 }
12805 #endif
12806
12807 /*
12808 Returns a pointer to the next character after the parsed
12809 vstring, as well as updating the passed in sv.
12810
12811 Function must be called like
12812
12813         sv = sv_2mortal(newSV(5));
12814         s = scan_vstring(s,e,sv);
12815
12816 where s and e are the start and end of the string.
12817 The sv should already be large enough to store the vstring
12818 passed in, for performance reasons.
12819
12820 This function may croak if fatal warnings are enabled in the
12821 calling scope, hence the sv_2mortal in the example (to prevent
12822 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
12823 sv_2mortal.
12824
12825 */
12826
12827 char *
12828 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12829 {
12830     const char *pos = s;
12831     const char *start = s;
12832
12833     PERL_ARGS_ASSERT_SCAN_VSTRING;
12834
12835     if (*pos == 'v') pos++;  /* get past 'v' */
12836     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12837         pos++;
12838     if ( *pos != '.') {
12839         /* this may not be a v-string if followed by => */
12840         const char *next = pos;
12841         while (next < e && isSPACE(*next))
12842             ++next;
12843         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12844             /* return string not v-string */
12845             sv_setpvn(sv,(char *)s,pos-s);
12846             return (char *)pos;
12847         }
12848     }
12849
12850     if (!isALPHA(*pos)) {
12851         U8 tmpbuf[UTF8_MAXBYTES+1];
12852
12853         if (*s == 'v')
12854             s++;  /* get past 'v' */
12855
12856         SvPVCLEAR(sv);
12857
12858         for (;;) {
12859             /* this is atoi() that tolerates underscores */
12860             U8 *tmpend;
12861             UV rev = 0;
12862             const char *end = pos;
12863             UV mult = 1;
12864             while (--end >= s) {
12865                 if (*end != '_') {
12866                     const UV orev = rev;
12867                     rev += (*end - '0') * mult;
12868                     mult *= 10;
12869                     if (orev > rev)
12870                         /* diag_listed_as: Integer overflow in %s number */
12871                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12872                                          "Integer overflow in decimal number");
12873                 }
12874             }
12875
12876             /* Append native character for the rev point */
12877             tmpend = uvchr_to_utf8(tmpbuf, rev);
12878             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12879             if (!UVCHR_IS_INVARIANT(rev))
12880                  SvUTF8_on(sv);
12881             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12882                  s = ++pos;
12883             else {
12884                  s = pos;
12885                  break;
12886             }
12887             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12888                  pos++;
12889         }
12890         SvPOK_on(sv);
12891         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12892         SvRMAGICAL_on(sv);
12893     }
12894     return (char *)s;
12895 }
12896
12897 int
12898 Perl_keyword_plugin_standard(pTHX_
12899         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12900 {
12901     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12902     PERL_UNUSED_CONTEXT;
12903     PERL_UNUSED_ARG(keyword_ptr);
12904     PERL_UNUSED_ARG(keyword_len);
12905     PERL_UNUSED_ARG(op_ptr);
12906     return KEYWORD_PLUGIN_DECLINE;
12907 }
12908
12909 /*
12910 =for apidoc wrap_keyword_plugin
12911
12912 Puts a C function into the chain of keyword plugins.  This is the
12913 preferred way to manipulate the L</PL_keyword_plugin> variable.
12914 C<new_plugin> is a pointer to the C function that is to be added to the
12915 keyword plugin chain, and C<old_plugin_p> points to the storage location
12916 where a pointer to the next function in the chain will be stored.  The
12917 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12918 while the value previously stored there is written to C<*old_plugin_p>.
12919
12920 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12921 to hook keyword parsing may find itself invoked more than once per
12922 process, typically in different threads.  To handle that situation, this
12923 function is idempotent.  The location C<*old_plugin_p> must initially
12924 (once per process) contain a null pointer.  A C variable of static
12925 duration (declared at file scope, typically also marked C<static> to give
12926 it internal linkage) will be implicitly initialised appropriately, if it
12927 does not have an explicit initialiser.  This function will only actually
12928 modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
12929 function is also thread safe on the small scale.  It uses appropriate
12930 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12931
12932 When this function is called, the function referenced by C<new_plugin>
12933 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12934 In a threading situation, C<new_plugin> may be called immediately, even
12935 before this function has returned.  C<*old_plugin_p> will always be
12936 appropriately set before C<new_plugin> is called.  If C<new_plugin>
12937 decides not to do anything special with the identifier that it is given
12938 (which is the usual case for most calls to a keyword plugin), it must
12939 chain the plugin function referenced by C<*old_plugin_p>.
12940
12941 Taken all together, XS code to install a keyword plugin should typically
12942 look something like this:
12943
12944     static Perl_keyword_plugin_t next_keyword_plugin;
12945     static OP *my_keyword_plugin(pTHX_
12946         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12947     {
12948         if (memEQs(keyword_ptr, keyword_len,
12949                    "my_new_keyword")) {
12950             ...
12951         } else {
12952             return next_keyword_plugin(aTHX_
12953                 keyword_ptr, keyword_len, op_ptr);
12954         }
12955     }
12956     BOOT:
12957         wrap_keyword_plugin(my_keyword_plugin,
12958                             &next_keyword_plugin);
12959
12960 Direct access to L</PL_keyword_plugin> should be avoided.
12961
12962 =cut
12963 */
12964
12965 void
12966 Perl_wrap_keyword_plugin(pTHX_
12967     Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12968 {
12969
12970     PERL_UNUSED_CONTEXT;
12971     PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12972     if (*old_plugin_p) return;
12973     KEYWORD_PLUGIN_MUTEX_LOCK;
12974     if (!*old_plugin_p) {
12975         *old_plugin_p = PL_keyword_plugin;
12976         PL_keyword_plugin = new_plugin;
12977     }
12978     KEYWORD_PLUGIN_MUTEX_UNLOCK;
12979 }
12980
12981 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12982 static void
12983 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12984 {
12985     SAVEI32(PL_lex_brackets);
12986     if (PL_lex_brackets > 100)
12987         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12988     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12989     SAVEI32(PL_lex_allbrackets);
12990     PL_lex_allbrackets = 0;
12991     SAVEI8(PL_lex_fakeeof);
12992     PL_lex_fakeeof = (U8)fakeeof;
12993     if(yyparse(gramtype) && !PL_parser->error_count)
12994         qerror(Perl_mess(aTHX_ "Parse error"));
12995 }
12996
12997 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12998 static OP *
12999 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
13000 {
13001     OP *o;
13002     ENTER;
13003     SAVEVPTR(PL_eval_root);
13004     PL_eval_root = NULL;
13005     parse_recdescent(gramtype, fakeeof);
13006     o = PL_eval_root;
13007     LEAVE;
13008     return o;
13009 }
13010
13011 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
13012 static OP *
13013 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
13014 {
13015     OP *exprop;
13016     if (flags & ~PARSE_OPTIONAL)
13017         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
13018     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
13019     if (!exprop && !(flags & PARSE_OPTIONAL)) {
13020         if (!PL_parser->error_count)
13021             qerror(Perl_mess(aTHX_ "Parse error"));
13022         exprop = newOP(OP_NULL, 0);
13023     }
13024     return exprop;
13025 }
13026
13027 /*
13028 =for apidoc parse_arithexpr
13029
13030 Parse a Perl arithmetic expression.  This may contain operators of precedence
13031 down to the bit shift operators.  The expression must be followed (and thus
13032 terminated) either by a comparison or lower-precedence operator or by
13033 something that would normally terminate an expression such as semicolon.
13034 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13035 otherwise it is mandatory.  It is up to the caller to ensure that the
13036 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13037 the source of the code to be parsed and the lexical context for the
13038 expression.
13039
13040 The op tree representing the expression is returned.  If an optional
13041 expression is absent, a null pointer is returned, otherwise the pointer
13042 will be non-null.
13043
13044 If an error occurs in parsing or compilation, in most cases a valid op
13045 tree is returned anyway.  The error is reflected in the parser state,
13046 normally resulting in a single exception at the top level of parsing
13047 which covers all the compilation errors that occurred.  Some compilation
13048 errors, however, will throw an exception immediately.
13049
13050 =for apidoc Amnh||PARSE_OPTIONAL
13051
13052 =cut
13053
13054 */
13055
13056 OP *
13057 Perl_parse_arithexpr(pTHX_ U32 flags)
13058 {
13059     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
13060 }
13061
13062 /*
13063 =for apidoc parse_termexpr
13064
13065 Parse a Perl term expression.  This may contain operators of precedence
13066 down to the assignment operators.  The expression must be followed (and thus
13067 terminated) either by a comma or lower-precedence operator or by
13068 something that would normally terminate an expression such as semicolon.
13069 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13070 otherwise it is mandatory.  It is up to the caller to ensure that the
13071 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13072 the source of the code to be parsed and the lexical context for the
13073 expression.
13074
13075 The op tree representing the expression is returned.  If an optional
13076 expression is absent, a null pointer is returned, otherwise the pointer
13077 will be non-null.
13078
13079 If an error occurs in parsing or compilation, in most cases a valid op
13080 tree is returned anyway.  The error is reflected in the parser state,
13081 normally resulting in a single exception at the top level of parsing
13082 which covers all the compilation errors that occurred.  Some compilation
13083 errors, however, will throw an exception immediately.
13084
13085 =cut
13086 */
13087
13088 OP *
13089 Perl_parse_termexpr(pTHX_ U32 flags)
13090 {
13091     return parse_expr(LEX_FAKEEOF_COMMA, flags);
13092 }
13093
13094 /*
13095 =for apidoc parse_listexpr
13096
13097 Parse a Perl list expression.  This may contain operators of precedence
13098 down to the comma operator.  The expression must be followed (and thus
13099 terminated) either by a low-precedence logic operator such as C<or> or by
13100 something that would normally terminate an expression such as semicolon.
13101 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13102 otherwise it is mandatory.  It is up to the caller to ensure that the
13103 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13104 the source of the code to be parsed and the lexical context for the
13105 expression.
13106
13107 The op tree representing the expression is returned.  If an optional
13108 expression is absent, a null pointer is returned, otherwise the pointer
13109 will be non-null.
13110
13111 If an error occurs in parsing or compilation, in most cases a valid op
13112 tree is returned anyway.  The error is reflected in the parser state,
13113 normally resulting in a single exception at the top level of parsing
13114 which covers all the compilation errors that occurred.  Some compilation
13115 errors, however, will throw an exception immediately.
13116
13117 =cut
13118 */
13119
13120 OP *
13121 Perl_parse_listexpr(pTHX_ U32 flags)
13122 {
13123     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
13124 }
13125
13126 /*
13127 =for apidoc parse_fullexpr
13128
13129 Parse a single complete Perl expression.  This allows the full
13130 expression grammar, including the lowest-precedence operators such
13131 as C<or>.  The expression must be followed (and thus terminated) by a
13132 token that an expression would normally be terminated by: end-of-file,
13133 closing bracketing punctuation, semicolon, or one of the keywords that
13134 signals a postfix expression-statement modifier.  If C<flags> has the
13135 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
13136 mandatory.  It is up to the caller to ensure that the dynamic parser
13137 state (L</PL_parser> et al) is correctly set to reflect the source of
13138 the code to be parsed and the lexical context for the expression.
13139
13140 The op tree representing the expression is returned.  If an optional
13141 expression is absent, a null pointer is returned, otherwise the pointer
13142 will be non-null.
13143
13144 If an error occurs in parsing or compilation, in most cases a valid op
13145 tree is returned anyway.  The error is reflected in the parser state,
13146 normally resulting in a single exception at the top level of parsing
13147 which covers all the compilation errors that occurred.  Some compilation
13148 errors, however, will throw an exception immediately.
13149
13150 =cut
13151 */
13152
13153 OP *
13154 Perl_parse_fullexpr(pTHX_ U32 flags)
13155 {
13156     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
13157 }
13158
13159 /*
13160 =for apidoc parse_block
13161
13162 Parse a single complete Perl code block.  This consists of an opening
13163 brace, a sequence of statements, and a closing brace.  The block
13164 constitutes a lexical scope, so C<my> variables and various compile-time
13165 effects can be contained within it.  It is up to the caller to ensure
13166 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13167 reflect the source of the code to be parsed and the lexical context for
13168 the statement.
13169
13170 The op tree representing the code block is returned.  This is always a
13171 real op, never a null pointer.  It will normally be a C<lineseq> list,
13172 including C<nextstate> or equivalent ops.  No ops to construct any kind
13173 of runtime scope are included by virtue of it being a block.
13174
13175 If an error occurs in parsing or compilation, in most cases a valid op
13176 tree (most likely null) is returned anyway.  The error is reflected in
13177 the parser state, normally resulting in a single exception at the top
13178 level of parsing which covers all the compilation errors that occurred.
13179 Some compilation errors, however, will throw an exception immediately.
13180
13181 The C<flags> parameter is reserved for future use, and must always
13182 be zero.
13183
13184 =cut
13185 */
13186
13187 OP *
13188 Perl_parse_block(pTHX_ U32 flags)
13189 {
13190     if (flags)
13191         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13192     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13193 }
13194
13195 /*
13196 =for apidoc parse_barestmt
13197
13198 Parse a single unadorned Perl statement.  This may be a normal imperative
13199 statement or a declaration that has compile-time effect.  It does not
13200 include any label or other affixture.  It is up to the caller to ensure
13201 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13202 reflect the source of the code to be parsed and the lexical context for
13203 the statement.
13204
13205 The op tree representing the statement is returned.  This may be a
13206 null pointer if the statement is null, for example if it was actually
13207 a subroutine definition (which has compile-time side effects).  If not
13208 null, it will be ops directly implementing the statement, suitable to
13209 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
13210 equivalent op (except for those embedded in a scope contained entirely
13211 within the statement).
13212
13213 If an error occurs in parsing or compilation, in most cases a valid op
13214 tree (most likely null) is returned anyway.  The error is reflected in
13215 the parser state, normally resulting in a single exception at the top
13216 level of parsing which covers all the compilation errors that occurred.
13217 Some compilation errors, however, will throw an exception immediately.
13218
13219 The C<flags> parameter is reserved for future use, and must always
13220 be zero.
13221
13222 =cut
13223 */
13224
13225 OP *
13226 Perl_parse_barestmt(pTHX_ U32 flags)
13227 {
13228     if (flags)
13229         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13230     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13231 }
13232
13233 /*
13234 =for apidoc parse_label
13235
13236 Parse a single label, possibly optional, of the type that may prefix a
13237 Perl statement.  It is up to the caller to ensure that the dynamic parser
13238 state (L</PL_parser> et al) is correctly set to reflect the source of
13239 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13240 label is optional, otherwise it is mandatory.
13241
13242 The name of the label is returned in the form of a fresh scalar.  If an
13243 optional label is absent, a null pointer is returned.
13244
13245 If an error occurs in parsing, which can only occur if the label is
13246 mandatory, a valid label is returned anyway.  The error is reflected in
13247 the parser state, normally resulting in a single exception at the top
13248 level of parsing which covers all the compilation errors that occurred.
13249
13250 =cut
13251 */
13252
13253 SV *
13254 Perl_parse_label(pTHX_ U32 flags)
13255 {
13256     if (flags & ~PARSE_OPTIONAL)
13257         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13258     if (PL_nexttoke) {
13259         PL_parser->yychar = yylex();
13260         if (PL_parser->yychar == LABEL) {
13261             SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13262             PL_parser->yychar = YYEMPTY;
13263             cSVOPx(pl_yylval.opval)->op_sv = NULL;
13264             op_free(pl_yylval.opval);
13265             return labelsv;
13266         } else {
13267             yyunlex();
13268             goto no_label;
13269         }
13270     } else {
13271         char *s, *t;
13272         STRLEN wlen, bufptr_pos;
13273         lex_read_space(0);
13274         t = s = PL_bufptr;
13275         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13276             goto no_label;
13277         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
13278         if (word_takes_any_delimiter(s, wlen))
13279             goto no_label;
13280         bufptr_pos = s - SvPVX(PL_linestr);
13281         PL_bufptr = t;
13282         lex_read_space(LEX_KEEP_PREVIOUS);
13283         t = PL_bufptr;
13284         s = SvPVX(PL_linestr) + bufptr_pos;
13285         if (t[0] == ':' && t[1] != ':') {
13286             PL_oldoldbufptr = PL_oldbufptr;
13287             PL_oldbufptr = s;
13288             PL_bufptr = t+1;
13289             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13290         } else {
13291             PL_bufptr = s;
13292             no_label:
13293             if (flags & PARSE_OPTIONAL) {
13294                 return NULL;
13295             } else {
13296                 qerror(Perl_mess(aTHX_ "Parse error"));
13297                 return newSVpvs("x");
13298             }
13299         }
13300     }
13301 }
13302
13303 /*
13304 =for apidoc parse_fullstmt
13305
13306 Parse a single complete Perl statement.  This may be a normal imperative
13307 statement or a declaration that has compile-time effect, and may include
13308 optional labels.  It is up to the caller to ensure that the dynamic
13309 parser state (L</PL_parser> et al) is correctly set to reflect the source
13310 of the code to be parsed and the lexical context for the statement.
13311
13312 The op tree representing the statement is returned.  This may be a
13313 null pointer if the statement is null, for example if it was actually
13314 a subroutine definition (which has compile-time side effects).  If not
13315 null, it will be the result of a L</newSTATEOP> call, normally including
13316 a C<nextstate> or equivalent op.
13317
13318 If an error occurs in parsing or compilation, in most cases a valid op
13319 tree (most likely null) is returned anyway.  The error is reflected in
13320 the parser state, normally resulting in a single exception at the top
13321 level of parsing which covers all the compilation errors that occurred.
13322 Some compilation errors, however, will throw an exception immediately.
13323
13324 The C<flags> parameter is reserved for future use, and must always
13325 be zero.
13326
13327 =cut
13328 */
13329
13330 OP *
13331 Perl_parse_fullstmt(pTHX_ U32 flags)
13332 {
13333     if (flags)
13334         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13335     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13336 }
13337
13338 /*
13339 =for apidoc parse_stmtseq
13340
13341 Parse a sequence of zero or more Perl statements.  These may be normal
13342 imperative statements, including optional labels, or declarations
13343 that have compile-time effect, or any mixture thereof.  The statement
13344 sequence ends when a closing brace or end-of-file is encountered in a
13345 place where a new statement could have validly started.  It is up to
13346 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13347 is correctly set to reflect the source of the code to be parsed and the
13348 lexical context for the statements.
13349
13350 The op tree representing the statement sequence is returned.  This may
13351 be a null pointer if the statements were all null, for example if there
13352 were no statements or if there were only subroutine definitions (which
13353 have compile-time side effects).  If not null, it will be a C<lineseq>
13354 list, normally including C<nextstate> or equivalent ops.
13355
13356 If an error occurs in parsing or compilation, in most cases a valid op
13357 tree is returned anyway.  The error is reflected in the parser state,
13358 normally resulting in a single exception at the top level of parsing
13359 which covers all the compilation errors that occurred.  Some compilation
13360 errors, however, will throw an exception immediately.
13361
13362 The C<flags> parameter is reserved for future use, and must always
13363 be zero.
13364
13365 =cut
13366 */
13367
13368 OP *
13369 Perl_parse_stmtseq(pTHX_ U32 flags)
13370 {
13371     OP *stmtseqop;
13372     I32 c;
13373     if (flags)
13374         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13375     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13376     c = lex_peek_unichar(0);
13377     if (c != -1 && c != /*{*/'}')
13378         qerror(Perl_mess(aTHX_ "Parse error"));
13379     return stmtseqop;
13380 }
13381
13382 /*
13383 =for apidoc parse_subsignature
13384
13385 Parse a subroutine signature declaration. This is the contents of the
13386 parentheses following a named or anonymous subroutine declaration when the
13387 C<signatures> feature is enabled. Note that this function neither expects
13388 nor consumes the opening and closing parentheses around the signature; it
13389 is the caller's job to handle these.
13390
13391 This function must only be called during parsing of a subroutine; after
13392 L</start_subparse> has been called. It might allocate lexical variables on
13393 the pad for the current subroutine.
13394
13395 The op tree to unpack the arguments from the stack at runtime is returned.
13396 This op tree should appear at the beginning of the compiled function. The
13397 caller may wish to use L</op_append_list> to build their function body
13398 after it, or splice it together with the body before calling L</newATTRSUB>.
13399
13400 The C<flags> parameter is reserved for future use, and must always
13401 be zero.
13402
13403 =cut
13404 */
13405
13406 OP *
13407 Perl_parse_subsignature(pTHX_ U32 flags)
13408 {
13409     if (flags)
13410         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13411     return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13412 }
13413
13414 /*
13415  * ex: set ts=8 sts=4 sw=4 et:
13416  */