toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmnU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "invlist_inline.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g, h)  \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* XXX temporary backwards compatibility */
  49 #define PL_lex_brackets         (PL_parser->lex_brackets)
  50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  53 #define PL_lex_casemods         (PL_parser->lex_casemods)
  54 #define PL_lex_casestack        (PL_parser->lex_casestack)
  55 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  56 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  57 #define PL_lex_inpat            (PL_parser->lex_inpat)
  58 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  59 #define PL_lex_op               (PL_parser->lex_op)
  60 #define PL_lex_repl             (PL_parser->lex_repl)
  61 #define PL_lex_starts           (PL_parser->lex_starts)
  62 #define PL_lex_stuff            (PL_parser->lex_stuff)
  63 #define PL_multi_start          (PL_parser->multi_start)
  64 #define PL_multi_open           (PL_parser->multi_open)
  65 #define PL_multi_close          (PL_parser->multi_close)
  66 #define PL_preambled            (PL_parser->preambled)
  67 #define PL_linestr              (PL_parser->linestr)
  68 #define PL_expect               (PL_parser->expect)
  69 #define PL_copline              (PL_parser->copline)
  70 #define PL_bufptr               (PL_parser->bufptr)
  71 #define PL_oldbufptr            (PL_parser->oldbufptr)
  72 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  73 #define PL_linestart            (PL_parser->linestart)
  74 #define PL_bufend               (PL_parser->bufend)
  75 #define PL_last_uni             (PL_parser->last_uni)
  76 #define PL_last_lop             (PL_parser->last_lop)
  77 #define PL_last_lop_op          (PL_parser->last_lop_op)
  78 #define PL_lex_state            (PL_parser->lex_state)
  79 #define PL_rsfp                 (PL_parser->rsfp)
  80 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  81 #define PL_in_my                (PL_parser->in_my)
  82 #define PL_in_my_stash          (PL_parser->in_my_stash)
  83 #define PL_tokenbuf             (PL_parser->tokenbuf)
  84 #define PL_multi_end            (PL_parser->multi_end)
  85 #define PL_error_count          (PL_parser->error_count)
  86
  87 #  define PL_nexttoke           (PL_parser->nexttoke)
  88 #  define PL_nexttype           (PL_parser->nexttype)
  89 #  define PL_nextval            (PL_parser->nextval)
  90
  91
  92 #define SvEVALED(sv) \
  93     (SvTYPE(sv) >= SVt_PVNV \
  94     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
  95
  96 static const char ident_too_long[] = "Identifier too long";
  97 static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'";
  98
  99 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 100
 101 #define XENUMMASK  0x3f
 102 #define XFAKEEOF   0x40
 103 #define XFAKEBRACK 0x80
 104
 105 #ifdef USE_UTF8_SCRIPTS
 106 #   define UTF cBOOL(!IN_BYTES)
 107 #else
 108 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 109 #endif
 110
 111 /* The maximum number of characters preceding the unrecognized one to display */
 112 #define UNRECOGNIZED_PRECEDE_COUNT 10
 113
 114 /* In variables named $^X, these are the legal values for X.
 115  * 1999-02-27 mjd-perl-patch@plover.com */
 116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
 117
 118 #define SPACE_OR_TAB(c) isBLANK_A(c)
 119
 120 #define HEXFP_PEEK(s)     \
 121     (((s[0] == '.') && \
 122       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
 123      isALPHA_FOLD_EQ(s[0], 'p'))
 124
 125 /* LEX_* are values for PL_lex_state, the state of the lexer.
 126  * They are arranged oddly so that the guard on the switch statement
 127  * can get by with a single comparison (if the compiler is smart enough).
 128  *
 129  * These values refer to the various states within a sublex parse,
 130  * i.e. within a double quotish string
 131  */
 132
 133 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 134
 135 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 136 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 137 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 138 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 139 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 140
 141                                    /* at end of code, eg "$x" followed by:  */
 142 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 143 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 144
 145 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 146                                         string or after \E, $foo, etc       */
 147 #define LEX_INTERPCONST          2 /* NOT USED */
 148 #define LEX_FORMLINE             1 /* expecting a format line               */
 149
 150 /* returned to yyl_try() to request it to retry the parse loop, expected to only
 151    be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
 152    can also return it.
 153
 154    yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
 155    other token values are 258 or higher (see perly.h), so -1 should be
 156    a safe value here.
 157 */
 158 #define YYL_RETRY (-1)
 159
 160 #ifdef DEBUGGING
 161 static const char* const lex_state_names[] = {
 162     "KNOWNEXT",
 163     "FORMLINE",
 164     "INTERPCONST",
 165     "INTERPCONCAT",
 166     "INTERPENDMAYBE",
 167     "INTERPEND",
 168     "INTERPSTART",
 169     "INTERPPUSH",
 170     "INTERPCASEMOD",
 171     "INTERPNORMAL",
 172     "NORMAL"
 173 };
 174 #endif
 175
 176 #include "keywords.h"
 177
 178 /* CLINE is a macro that ensures PL_copline has a sane value */
 179
 180 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 181
 182 /*
 183  * Convenience functions to return different tokens and prime the
 184  * lexer for the next token.  They all take an argument.
 185  *
 186  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 187  * OPERATOR     : generic operator
 188  * AOPERATOR    : assignment operator
 189  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 190  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 191  * PREREF       : *EXPR where EXPR is not a simple identifier
 192  * TERM         : expression term
 193  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 194  * LOOPX        : loop exiting command (goto, last, dump, etc)
 195  * FTST         : file test operator
 196  * FUN0         : zero-argument function
 197  * FUN0OP       : zero-argument function, with its op created in this file
 198  * FUN1         : not used, except for not, which isn't a UNIOP
 199  * BOop         : bitwise or or xor
 200  * BAop         : bitwise and
 201  * BCop         : bitwise complement
 202  * SHop         : shift operator
 203  * PWop         : power operator
 204  * PMop         : pattern-matching operator
 205  * Aop          : addition-level operator
 206  * AopNOASSIGN  : addition-level operator that is never part of .=
 207  * Mop          : multiplication-level operator
 208  * ChEop        : chaining equality-testing operator
 209  * NCEop        : non-chaining comparison operator at equality precedence
 210  * ChRop        : chaining relational operator <= != gt
 211  * NCRop        : non-chaining relational operator isa
 212  *
 213  * Also see LOP and lop() below.
 214  */
 215
 216 #ifdef DEBUGGING /* Serve -DT. */
 217 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 218 #else
 219 #   define REPORT(retval) (retval)
 220 #endif
 221
 222 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 223 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 224 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 225 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 226 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 227 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 228 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 229 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 230 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
 231                          pl_yylval.ival=f, \
 232                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
 233                          REPORT((int)LOOPEX))
 234 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 235 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 236 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 237 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 238 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
 239 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
 240 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
 241                        REPORT(PERLY_TILDE)
 242 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
 243 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 244 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 245 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 246 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
 247 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 248 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
 249 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
 250 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
 251 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
 252
 253 /* This bit of chicanery makes a unary function followed by
 254  * a parenthesis into a function with one argument, highest precedence.
 255  * The UNIDOR macro is for unary functions that can be followed by the //
 256  * operator (such as C<shift // 0>).
 257  */
 258 #define UNI3(f,x,have_x) { \
 259         pl_yylval.ival = f; \
 260         if (have_x) PL_expect = x; \
 261         PL_bufptr = s; \
 262         PL_last_uni = PL_oldbufptr; \
 263         PL_last_lop_op = (f) < 0 ? -(f) : (f); \
 264         if (*s == '(') \
 265             return REPORT( (int)FUNC1 ); \
 266         s = skipspace(s); \
 267         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 268         }
 269 #define UNI(f)    UNI3(f,XTERM,1)
 270 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 271 #define UNIPROTO(f,optional) { \
 272         if (optional) PL_last_uni = PL_oldbufptr; \
 273         OPERATOR(f); \
 274         }
 275
 276 #define UNIBRACK(f) UNI3(f,0,0)
 277
 278 /* return has special case parsing.
 279  *
 280  * List operators have low precedence. Functions have high precedence.
 281  * Every built in, *except return*, if written with () around its arguments, is
 282  * parsed as a function. Hence every other list built in:
 283  *
 284  * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9
 285  * 429
 286  * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5
 287  * 639
 288  * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()'
 289  * Useless use of a constant (2) in void context at -e line 1.
 290  * Useless use of a constant (4) in void context at -e line 1.
 291  *
 292  * $
 293  *
 294  * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a
 295  * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string.
 296  *
 297  * Whereas return:
 298  *
 299  * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()'
 300  * 2
 301  * 4
 302  * 9
 303  * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()'
 304  * Useless use of a constant (2) in void context at -e line 1.
 305  * Useless use of a constant (4) in void context at -e line 1.
 306  * 9
 307  * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()'
 308  * Useless use of a constant (2) in void context at -e line 1.
 309  * Useless use of a constant (4) in void context at -e line 1.
 310  * 9
 311  * $
 312  *
 313  * and:
 314  * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()'
 315  * 2
 316  * 4
 317  * 6
 318  *
 319  * This last example is what we expect, but it's clearly inconsistent with how
 320  * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently
 321  * followed.
 322  *
 323  *
 324  * Perl 3 attempted to be consistent:
 325  *
 326  *   The rules are more consistent about where parens are needed and
 327  *   where they are not.  In particular, unary operators and list operators now
 328  *   behave like functions if they're called like functions.
 329  *
 330  * However, the behaviour for return was reverted to the "old" parsing with
 331  * patches 9-12:
 332  *
 333  *   The construct
 334  *   return (1,2,3);
 335  *   did not do what was expected, since return was swallowing the
 336  *   parens in order to consider itself a function.  The solution,
 337  *   since return never wants any trailing expression such as
 338  *   return (1,2,3) + 2;
 339  *   is to simply make return an exception to the paren-makes-a-function
 340  *   rule, and treat it the way it always was, so that it doesn't
 341  *   strip the parens.
 342  *
 343  * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with
 344  * LOP(OP_RETURN, XTERM);
 345  *
 346  * and constructs such as
 347  *
 348  *     return (Internals::V())[2]
 349  *
 350  * turn into syntax errors
 351  */
 352
 353 #define OLDLOP(f) \
 354         do { \
 355             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 356                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 357             pl_yylval.ival = (f); \
 358             PL_expect = XTERM; \
 359             PL_bufptr = s; \
 360             return (int)LSTOP; \
 361         } while(0)
 362
 363 #define COPLINE_INC_WITH_HERELINES                  \
 364     STMT_START {                                     \
 365         CopLINE_inc(PL_curcop);                       \
 366         if (PL_parser->herelines)                      \
 367             CopLINE(PL_curcop) += PL_parser->herelines, \
 368             PL_parser->herelines = 0;                    \
 369     } STMT_END
 370 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 371  * is no sublex_push to follow. */
 372 #define COPLINE_SET_FROM_MULTI_END            \
 373     STMT_START {                               \
 374         CopLINE_set(PL_curcop, PL_multi_end);   \
 375         if (PL_multi_end != PL_multi_start)      \
 376             PL_parser->herelines = 0;             \
 377     } STMT_END
 378
 379
 380 /* A file-local structure for passing around information about subroutines and
 381  * related definable words */
 382 struct code {
 383     SV *sv;
 384     CV *cv;
 385     GV *gv, **gvp;
 386     OP *rv2cv_op;
 387     PADOFFSET off;
 388     bool lex;
 389 };
 390
 391 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
 392
 393 #ifdef DEBUGGING
 394
 395 /* how to interpret the pl_yylval associated with the token */
 396 enum token_type {
 397     TOKENTYPE_NONE,
 398     TOKENTYPE_IVAL,
 399     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 400     TOKENTYPE_PVAL,
 401     TOKENTYPE_OPVAL
 402 };
 403
 404 #define DEBUG_TOKEN(Type, Name)                                         \
 405     { Name, TOKENTYPE_##Type, #Name }
 406
 407 static struct debug_tokens {
 408     const int token;
 409     enum token_type type;
 410     const char *name;
 411 } const debug_tokens[] =
 412 {
 413     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 414     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 415     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 416     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 417     { ANON_SIGSUB,      TOKENTYPE_IVAL,         "ANON_SIGSUB" },
 418     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 419     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 420     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 421     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 422     { CATCH,            TOKENTYPE_IVAL,         "CATCH" },
 423     { CHEQOP,           TOKENTYPE_OPNUM,        "CHEQOP" },
 424     { CHRELOP,          TOKENTYPE_OPNUM,        "CHRELOP" },
 425     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 426     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 427     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 428     { DO,               TOKENTYPE_NONE,         "DO" },
 429     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 430     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 431     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 432     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 433     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 434     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 435     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 436     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 437     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 438     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 439     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 440     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 441     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 442     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 443     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 444     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 445     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 446     { IF,               TOKENTYPE_IVAL,         "IF" },
 447     { LABEL,            TOKENTYPE_OPVAL,        "LABEL" },
 448     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 449     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 450     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 451     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 452     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 453     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 454     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 455     { MY,               TOKENTYPE_IVAL,         "MY" },
 456     { NCEQOP,           TOKENTYPE_OPNUM,        "NCEQOP" },
 457     { NCRELOP,          TOKENTYPE_OPNUM,        "NCRELOP" },
 458     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 459     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 460     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 461     { OROR,             TOKENTYPE_NONE,         "OROR" },
 462     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 463     DEBUG_TOKEN (IVAL, PERLY_AMPERSAND),
 464     DEBUG_TOKEN (IVAL, PERLY_BRACE_CLOSE),
 465     DEBUG_TOKEN (IVAL, PERLY_BRACE_OPEN),
 466     DEBUG_TOKEN (IVAL, PERLY_BRACKET_CLOSE),
 467     DEBUG_TOKEN (IVAL, PERLY_BRACKET_OPEN),
 468     DEBUG_TOKEN (IVAL, PERLY_COLON),
 469     DEBUG_TOKEN (IVAL, PERLY_COMMA),
 470     DEBUG_TOKEN (IVAL, PERLY_DOT),
 471     DEBUG_TOKEN (IVAL, PERLY_EQUAL_SIGN),
 472     DEBUG_TOKEN (IVAL, PERLY_EXCLAMATION_MARK),
 473     DEBUG_TOKEN (IVAL, PERLY_MINUS),
 474     DEBUG_TOKEN (IVAL, PERLY_PAREN_OPEN),
 475     DEBUG_TOKEN (IVAL, PERLY_PERCENT_SIGN),
 476     DEBUG_TOKEN (IVAL, PERLY_PLUS),
 477     DEBUG_TOKEN (IVAL, PERLY_QUESTION_MARK),
 478     DEBUG_TOKEN (IVAL, PERLY_SEMICOLON),
 479     DEBUG_TOKEN (IVAL, PERLY_SLASH),
 480     DEBUG_TOKEN (IVAL, PERLY_SNAIL),
 481     DEBUG_TOKEN (IVAL, PERLY_STAR),
 482     DEBUG_TOKEN (IVAL, PERLY_TILDE),
 483     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 484     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 485     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 486     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 487     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 488     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 489     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 490     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 491     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 492     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 493     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 494     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 495     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 496     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 497     { SIGSUB,           TOKENTYPE_NONE,         "SIGSUB" },
 498     { SUB,              TOKENTYPE_NONE,         "SUB" },
 499     { SUBLEXEND,        TOKENTYPE_NONE,         "SUBLEXEND" },
 500     { SUBLEXSTART,      TOKENTYPE_NONE,         "SUBLEXSTART" },
 501     { THING,            TOKENTYPE_OPVAL,        "THING" },
 502     { TRY,              TOKENTYPE_IVAL,         "TRY" },
 503     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 504     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 505     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 506     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 507     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 508     { USE,              TOKENTYPE_IVAL,         "USE" },
 509     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 510     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 511     { BAREWORD,         TOKENTYPE_OPVAL,        "BAREWORD" },
 512     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 513     { 0,                TOKENTYPE_NONE,         NULL }
 514 };
 515
 516 #undef DEBUG_TOKEN
 517
 518 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 519
 520 STATIC int
 521 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 522 {
 523     PERL_ARGS_ASSERT_TOKEREPORT;
 524
 525     if (DEBUG_T_TEST) {
 526         const char *name = NULL;
 527         enum token_type type = TOKENTYPE_NONE;
 528         const struct debug_tokens *p;
 529         SV* const report = newSVpvs("<== ");
 530
 531         for (p = debug_tokens; p->token; p++) {
 532             if (p->token == (int)rv) {
 533                 name = p->name;
 534                 type = p->type;
 535                 break;
 536             }
 537         }
 538         if (name)
 539             Perl_sv_catpv(aTHX_ report, name);
 540         else if (isGRAPH(rv))
 541         {
 542             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 543             if ((char)rv == 'p')
 544                 sv_catpvs(report, " (pending identifier)");
 545         }
 546         else if (!rv)
 547             sv_catpvs(report, "EOF");
 548         else
 549             Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
 550         switch (type) {
 551         case TOKENTYPE_NONE:
 552             break;
 553         case TOKENTYPE_IVAL:
 554             Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
 555             break;
 556         case TOKENTYPE_OPNUM:
 557             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 558                                     PL_op_name[lvalp->ival]);
 559             break;
 560         case TOKENTYPE_PVAL:
 561             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 562             break;
 563         case TOKENTYPE_OPVAL:
 564             if (lvalp->opval) {
 565                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 566                                     PL_op_name[lvalp->opval->op_type]);
 567                 if (lvalp->opval->op_type == OP_CONST) {
 568                     Perl_sv_catpvf(aTHX_ report, " %s",
 569                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 570                 }
 571
 572             }
 573             else
 574                 sv_catpvs(report, "(opval=null)");
 575             break;
 576         }
 577         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 578     };
 579     return (int)rv;
 580 }
 581
 582
 583 /* print the buffer with suitable escapes */
 584
 585 STATIC void
 586 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 587 {
 588     SV* const tmp = newSVpvs("");
 589
 590     PERL_ARGS_ASSERT_PRINTBUF;
 591
 592     GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
 593     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 594     GCC_DIAG_RESTORE_STMT;
 595     SvREFCNT_dec(tmp);
 596 }
 597
 598 #endif
 599
 600 /*
 601  * S_ao
 602  *
 603  * This subroutine looks for an '=' next to the operator that has just been
 604  * parsed and turns it into an ASSIGNOP if it finds one.
 605  */
 606
 607 STATIC int
 608 S_ao(pTHX_ int toketype)
 609 {
 610     if (*PL_bufptr == '=') {
 611         PL_bufptr++;
 612
 613         switch (toketype) {
 614             case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break;
 615             case OROR:   pl_yylval.ival = OP_ORASSIGN;  break;
 616             case DORDOR: pl_yylval.ival = OP_DORASSIGN; break;
 617         }
 618
 619         toketype = ASSIGNOP;
 620     }
 621     return REPORT(toketype);
 622 }
 623
 624 /*
 625  * S_no_op
 626  * When Perl expects an operator and finds something else, no_op
 627  * prints the warning.  It always prints "<something> found where
 628  * operator expected.  It prints "Missing semicolon on previous line?"
 629  * if the surprise occurs at the start of the line.  "do you need to
 630  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 631  * where the compiler doesn't know if foo is a method call or a function.
 632  * It prints "Missing operator before end of line" if there's nothing
 633  * after the missing operator, or "... before <...>" if there is something
 634  * after the missing operator.
 635  *
 636  * PL_bufptr is expected to point to the start of the thing that was found,
 637  * and s after the next token or partial token.
 638  */
 639
 640 STATIC void
 641 S_no_op(pTHX_ const char *const what, char *s)
 642 {
 643     char * const oldbp = PL_bufptr;
 644     const bool is_first = (PL_oldbufptr == PL_linestart);
 645
 646     PERL_ARGS_ASSERT_NO_OP;
 647
 648     if (!s)
 649         s = oldbp;
 650     else
 651         PL_bufptr = s;
 652     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 653     if (ckWARN_d(WARN_SYNTAX)) {
 654         if (is_first)
 655             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 656                     "\t(Missing semicolon on previous line?)\n");
 657         else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
 658                                                            PL_bufend,
 659                                                            UTF))
 660         {
 661             const char *t;
 662             for (t = PL_oldoldbufptr;
 663                  (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
 664                  t += UTF ? UTF8SKIP(t) : 1)
 665             {
 666                 NOOP;
 667             }
 668             if (t < PL_bufptr && isSPACE(*t))
 669                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 670                         "\t(Do you need to predeclare %" UTF8f "?)\n",
 671                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 672         }
 673         else {
 674             assert(s >= oldbp);
 675             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 676                     "\t(Missing operator before %" UTF8f "?)\n",
 677                      UTF8fARG(UTF, s - oldbp, oldbp));
 678         }
 679     }
 680     PL_bufptr = oldbp;
 681 }
 682
 683 /*
 684  * S_missingterm
 685  * Complain about missing quote/regexp/heredoc terminator.
 686  * If it's called with NULL then it cauterizes the line buffer.
 687  * If we're in a delimited string and the delimiter is a control
 688  * character, it's reformatted into a two-char sequence like ^C.
 689  * This is fatal.
 690  */
 691
 692 STATIC void
 693 S_missingterm(pTHX_ char *s, STRLEN len)
 694 {
 695     char tmpbuf[UTF8_MAXBYTES + 1];
 696     char q;
 697     bool uni = FALSE;
 698     SV *sv;
 699     if (s) {
 700         char * const nl = (char *) my_memrchr(s, '\n', len);
 701         if (nl) {
 702             *nl = '\0';
 703             len = nl - s;
 704         }
 705         uni = UTF;
 706     }
 707     else if (PL_multi_close < 32) {
 708         *tmpbuf = '^';
 709         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 710         tmpbuf[2] = '\0';
 711         s = tmpbuf;
 712         len = 2;
 713     }
 714     else {
 715         if (LIKELY(PL_multi_close < 256)) {
 716             *tmpbuf = (char)PL_multi_close;
 717             tmpbuf[1] = '\0';
 718             len = 1;
 719         }
 720         else {
 721             char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
 722             *end = '\0';
 723             len = end - tmpbuf;
 724             uni = TRUE;
 725         }
 726         s = tmpbuf;
 727     }
 728     q = memchr(s, '"', len) ? '\'' : '"';
 729     sv = newSVpvn_flags(s, len, SVs_TEMP);
 730     if (uni)
 731         SvUTF8_on(sv);
 732     Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
 733                      " anywhere before EOF", q, SVfARG(sv), q);
 734 }
 735
 736 #include "feature.h"
 737
 738 /*
 739  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 740  * utf16-to-utf8-reversed.
 741  */
 742
 743 #ifdef PERL_CR_FILTER
 744 static void
 745 strip_return(SV *sv)
 746 {
 747     const char *s = SvPVX_const(sv);
 748     const char * const e = s + SvCUR(sv);
 749
 750     PERL_ARGS_ASSERT_STRIP_RETURN;
 751
 752     /* outer loop optimized to do nothing if there are no CR-LFs */
 753     while (s < e) {
 754         if (*s++ == '\r' && *s == '\n') {
 755             /* hit a CR-LF, need to copy the rest */
 756             char *d = s - 1;
 757             *d++ = *s++;
 758             while (s < e) {
 759                 if (*s == '\r' && s[1] == '\n')
 760                     s++;
 761                 *d++ = *s++;
 762             }
 763             SvCUR(sv) -= s - d;
 764             return;
 765         }
 766     }
 767 }
 768
 769 STATIC I32
 770 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 771 {
 772     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 773     if (count > 0 && !maxlen)
 774         strip_return(sv);
 775     return count;
 776 }
 777 #endif
 778
 779 /*
 780 =for apidoc lex_start
 781
 782 Creates and initialises a new lexer/parser state object, supplying
 783 a context in which to lex and parse from a new source of Perl code.
 784 A pointer to the new state object is placed in L</PL_parser>.  An entry
 785 is made on the save stack so that upon unwinding, the new state object
 786 will be destroyed and the former value of L</PL_parser> will be restored.
 787 Nothing else need be done to clean up the parsing context.
 788
 789 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
 790 non-null, provides a string (in SV form) containing code to be parsed.
 791 A copy of the string is made, so subsequent modification of C<line>
 792 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
 793 from which code will be read to be parsed.  If both are non-null, the
 794 code in C<line> comes first and must consist of complete lines of input,
 795 and C<rsfp> supplies the remainder of the source.
 796
 797 The C<flags> parameter is reserved for future use.  Currently it is only
 798 used by perl internally, so extensions should always pass zero.
 799
 800 =cut
 801 */
 802
 803 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 804    can share filters with the current parser.
 805    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 806    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 807    destruction. This is used to handle the case of defaulting to reading the
 808    script from the standard input because no filename was given on the command
 809    line (without getting confused by situation where STDIN has been closed, so
 810    the script handle is opened on fd 0)  */
 811
 812 void
 813 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 814 {
 815     const char *s = NULL;
 816     yy_parser *parser, *oparser;
 817
 818     if (flags && flags & ~LEX_START_FLAGS)
 819         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 820
 821     /* create and initialise a parser */
 822
 823     Newxz(parser, 1, yy_parser);
 824     parser->old_parser = oparser = PL_parser;
 825     PL_parser = parser;
 826
 827     parser->stack = NULL;
 828     parser->stack_max1 = NULL;
 829     parser->ps = NULL;
 830
 831     /* on scope exit, free this parser and restore any outer one */
 832     SAVEPARSER(parser);
 833     parser->saved_curcop = PL_curcop;
 834
 835     /* initialise lexer state */
 836
 837     parser->nexttoke = 0;
 838     parser->error_count = oparser ? oparser->error_count : 0;
 839     parser->copline = parser->preambling = NOLINE;
 840     parser->lex_state = LEX_NORMAL;
 841     parser->expect = XSTATE;
 842     parser->rsfp = rsfp;
 843     parser->recheck_utf8_validity = TRUE;
 844     parser->rsfp_filters =
 845       !(flags & LEX_START_SAME_FILTER) || !oparser
 846         ? NULL
 847         : MUTABLE_AV(SvREFCNT_inc(
 848             oparser->rsfp_filters
 849              ? oparser->rsfp_filters
 850              : (oparser->rsfp_filters = newAV())
 851           ));
 852
 853     Newx(parser->lex_brackstack, 120, char);
 854     Newx(parser->lex_casestack, 12, char);
 855     *parser->lex_casestack = '\0';
 856     Newxz(parser->lex_shared, 1, LEXSHARED);
 857
 858     if (line) {
 859         STRLEN len;
 860         const U8* first_bad_char_loc;
 861
 862         s = SvPV_const(line, len);
 863
 864         if (   SvUTF8(line)
 865             && UNLIKELY(! is_utf8_string_loc((U8 *) s,
 866                                              SvCUR(line),
 867                                              &first_bad_char_loc)))
 868         {
 869             _force_out_malformed_utf8_message(first_bad_char_loc,
 870                                               (U8 *) s + SvCUR(line),
 871                                               0,
 872                                               1 /* 1 means die */ );
 873             NOT_REACHED; /* NOTREACHED */
 874         }
 875
 876         parser->linestr = flags & LEX_START_COPIED
 877                             ? SvREFCNT_inc_simple_NN(line)
 878                             : newSVpvn_flags(s, len, SvUTF8(line));
 879         if (!rsfp)
 880             sv_catpvs(parser->linestr, "\n;");
 881     } else {
 882         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 883     }
 884
 885     parser->oldoldbufptr =
 886         parser->oldbufptr =
 887         parser->bufptr =
 888         parser->linestart = SvPVX(parser->linestr);
 889     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 890     parser->last_lop = parser->last_uni = NULL;
 891
 892     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 893                                                         |LEX_DONT_CLOSE_RSFP));
 894     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 895                                                         |LEX_DONT_CLOSE_RSFP));
 896
 897     parser->in_pod = parser->filtered = 0;
 898 }
 899
 900
 901 /* delete a parser object */
 902
 903 void
 904 Perl_parser_free(pTHX_  const yy_parser *parser)
 905 {
 906     PERL_ARGS_ASSERT_PARSER_FREE;
 907
 908     PL_curcop = parser->saved_curcop;
 909     SvREFCNT_dec(parser->linestr);
 910
 911     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 912         PerlIO_clearerr(parser->rsfp);
 913     else if (parser->rsfp && (!parser->old_parser
 914           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 915         PerlIO_close(parser->rsfp);
 916     SvREFCNT_dec(parser->rsfp_filters);
 917     SvREFCNT_dec(parser->lex_stuff);
 918     SvREFCNT_dec(parser->lex_sub_repl);
 919
 920     Safefree(parser->lex_brackstack);
 921     Safefree(parser->lex_casestack);
 922     Safefree(parser->lex_shared);
 923     PL_parser = parser->old_parser;
 924     Safefree(parser);
 925 }
 926
 927 void
 928 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 929 {
 930     I32 nexttoke = parser->nexttoke;
 931     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 932     while (nexttoke--) {
 933         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 934          && parser->nextval[nexttoke].opval
 935          && parser->nextval[nexttoke].opval->op_slabbed
 936          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 937             op_free(parser->nextval[nexttoke].opval);
 938             parser->nextval[nexttoke].opval = NULL;
 939         }
 940     }
 941 }
 942
 943
 944 /*
 945 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
 946
 947 Buffer scalar containing the chunk currently under consideration of the
 948 text currently being lexed.  This is always a plain string scalar (for
 949 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 950 normal scalar means; instead refer to the buffer directly by the pointer
 951 variables described below.
 952
 953 The lexer maintains various C<char*> pointers to things in the
 954 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 955 reallocated, all of these pointers must be updated.  Don't attempt to
 956 do this manually, but rather use L</lex_grow_linestr> if you need to
 957 reallocate the buffer.
 958
 959 The content of the text chunk in the buffer is commonly exactly one
 960 complete line of input, up to and including a newline terminator,
 961 but there are situations where it is otherwise.  The octets of the
 962 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 963 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 964 flag on this scalar, which may disagree with it.
 965
 966 For direct examination of the buffer, the variable
 967 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 968 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 969 of these pointers is usually preferable to examination of the scalar
 970 through normal scalar means.
 971
 972 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
 973
 974 Direct pointer to the end of the chunk of text currently being lexed, the
 975 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 976 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 977 always located at the end of the buffer, and does not count as part of
 978 the buffer's contents.
 979
 980 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
 981
 982 Points to the current position of lexing inside the lexer buffer.
 983 Characters around this point may be freely examined, within
 984 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 985 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 986 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 987
 988 Lexing code (whether in the Perl core or not) moves this pointer past
 989 the characters that it consumes.  It is also expected to perform some
 990 bookkeeping whenever a newline character is consumed.  This movement
 991 can be more conveniently performed by the function L</lex_read_to>,
 992 which handles newlines appropriately.
 993
 994 Interpretation of the buffer's octets can be abstracted out by
 995 using the slightly higher-level functions L</lex_peek_unichar> and
 996 L</lex_read_unichar>.
 997
 998 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
 999
1000 Points to the start of the current line inside the lexer buffer.
1001 This is useful for indicating at which column an error occurred, and
1002 not much else.  This must be updated by any lexing code that consumes
1003 a newline; the function L</lex_read_to> handles this detail.
1004
1005 =cut
1006 */
1007
1008 /*
1009 =for apidoc lex_bufutf8
1010
1011 Indicates whether the octets in the lexer buffer
1012 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
1013 of Unicode characters.  If not, they should be interpreted as Latin-1
1014 characters.  This is analogous to the C<SvUTF8> flag for scalars.
1015
1016 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
1017 contains valid UTF-8.  Lexing code must be robust in the face of invalid
1018 encoding.
1019
1020 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
1021 is significant, but not the whole story regarding the input character
1022 encoding.  Normally, when a file is being read, the scalar contains octets
1023 and its C<SvUTF8> flag is off, but the octets should be interpreted as
1024 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
1025 however, the scalar may have the C<SvUTF8> flag on, and in this case its
1026 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
1027 is in effect.  This logic may change in the future; use this function
1028 instead of implementing the logic yourself.
1029
1030 =cut
1031 */
1032
1033 bool
1034 Perl_lex_bufutf8(pTHX)
1035 {
1036     return UTF;
1037 }
1038
1039 /*
1040 =for apidoc lex_grow_linestr
1041
1042 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
1043 at least C<len> octets (including terminating C<NUL>).  Returns a
1044 pointer to the reallocated buffer.  This is necessary before making
1045 any direct modification of the buffer that would increase its length.
1046 L</lex_stuff_pvn> provides a more convenient way to insert text into
1047 the buffer.
1048
1049 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
1050 this function updates all of the lexer's variables that point directly
1051 into the buffer.
1052
1053 =cut
1054 */
1055
1056 char *
1057 Perl_lex_grow_linestr(pTHX_ STRLEN len)
1058 {
1059     SV *linestr;
1060     char *buf;
1061     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1062     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
1063     bool current;
1064
1065     linestr = PL_parser->linestr;
1066     buf = SvPVX(linestr);
1067     if (len <= SvLEN(linestr))
1068         return buf;
1069
1070     /* Is the lex_shared linestr SV the same as the current linestr SV?
1071      * Only in this case does re_eval_start need adjusting, since it
1072      * points within lex_shared->ls_linestr's buffer */
1073     current = (   !PL_parser->lex_shared->ls_linestr
1074                || linestr == PL_parser->lex_shared->ls_linestr);
1075
1076     bufend_pos = PL_parser->bufend - buf;
1077     bufptr_pos = PL_parser->bufptr - buf;
1078     oldbufptr_pos = PL_parser->oldbufptr - buf;
1079     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1080     linestart_pos = PL_parser->linestart - buf;
1081     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1082     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1083     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
1084                             PL_parser->lex_shared->re_eval_start - buf : 0;
1085
1086     buf = sv_grow(linestr, len);
1087
1088     PL_parser->bufend = buf + bufend_pos;
1089     PL_parser->bufptr = buf + bufptr_pos;
1090     PL_parser->oldbufptr = buf + oldbufptr_pos;
1091     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1092     PL_parser->linestart = buf + linestart_pos;
1093     if (PL_parser->last_uni)
1094         PL_parser->last_uni = buf + last_uni_pos;
1095     if (PL_parser->last_lop)
1096         PL_parser->last_lop = buf + last_lop_pos;
1097     if (current && PL_parser->lex_shared->re_eval_start)
1098         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
1099     return buf;
1100 }
1101
1102 /*
1103 =for apidoc lex_stuff_pvn
1104
1105 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1106 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1107 reallocating the buffer if necessary.  This means that lexing code that
1108 runs later will see the characters as if they had appeared in the input.
1109 It is not recommended to do this as part of normal parsing, and most
1110 uses of this facility run the risk of the inserted characters being
1111 interpreted in an unintended manner.
1112
1113 The string to be inserted is represented by C<len> octets starting
1114 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1115 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1116 The characters are recoded for the lexer buffer, according to how the
1117 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1118 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1119 function is more convenient.
1120
1121 =for apidoc Amnh||LEX_STUFF_UTF8
1122
1123 =cut
1124 */
1125
1126 void
1127 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1128 {
1129     char *bufptr;
1130     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1131     if (flags & ~(LEX_STUFF_UTF8))
1132         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1133     if (UTF) {
1134         if (flags & LEX_STUFF_UTF8) {
1135             goto plain_copy;
1136         } else {
1137             STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1138                                                        (U8 *) pv + len);
1139             const char *p, *e = pv+len;;
1140             if (!highhalf)
1141                 goto plain_copy;
1142             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1143             bufptr = PL_parser->bufptr;
1144             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1145             SvCUR_set(PL_parser->linestr,
1146                 SvCUR(PL_parser->linestr) + len+highhalf);
1147             PL_parser->bufend += len+highhalf;
1148             for (p = pv; p != e; p++) {
1149                 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1150             }
1151         }
1152     } else {
1153         if (flags & LEX_STUFF_UTF8) {
1154             STRLEN highhalf = 0;
1155             const char *p, *e = pv+len;
1156             for (p = pv; p != e; p++) {
1157                 U8 c = (U8)*p;
1158                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1159                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1160                                 "non-Latin-1 character into Latin-1 input");
1161                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1162                     p++;
1163                     highhalf++;
1164                 } else assert(UTF8_IS_INVARIANT(c));
1165             }
1166             if (!highhalf)
1167                 goto plain_copy;
1168             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1169             bufptr = PL_parser->bufptr;
1170             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1171             SvCUR_set(PL_parser->linestr,
1172                 SvCUR(PL_parser->linestr) + len-highhalf);
1173             PL_parser->bufend += len-highhalf;
1174             p = pv;
1175             while (p < e) {
1176                 if (UTF8_IS_INVARIANT(*p)) {
1177                     *bufptr++ = *p;
1178                     p++;
1179                 }
1180                 else {
1181                     assert(p < e -1 );
1182                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1183                     p += 2;
1184                 }
1185             }
1186         } else {
1187           plain_copy:
1188             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1189             bufptr = PL_parser->bufptr;
1190             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1191             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1192             PL_parser->bufend += len;
1193             Copy(pv, bufptr, len, char);
1194         }
1195     }
1196 }
1197
1198 /*
1199 =for apidoc lex_stuff_pv
1200
1201 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1202 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1203 reallocating the buffer if necessary.  This means that lexing code that
1204 runs later will see the characters as if they had appeared in the input.
1205 It is not recommended to do this as part of normal parsing, and most
1206 uses of this facility run the risk of the inserted characters being
1207 interpreted in an unintended manner.
1208
1209 The string to be inserted is represented by octets starting at C<pv>
1210 and continuing to the first nul.  These octets are interpreted as either
1211 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1212 in C<flags>.  The characters are recoded for the lexer buffer, according
1213 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1214 If it is not convenient to nul-terminate a string to be inserted, the
1215 L</lex_stuff_pvn> function is more appropriate.
1216
1217 =cut
1218 */
1219
1220 void
1221 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1222 {
1223     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1224     lex_stuff_pvn(pv, strlen(pv), flags);
1225 }
1226
1227 /*
1228 =for apidoc lex_stuff_sv
1229
1230 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1231 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1232 reallocating the buffer if necessary.  This means that lexing code that
1233 runs later will see the characters as if they had appeared in the input.
1234 It is not recommended to do this as part of normal parsing, and most
1235 uses of this facility run the risk of the inserted characters being
1236 interpreted in an unintended manner.
1237
1238 The string to be inserted is the string value of C<sv>.  The characters
1239 are recoded for the lexer buffer, according to how the buffer is currently
1240 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1241 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1242 need to construct a scalar.
1243
1244 =cut
1245 */
1246
1247 void
1248 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1249 {
1250     char *pv;
1251     STRLEN len;
1252     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1253     if (flags)
1254         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1255     pv = SvPV(sv, len);
1256     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1257 }
1258
1259 /*
1260 =for apidoc lex_unstuff
1261
1262 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1263 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1264 This hides the discarded text from any lexing code that runs later,
1265 as if the text had never appeared.
1266
1267 This is not the normal way to consume lexed text.  For that, use
1268 L</lex_read_to>.
1269
1270 =cut
1271 */
1272
1273 void
1274 Perl_lex_unstuff(pTHX_ char *ptr)
1275 {
1276     char *buf, *bufend;
1277     STRLEN unstuff_len;
1278     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1279     buf = PL_parser->bufptr;
1280     if (ptr < buf)
1281         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1282     if (ptr == buf)
1283         return;
1284     bufend = PL_parser->bufend;
1285     if (ptr > bufend)
1286         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1287     unstuff_len = ptr - buf;
1288     Move(ptr, buf, bufend+1-ptr, char);
1289     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1290     PL_parser->bufend = bufend - unstuff_len;
1291 }
1292
1293 /*
1294 =for apidoc lex_read_to
1295
1296 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1297 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1298 performing the correct bookkeeping whenever a newline character is passed.
1299 This is the normal way to consume lexed text.
1300
1301 Interpretation of the buffer's octets can be abstracted out by
1302 using the slightly higher-level functions L</lex_peek_unichar> and
1303 L</lex_read_unichar>.
1304
1305 =cut
1306 */
1307
1308 void
1309 Perl_lex_read_to(pTHX_ char *ptr)
1310 {
1311     char *s;
1312     PERL_ARGS_ASSERT_LEX_READ_TO;
1313     s = PL_parser->bufptr;
1314     if (ptr < s || ptr > PL_parser->bufend)
1315         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1316     for (; s != ptr; s++)
1317         if (*s == '\n') {
1318             COPLINE_INC_WITH_HERELINES;
1319             PL_parser->linestart = s+1;
1320         }
1321     PL_parser->bufptr = ptr;
1322 }
1323
1324 /*
1325 =for apidoc lex_discard_to
1326
1327 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1328 up to C<ptr>.  The remaining content of the buffer will be moved, and
1329 all pointers into the buffer updated appropriately.  C<ptr> must not
1330 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1331 it is not permitted to discard text that has yet to be lexed.
1332
1333 Normally it is not necessarily to do this directly, because it suffices to
1334 use the implicit discarding behaviour of L</lex_next_chunk> and things
1335 based on it.  However, if a token stretches across multiple lines,
1336 and the lexing code has kept multiple lines of text in the buffer for
1337 that purpose, then after completion of the token it would be wise to
1338 explicitly discard the now-unneeded earlier lines, to avoid future
1339 multi-line tokens growing the buffer without bound.
1340
1341 =cut
1342 */
1343
1344 void
1345 Perl_lex_discard_to(pTHX_ char *ptr)
1346 {
1347     char *buf;
1348     STRLEN discard_len;
1349     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1350     buf = SvPVX(PL_parser->linestr);
1351     if (ptr < buf)
1352         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1353     if (ptr == buf)
1354         return;
1355     if (ptr > PL_parser->bufptr)
1356         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1357     discard_len = ptr - buf;
1358     if (PL_parser->oldbufptr < ptr)
1359         PL_parser->oldbufptr = ptr;
1360     if (PL_parser->oldoldbufptr < ptr)
1361         PL_parser->oldoldbufptr = ptr;
1362     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1363         PL_parser->last_uni = NULL;
1364     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1365         PL_parser->last_lop = NULL;
1366     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1367     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1368     PL_parser->bufend -= discard_len;
1369     PL_parser->bufptr -= discard_len;
1370     PL_parser->oldbufptr -= discard_len;
1371     PL_parser->oldoldbufptr -= discard_len;
1372     if (PL_parser->last_uni)
1373         PL_parser->last_uni -= discard_len;
1374     if (PL_parser->last_lop)
1375         PL_parser->last_lop -= discard_len;
1376 }
1377
1378 void
1379 Perl_notify_parser_that_changed_to_utf8(pTHX)
1380 {
1381     /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1382      * off to on.  At compile time, this has the effect of entering a 'use
1383      * utf8' section.  This means that any input was not previously checked for
1384      * UTF-8 (because it was off), but now we do need to check it, or our
1385      * assumptions about the input being sane could be wrong, and we could
1386      * segfault.  This routine just sets a flag so that the next time we look
1387      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1388      * proper phase, there may not be a parser object, but if there is, setting
1389      * the flag is harmless */
1390
1391     if (PL_parser) {
1392         PL_parser->recheck_utf8_validity = TRUE;
1393     }
1394 }
1395
1396 /*
1397 =for apidoc lex_next_chunk
1398
1399 Reads in the next chunk of text to be lexed, appending it to
1400 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1401 looked to the end of the current chunk and wants to know more.  It is
1402 usual, but not necessary, for lexing to have consumed the entirety of
1403 the current chunk at this time.
1404
1405 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1406 chunk (i.e., the current chunk has been entirely consumed), normally the
1407 current chunk will be discarded at the same time that the new chunk is
1408 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1409 will not be discarded.  If the current chunk has not been entirely
1410 consumed, then it will not be discarded regardless of the flag.
1411
1412 Returns true if some new text was added to the buffer, or false if the
1413 buffer has reached the end of the input text.
1414
1415 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1416
1417 =cut
1418 */
1419
1420 #define LEX_FAKE_EOF 0x80000000
1421 #define LEX_NO_TERM  0x40000000 /* here-doc */
1422
1423 bool
1424 Perl_lex_next_chunk(pTHX_ U32 flags)
1425 {
1426     SV *linestr;
1427     char *buf;
1428     STRLEN old_bufend_pos, new_bufend_pos;
1429     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1430     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1431     bool got_some_for_debugger = 0;
1432     bool got_some;
1433
1434     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1435         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1436     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1437         return FALSE;
1438     linestr = PL_parser->linestr;
1439     buf = SvPVX(linestr);
1440     if (!(flags & LEX_KEEP_PREVIOUS)
1441           && PL_parser->bufptr == PL_parser->bufend)
1442     {
1443         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1444         linestart_pos = 0;
1445         if (PL_parser->last_uni != PL_parser->bufend)
1446             PL_parser->last_uni = NULL;
1447         if (PL_parser->last_lop != PL_parser->bufend)
1448             PL_parser->last_lop = NULL;
1449         last_uni_pos = last_lop_pos = 0;
1450         *buf = 0;
1451         SvCUR_set(linestr, 0);
1452     } else {
1453         old_bufend_pos = PL_parser->bufend - buf;
1454         bufptr_pos = PL_parser->bufptr - buf;
1455         oldbufptr_pos = PL_parser->oldbufptr - buf;
1456         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1457         linestart_pos = PL_parser->linestart - buf;
1458         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1459         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1460     }
1461     if (flags & LEX_FAKE_EOF) {
1462         goto eof;
1463     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1464         got_some = 0;
1465     } else if (filter_gets(linestr, old_bufend_pos)) {
1466         got_some = 1;
1467         got_some_for_debugger = 1;
1468     } else if (flags & LEX_NO_TERM) {
1469         got_some = 0;
1470     } else {
1471         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1472             SvPVCLEAR(linestr);
1473         eof:
1474         /* End of real input.  Close filehandle (unless it was STDIN),
1475          * then add implicit termination.
1476          */
1477         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1478             PerlIO_clearerr(PL_parser->rsfp);
1479         else if (PL_parser->rsfp)
1480             (void)PerlIO_close(PL_parser->rsfp);
1481         PL_parser->rsfp = NULL;
1482         PL_parser->in_pod = PL_parser->filtered = 0;
1483         if (!PL_in_eval && PL_minus_p) {
1484             sv_catpvs(linestr,
1485                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1486             PL_minus_n = PL_minus_p = 0;
1487         } else if (!PL_in_eval && PL_minus_n) {
1488             sv_catpvs(linestr, /*{*/";}");
1489             PL_minus_n = 0;
1490         } else
1491             sv_catpvs(linestr, ";");
1492         got_some = 1;
1493     }
1494     buf = SvPVX(linestr);
1495     new_bufend_pos = SvCUR(linestr);
1496     PL_parser->bufend = buf + new_bufend_pos;
1497     PL_parser->bufptr = buf + bufptr_pos;
1498
1499     if (UTF) {
1500         const U8* first_bad_char_loc;
1501         if (UNLIKELY(! is_utf8_string_loc(
1502                             (U8 *) PL_parser->bufptr,
1503                                    PL_parser->bufend - PL_parser->bufptr,
1504                                    &first_bad_char_loc)))
1505         {
1506             _force_out_malformed_utf8_message(first_bad_char_loc,
1507                                               (U8 *) PL_parser->bufend,
1508                                               0,
1509                                               1 /* 1 means die */ );
1510             NOT_REACHED; /* NOTREACHED */
1511         }
1512     }
1513
1514     PL_parser->oldbufptr = buf + oldbufptr_pos;
1515     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1516     PL_parser->linestart = buf + linestart_pos;
1517     if (PL_parser->last_uni)
1518         PL_parser->last_uni = buf + last_uni_pos;
1519     if (PL_parser->last_lop)
1520         PL_parser->last_lop = buf + last_lop_pos;
1521     if (PL_parser->preambling != NOLINE) {
1522         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1523         PL_parser->preambling = NOLINE;
1524     }
1525     if (   got_some_for_debugger
1526         && PERLDB_LINE_OR_SAVESRC
1527         && PL_curstash != PL_debstash)
1528     {
1529         /* debugger active and we're not compiling the debugger code,
1530          * so store the line into the debugger's array of lines
1531          */
1532         update_debugger_info(NULL, buf+old_bufend_pos,
1533             new_bufend_pos-old_bufend_pos);
1534     }
1535     return got_some;
1536 }
1537
1538 /*
1539 =for apidoc lex_peek_unichar
1540
1541 Looks ahead one (Unicode) character in the text currently being lexed.
1542 Returns the codepoint (unsigned integer value) of the next character,
1543 or -1 if lexing has reached the end of the input text.  To consume the
1544 peeked character, use L</lex_read_unichar>.
1545
1546 If the next character is in (or extends into) the next chunk of input
1547 text, the next chunk will be read in.  Normally the current chunk will be
1548 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1549 bit set, then the current chunk will not be discarded.
1550
1551 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1552 is encountered, an exception is generated.
1553
1554 =cut
1555 */
1556
1557 I32
1558 Perl_lex_peek_unichar(pTHX_ U32 flags)
1559 {
1560     char *s, *bufend;
1561     if (flags & ~(LEX_KEEP_PREVIOUS))
1562         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1563     s = PL_parser->bufptr;
1564     bufend = PL_parser->bufend;
1565     if (UTF) {
1566         U8 head;
1567         I32 unichar;
1568         STRLEN len, retlen;
1569         if (s == bufend) {
1570             if (!lex_next_chunk(flags))
1571                 return -1;
1572             s = PL_parser->bufptr;
1573             bufend = PL_parser->bufend;
1574         }
1575         head = (U8)*s;
1576         if (UTF8_IS_INVARIANT(head))
1577             return head;
1578         if (UTF8_IS_START(head)) {
1579             len = UTF8SKIP(&head);
1580             while ((STRLEN)(bufend-s) < len) {
1581                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1582                     break;
1583                 s = PL_parser->bufptr;
1584                 bufend = PL_parser->bufend;
1585             }
1586         }
1587         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1588         if (retlen == (STRLEN)-1) {
1589             _force_out_malformed_utf8_message((U8 *) s,
1590                                               (U8 *) bufend,
1591                                               0,
1592                                               1 /* 1 means die */ );
1593             NOT_REACHED; /* NOTREACHED */
1594         }
1595         return unichar;
1596     } else {
1597         if (s == bufend) {
1598             if (!lex_next_chunk(flags))
1599                 return -1;
1600             s = PL_parser->bufptr;
1601         }
1602         return (U8)*s;
1603     }
1604 }
1605
1606 /*
1607 =for apidoc lex_read_unichar
1608
1609 Reads the next (Unicode) character in the text currently being lexed.
1610 Returns the codepoint (unsigned integer value) of the character read,
1611 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1612 if lexing has reached the end of the input text.  To non-destructively
1613 examine the next character, use L</lex_peek_unichar> instead.
1614
1615 If the next character is in (or extends into) the next chunk of input
1616 text, the next chunk will be read in.  Normally the current chunk will be
1617 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1618 bit set, then the current chunk will not be discarded.
1619
1620 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1621 is encountered, an exception is generated.
1622
1623 =cut
1624 */
1625
1626 I32
1627 Perl_lex_read_unichar(pTHX_ U32 flags)
1628 {
1629     I32 c;
1630     if (flags & ~(LEX_KEEP_PREVIOUS))
1631         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1632     c = lex_peek_unichar(flags);
1633     if (c != -1) {
1634         if (c == '\n')
1635             COPLINE_INC_WITH_HERELINES;
1636         if (UTF)
1637             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1638         else
1639             ++(PL_parser->bufptr);
1640     }
1641     return c;
1642 }
1643
1644 /*
1645 =for apidoc lex_read_space
1646
1647 Reads optional spaces, in Perl style, in the text currently being
1648 lexed.  The spaces may include ordinary whitespace characters and
1649 Perl-style comments.  C<#line> directives are processed if encountered.
1650 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1651 at a non-space character (or the end of the input text).
1652
1653 If spaces extend into the next chunk of input text, the next chunk will
1654 be read in.  Normally the current chunk will be discarded at the same
1655 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1656 chunk will not be discarded.
1657
1658 =cut
1659 */
1660
1661 #define LEX_NO_INCLINE    0x40000000
1662 #define LEX_NO_NEXT_CHUNK 0x80000000
1663
1664 void
1665 Perl_lex_read_space(pTHX_ U32 flags)
1666 {
1667     char *s, *bufend;
1668     const bool can_incline = !(flags & LEX_NO_INCLINE);
1669     bool need_incline = 0;
1670     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1671         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1672     s = PL_parser->bufptr;
1673     bufend = PL_parser->bufend;
1674     while (1) {
1675         char c = *s;
1676         if (c == '#') {
1677             do {
1678                 c = *++s;
1679             } while (!(c == '\n' || (c == 0 && s == bufend)));
1680         } else if (c == '\n') {
1681             s++;
1682             if (can_incline) {
1683                 PL_parser->linestart = s;
1684                 if (s == bufend)
1685                     need_incline = 1;
1686                 else
1687                     incline(s, bufend);
1688             }
1689         } else if (isSPACE(c)) {
1690             s++;
1691         } else if (c == 0 && s == bufend) {
1692             bool got_more;
1693             line_t l;
1694             if (flags & LEX_NO_NEXT_CHUNK)
1695                 break;
1696             PL_parser->bufptr = s;
1697             l = CopLINE(PL_curcop);
1698             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1699             got_more = lex_next_chunk(flags);
1700             CopLINE_set(PL_curcop, l);
1701             s = PL_parser->bufptr;
1702             bufend = PL_parser->bufend;
1703             if (!got_more)
1704                 break;
1705             if (can_incline && need_incline && PL_parser->rsfp) {
1706                 incline(s, bufend);
1707                 need_incline = 0;
1708             }
1709         } else if (!c) {
1710             s++;
1711         } else {
1712             break;
1713         }
1714     }
1715     PL_parser->bufptr = s;
1716 }
1717
1718 /*
1719
1720 =for apidoc validate_proto
1721
1722 This function performs syntax checking on a prototype, C<proto>.
1723 If C<warn> is true, any illegal characters or mismatched brackets
1724 will trigger illegalproto warnings, declaring that they were
1725 detected in the prototype for C<name>.
1726
1727 The return value is C<true> if this is a valid prototype, and
1728 C<false> if it is not, regardless of whether C<warn> was C<true> or
1729 C<false>.
1730
1731 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1732
1733 =cut
1734
1735  */
1736
1737 bool
1738 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1739 {
1740     STRLEN len, origlen;
1741     char *p;
1742     bool bad_proto = FALSE;
1743     bool in_brackets = FALSE;
1744     bool after_slash = FALSE;
1745     char greedy_proto = ' ';
1746     bool proto_after_greedy_proto = FALSE;
1747     bool must_be_last = FALSE;
1748     bool underscore = FALSE;
1749     bool bad_proto_after_underscore = FALSE;
1750
1751     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1752
1753     if (!proto)
1754         return TRUE;
1755
1756     p = SvPV(proto, len);
1757     origlen = len;
1758     for (; len--; p++) {
1759         if (!isSPACE(*p)) {
1760             if (must_be_last)
1761                 proto_after_greedy_proto = TRUE;
1762             if (underscore) {
1763                 if (!memCHRs(";@%", *p))
1764                     bad_proto_after_underscore = TRUE;
1765                 underscore = FALSE;
1766             }
1767             if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1768                 bad_proto = TRUE;
1769             }
1770             else {
1771                 if (*p == '[')
1772                     in_brackets = TRUE;
1773                 else if (*p == ']')
1774                     in_brackets = FALSE;
1775                 else if ((*p == '@' || *p == '%')
1776                          && !after_slash
1777                          && !in_brackets )
1778                 {
1779                     must_be_last = TRUE;
1780                     greedy_proto = *p;
1781                 }
1782                 else if (*p == '_')
1783                     underscore = TRUE;
1784             }
1785             if (*p == '\\')
1786                 after_slash = TRUE;
1787             else
1788                 after_slash = FALSE;
1789         }
1790     }
1791
1792     if (warn) {
1793         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1794         p -= origlen;
1795         p = SvUTF8(proto)
1796             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1797                              origlen, UNI_DISPLAY_ISPRINT)
1798             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1799
1800         if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1801             SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1802             sv_catpvs(name2, "::");
1803             sv_catsv(name2, (SV *)name);
1804             name = name2;
1805         }
1806
1807         if (proto_after_greedy_proto)
1808             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1809                         "Prototype after '%c' for %" SVf " : %s",
1810                         greedy_proto, SVfARG(name), p);
1811         if (in_brackets)
1812             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1813                         "Missing ']' in prototype for %" SVf " : %s",
1814                         SVfARG(name), p);
1815         if (bad_proto)
1816             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1817                         "Illegal character in prototype for %" SVf " : %s",
1818                         SVfARG(name), p);
1819         if (bad_proto_after_underscore)
1820             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1821                         "Illegal character after '_' in prototype for %" SVf " : %s",
1822                         SVfARG(name), p);
1823     }
1824
1825     return (! (proto_after_greedy_proto || bad_proto) );
1826 }
1827
1828 /*
1829  * S_incline
1830  * This subroutine has nothing to do with tilting, whether at windmills
1831  * or pinball tables.  Its name is short for "increment line".  It
1832  * increments the current line number in CopLINE(PL_curcop) and checks
1833  * to see whether the line starts with a comment of the form
1834  *    # line 500 "foo.pm"
1835  * If so, it sets the current line number and file to the values in the comment.
1836  */
1837
1838 STATIC void
1839 S_incline(pTHX_ const char *s, const char *end)
1840 {
1841     const char *t;
1842     const char *n;
1843     const char *e;
1844     line_t line_num;
1845     UV uv;
1846
1847     PERL_ARGS_ASSERT_INCLINE;
1848
1849     assert(end >= s);
1850
1851     COPLINE_INC_WITH_HERELINES;
1852     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1853      && s+1 == PL_bufend && *s == ';') {
1854         /* fake newline in string eval */
1855         CopLINE_dec(PL_curcop);
1856         return;
1857     }
1858     if (*s++ != '#')
1859         return;
1860     while (SPACE_OR_TAB(*s))
1861         s++;
1862     if (memBEGINs(s, (STRLEN) (end - s), "line"))
1863         s += sizeof("line") - 1;
1864     else
1865         return;
1866     if (SPACE_OR_TAB(*s))
1867         s++;
1868     else
1869         return;
1870     while (SPACE_OR_TAB(*s))
1871         s++;
1872     if (!isDIGIT(*s))
1873         return;
1874
1875     n = s;
1876     while (isDIGIT(*s))
1877         s++;
1878     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1879         return;
1880     while (SPACE_OR_TAB(*s))
1881         s++;
1882     if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1883         s++;
1884         e = t + 1;
1885     }
1886     else {
1887         t = s;
1888         while (*t && !isSPACE(*t))
1889             t++;
1890         e = t;
1891     }
1892     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1893         e++;
1894     if (*e != '\n' && *e != '\0')
1895         return;         /* false alarm */
1896
1897     if (!grok_atoUV(n, &uv, &e))
1898         return;
1899     line_num = ((line_t)uv) - 1;
1900
1901     if (t - s > 0) {
1902         const STRLEN len = t - s;
1903
1904         if (!PL_rsfp && !PL_parser->filtered) {
1905             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1906              * to *{"::_<newfilename"} */
1907             /* However, the long form of evals is only turned on by the
1908                debugger - usually they're "(eval %lu)" */
1909             GV * const cfgv = CopFILEGV(PL_curcop);
1910             if (cfgv) {
1911                 char smallbuf[128];
1912                 STRLEN tmplen2 = len;
1913                 char *tmpbuf2;
1914                 GV *gv2;
1915
1916                 if (tmplen2 + 2 <= sizeof smallbuf)
1917                     tmpbuf2 = smallbuf;
1918                 else
1919                     Newx(tmpbuf2, tmplen2 + 2, char);
1920
1921                 tmpbuf2[0] = '_';
1922                 tmpbuf2[1] = '<';
1923
1924                 memcpy(tmpbuf2 + 2, s, tmplen2);
1925                 tmplen2 += 2;
1926
1927                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1928                 if (!isGV(gv2)) {
1929                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1930                     /* adjust ${"::_<newfilename"} to store the new file name */
1931                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1932                     /* The line number may differ. If that is the case,
1933                        alias the saved lines that are in the array.
1934                        Otherwise alias the whole array. */
1935                     if (CopLINE(PL_curcop) == line_num) {
1936                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1937                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1938                     }
1939                     else if (GvAV(cfgv)) {
1940                         AV * const av = GvAV(cfgv);
1941                         const line_t start = CopLINE(PL_curcop)+1;
1942                         SSize_t items = AvFILLp(av) - start;
1943                         if (items > 0) {
1944                             AV * const av2 = GvAVn(gv2);
1945                             SV **svp = AvARRAY(av) + start;
1946                             Size_t l = line_num+1;
1947                             while (items-- && l < SSize_t_MAX && l == (line_t)l)
1948                                 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1949                         }
1950                     }
1951                 }
1952
1953                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1954             }
1955         }
1956         CopFILE_free(PL_curcop);
1957         CopFILE_setn(PL_curcop, s, len);
1958     }
1959     CopLINE_set(PL_curcop, line_num);
1960 }
1961
1962 STATIC void
1963 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1964 {
1965     AV *av = CopFILEAVx(PL_curcop);
1966     if (av) {
1967         SV * sv;
1968         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1969         else {
1970             sv = *av_fetch(av, 0, 1);
1971             SvUPGRADE(sv, SVt_PVMG);
1972         }
1973         if (!SvPOK(sv)) SvPVCLEAR(sv);
1974         if (orig_sv)
1975             sv_catsv(sv, orig_sv);
1976         else
1977             sv_catpvn(sv, buf, len);
1978         if (!SvIOK(sv)) {
1979             (void)SvIOK_on(sv);
1980             SvIV_set(sv, 0);
1981         }
1982         if (PL_parser->preambling == NOLINE)
1983             av_store(av, CopLINE(PL_curcop), sv);
1984     }
1985 }
1986
1987 /*
1988  * skipspace
1989  * Called to gobble the appropriate amount and type of whitespace.
1990  * Skips comments as well.
1991  * Returns the next character after the whitespace that is skipped.
1992  *
1993  * peekspace
1994  * Same thing, but look ahead without incrementing line numbers or
1995  * adjusting PL_linestart.
1996  */
1997
1998 #define skipspace(s) skipspace_flags(s, 0)
1999 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
2000
2001 char *
2002 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
2003 {
2004     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
2005     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2006         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
2007             s++;
2008     } else {
2009         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
2010         PL_bufptr = s;
2011         lex_read_space(flags | LEX_KEEP_PREVIOUS |
2012                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
2013                     LEX_NO_NEXT_CHUNK : 0));
2014         s = PL_bufptr;
2015         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
2016         if (PL_linestart > PL_bufptr)
2017             PL_bufptr = PL_linestart;
2018         return s;
2019     }
2020     return s;
2021 }
2022
2023 /*
2024  * S_check_uni
2025  * Check the unary operators to ensure there's no ambiguity in how they're
2026  * used.  An ambiguous piece of code would be:
2027  *     rand + 5
2028  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
2029  * the +5 is its argument.
2030  */
2031
2032 STATIC void
2033 S_check_uni(pTHX)
2034 {
2035     const char *s;
2036
2037     if (PL_oldoldbufptr != PL_last_uni)
2038         return;
2039     while (isSPACE(*PL_last_uni))
2040         PL_last_uni++;
2041     s = PL_last_uni;
2042     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
2043         s += UTF ? UTF8SKIP(s) : 1;
2044     if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
2045         return;
2046
2047     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2048                      "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
2049                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
2050 }
2051
2052 /*
2053  * LOP : macro to build a list operator.  Its behaviour has been replaced
2054  * with a subroutine, S_lop() for which LOP is just another name.
2055  */
2056
2057 #define LOP(f,x) return lop(f,x,s)
2058
2059 /*
2060  * S_lop
2061  * Build a list operator (or something that might be one).  The rules:
2062  *  - if we have a next token, then it's a list operator (no parens) for
2063  *    which the next token has already been parsed; e.g.,
2064  *       sort foo @args
2065  *       sort foo (@args)
2066  *  - if the next thing is an opening paren, then it's a function
2067  *  - else it's a list operator
2068  */
2069
2070 STATIC I32
2071 S_lop(pTHX_ I32 f, U8 x, char *s)
2072 {
2073     PERL_ARGS_ASSERT_LOP;
2074
2075     pl_yylval.ival = f;
2076     CLINE;
2077     PL_bufptr = s;
2078     PL_last_lop = PL_oldbufptr;
2079     PL_last_lop_op = (OPCODE)f;
2080     if (PL_nexttoke)
2081         goto lstop;
2082     PL_expect = x;
2083     if (*s == '(')
2084         return REPORT(FUNC);
2085     s = skipspace(s);
2086     if (*s == '(')
2087         return REPORT(FUNC);
2088     else {
2089         lstop:
2090         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2091             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2092         return REPORT(LSTOP);
2093     }
2094 }
2095
2096 /*
2097  * S_force_next
2098  * When the lexer realizes it knows the next token (for instance,
2099  * it is reordering tokens for the parser) then it can call S_force_next
2100  * to know what token to return the next time the lexer is called.  Caller
2101  * will need to set PL_nextval[] and possibly PL_expect to ensure
2102  * the lexer handles the token correctly.
2103  */
2104
2105 STATIC void
2106 S_force_next(pTHX_ I32 type)
2107 {
2108 #ifdef DEBUGGING
2109     if (DEBUG_T_TEST) {
2110         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2111         tokereport(type, &NEXTVAL_NEXTTOKE);
2112     }
2113 #endif
2114     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2115     PL_nexttype[PL_nexttoke] = type;
2116     PL_nexttoke++;
2117 }
2118
2119 /*
2120  * S_postderef
2121  *
2122  * This subroutine handles postfix deref syntax after the arrow has already
2123  * been emitted.  @* $* etc. are emitted as two separate tokens right here.
2124  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2125  * only the first, leaving yylex to find the next.
2126  */
2127
2128 static int
2129 S_postderef(pTHX_ int const funny, char const next)
2130 {
2131     assert(funny == DOLSHARP
2132         || funny == PERLY_DOLLAR
2133         || funny == PERLY_SNAIL
2134         || funny == PERLY_PERCENT_SIGN
2135         || funny == PERLY_AMPERSAND
2136         || funny == PERLY_STAR
2137     );
2138     if (next == '*') {
2139         PL_expect = XOPERATOR;
2140         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2141             assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny);
2142             PL_lex_state = LEX_INTERPEND;
2143             if (PERLY_SNAIL == funny)
2144                 force_next(POSTJOIN);
2145         }
2146         force_next(PERLY_STAR);
2147         PL_bufptr+=2;
2148     }
2149     else {
2150         if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
2151          && !PL_lex_brackets)
2152             PL_lex_dojoin = 2;
2153         PL_expect = XOPERATOR;
2154         PL_bufptr++;
2155     }
2156     return funny;
2157 }
2158
2159 void
2160 Perl_yyunlex(pTHX)
2161 {
2162     int yyc = PL_parser->yychar;
2163     if (yyc != YYEMPTY) {
2164         if (yyc) {
2165             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2166             if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) {
2167                 PL_lex_allbrackets--;
2168                 PL_lex_brackets--;
2169                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2170             } else if (yyc == PERLY_PAREN_OPEN) {
2171                 PL_lex_allbrackets--;
2172                 yyc |= (2<<24);
2173             }
2174             force_next(yyc);
2175         }
2176         PL_parser->yychar = YYEMPTY;
2177     }
2178 }
2179
2180 STATIC SV *
2181 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2182 {
2183     SV * const sv = newSVpvn_utf8(start, len,
2184                     ! IN_BYTES
2185                   &&  UTF
2186                   &&  len != 0
2187                   &&  is_utf8_non_invariant_string((const U8*)start, len));
2188     return sv;
2189 }
2190
2191 /*
2192  * S_force_word
2193  * When the lexer knows the next thing is a word (for instance, it has
2194  * just seen -> and it knows that the next char is a word char, then
2195  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2196  * lookahead.
2197  *
2198  * Arguments:
2199  *   char *start : buffer position (must be within PL_linestr)
2200  *   int token   : PL_next* will be this type of bare word
2201  *                 (e.g., METHOD,BAREWORD)
2202  *   int check_keyword : if true, Perl checks to make sure the word isn't
2203  *       a keyword (do this if the word is a label, e.g. goto FOO)
2204  *   int allow_pack : if true, : characters will also be allowed (require,
2205  *       use, etc. do this)
2206  */
2207
2208 STATIC char *
2209 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2210 {
2211     char *s;
2212     STRLEN len;
2213
2214     PERL_ARGS_ASSERT_FORCE_WORD;
2215
2216     start = skipspace(start);
2217     s = start;
2218     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2219         || (allow_pack && *s == ':' && s[1] == ':') )
2220     {
2221         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2222         if (check_keyword) {
2223           char *s2 = PL_tokenbuf;
2224           STRLEN len2 = len;
2225           if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2226             s2 += sizeof("CORE::") - 1;
2227             len2 -= sizeof("CORE::") - 1;
2228           }
2229           if (keyword(s2, len2, 0))
2230             return start;
2231         }
2232         if (token == METHOD) {
2233             s = skipspace(s);
2234             if (*s == '(')
2235                 PL_expect = XTERM;
2236             else {
2237                 PL_expect = XOPERATOR;
2238             }
2239         }
2240         NEXTVAL_NEXTTOKE.opval
2241             = newSVOP(OP_CONST,0,
2242                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2243         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2244         force_next(token);
2245     }
2246     return s;
2247 }
2248
2249 /*
2250  * S_force_ident
2251  * Called when the lexer wants $foo *foo &foo etc, but the program
2252  * text only contains the "foo" portion.  The first argument is a pointer
2253  * to the "foo", and the second argument is the type symbol to prefix.
2254  * Forces the next token to be a "BAREWORD".
2255  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2256  */
2257
2258 STATIC void
2259 S_force_ident(pTHX_ const char *s, int kind)
2260 {
2261     PERL_ARGS_ASSERT_FORCE_IDENT;
2262
2263     if (s[0]) {
2264         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2265         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2266                                                                 UTF ? SVf_UTF8 : 0));
2267         NEXTVAL_NEXTTOKE.opval = o;
2268         force_next(BAREWORD);
2269         if (kind) {
2270             o->op_private = OPpCONST_ENTERED;
2271             /* XXX see note in pp_entereval() for why we forgo typo
2272                warnings if the symbol must be introduced in an eval.
2273                GSAR 96-10-12 */
2274             gv_fetchpvn_flags(s, len,
2275                               (PL_in_eval ? GV_ADDMULTI
2276                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2277                               kind == PERLY_DOLLAR ? SVt_PV :
2278                               kind == PERLY_SNAIL ? SVt_PVAV :
2279                               kind == PERLY_PERCENT_SIGN ? SVt_PVHV :
2280                               SVt_PVGV
2281                               );
2282         }
2283     }
2284 }
2285
2286 static void
2287 S_force_ident_maybe_lex(pTHX_ char pit)
2288 {
2289     NEXTVAL_NEXTTOKE.ival = pit;
2290     force_next('p');
2291 }
2292
2293 NV
2294 Perl_str_to_version(pTHX_ SV *sv)
2295 {
2296     NV retval = 0.0;
2297     NV nshift = 1.0;
2298     STRLEN len;
2299     const char *start = SvPV_const(sv,len);
2300     const char * const end = start + len;
2301     const bool utf = cBOOL(SvUTF8(sv));
2302
2303     PERL_ARGS_ASSERT_STR_TO_VERSION;
2304
2305     while (start < end) {
2306         STRLEN skip;
2307         UV n;
2308         if (utf)
2309             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2310         else {
2311             n = *(U8*)start;
2312             skip = 1;
2313         }
2314         retval += ((NV)n)/nshift;
2315         start += skip;
2316         nshift *= 1000;
2317     }
2318     return retval;
2319 }
2320
2321 /*
2322  * S_force_version
2323  * Forces the next token to be a version number.
2324  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2325  * and if "guessing" is TRUE, then no new token is created (and the caller
2326  * must use an alternative parsing method).
2327  */
2328
2329 STATIC char *
2330 S_force_version(pTHX_ char *s, int guessing)
2331 {
2332     OP *version = NULL;
2333     char *d;
2334
2335     PERL_ARGS_ASSERT_FORCE_VERSION;
2336
2337     s = skipspace(s);
2338
2339     d = s;
2340     if (*d == 'v')
2341         d++;
2342     if (isDIGIT(*d)) {
2343         while (isDIGIT(*d) || *d == '_' || *d == '.')
2344             d++;
2345         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2346             SV *ver;
2347             s = scan_num(s, &pl_yylval);
2348             version = pl_yylval.opval;
2349             ver = cSVOPx(version)->op_sv;
2350             if (SvPOK(ver) && !SvNIOK(ver)) {
2351                 SvUPGRADE(ver, SVt_PVNV);
2352                 SvNV_set(ver, str_to_version(ver));
2353                 SvNOK_on(ver);          /* hint that it is a version */
2354             }
2355         }
2356         else if (guessing) {
2357             return s;
2358         }
2359     }
2360
2361     /* NOTE: The parser sees the package name and the VERSION swapped */
2362     NEXTVAL_NEXTTOKE.opval = version;
2363     force_next(BAREWORD);
2364
2365     return s;
2366 }
2367
2368 /*
2369  * S_force_strict_version
2370  * Forces the next token to be a version number using strict syntax rules.
2371  */
2372
2373 STATIC char *
2374 S_force_strict_version(pTHX_ char *s)
2375 {
2376     OP *version = NULL;
2377     const char *errstr = NULL;
2378
2379     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2380
2381     while (isSPACE(*s)) /* leading whitespace */
2382         s++;
2383
2384     if (is_STRICT_VERSION(s,&errstr)) {
2385         SV *ver = newSV(0);
2386         s = (char *)scan_version(s, ver, 0);
2387         version = newSVOP(OP_CONST, 0, ver);
2388     }
2389     else if ((*s != ';' && *s != '{' && *s != '}' )
2390              && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2391     {
2392         PL_bufptr = s;
2393         if (errstr)
2394             yyerror(errstr); /* version required */
2395         return s;
2396     }
2397
2398     /* NOTE: The parser sees the package name and the VERSION swapped */
2399     NEXTVAL_NEXTTOKE.opval = version;
2400     force_next(BAREWORD);
2401
2402     return s;
2403 }
2404
2405 /*
2406  * S_tokeq
2407  * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2408  * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2409  * unchanged, and a new SV containing the modified input is returned.
2410  */
2411
2412 STATIC SV *
2413 S_tokeq(pTHX_ SV *sv)
2414 {
2415     char *s;
2416     char *send;
2417     char *d;
2418     SV *pv = sv;
2419
2420     PERL_ARGS_ASSERT_TOKEQ;
2421
2422     assert (SvPOK(sv));
2423     assert (SvLEN(sv));
2424     assert (!SvIsCOW(sv));
2425     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2426         goto finish;
2427     s = SvPVX(sv);
2428     send = SvEND(sv);
2429     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2430     while (s < send && !(*s == '\\' && s[1] == '\\'))
2431         s++;
2432     if (s == send)
2433         goto finish;
2434     d = s;
2435     if ( PL_hints & HINT_NEW_STRING ) {
2436         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2437                             SVs_TEMP | SvUTF8(sv));
2438     }
2439     while (s < send) {
2440         if (*s == '\\') {
2441             if (s + 1 < send && (s[1] == '\\'))
2442                 s++;            /* all that, just for this */
2443         }
2444         *d++ = *s++;
2445     }
2446     *d = '\0';
2447     SvCUR_set(sv, d - SvPVX_const(sv));
2448   finish:
2449     if ( PL_hints & HINT_NEW_STRING )
2450        return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2451     return sv;
2452 }
2453
2454 /*
2455  * Now come three functions related to double-quote context,
2456  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2457  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2458  * interact with PL_lex_state, and create fake ( ... ) argument lists
2459  * to handle functions and concatenation.
2460  * For example,
2461  *   "foo\lbar"
2462  * is tokenised as
2463  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2464  */
2465
2466 /*
2467  * S_sublex_start
2468  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2469  *
2470  * Pattern matching will set PL_lex_op to the pattern-matching op to
2471  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2472  *
2473  * OP_CONST is easy--just make the new op and return.
2474  *
2475  * Everything else becomes a FUNC.
2476  *
2477  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2478  * had an OP_CONST.  This just sets us up for a
2479  * call to S_sublex_push().
2480  */
2481
2482 STATIC I32
2483 S_sublex_start(pTHX)
2484 {
2485     const I32 op_type = pl_yylval.ival;
2486
2487     if (op_type == OP_NULL) {
2488         pl_yylval.opval = PL_lex_op;
2489         PL_lex_op = NULL;
2490         return THING;
2491     }
2492     if (op_type == OP_CONST) {
2493         SV *sv = PL_lex_stuff;
2494         PL_lex_stuff = NULL;
2495         sv = tokeq(sv);
2496
2497         if (SvTYPE(sv) == SVt_PVIV) {
2498             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2499             STRLEN len;
2500             const char * const p = SvPV_const(sv, len);
2501             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2502             SvREFCNT_dec(sv);
2503             sv = nsv;
2504         }
2505         pl_yylval.opval = newSVOP(op_type, 0, sv);
2506         return THING;
2507     }
2508
2509     PL_parser->lex_super_state = PL_lex_state;
2510     PL_parser->lex_sub_inwhat = (U16)op_type;
2511     PL_parser->lex_sub_op = PL_lex_op;
2512     PL_parser->sub_no_recover = FALSE;
2513     PL_parser->sub_error_count = PL_error_count;
2514     PL_lex_state = LEX_INTERPPUSH;
2515
2516     PL_expect = XTERM;
2517     if (PL_lex_op) {
2518         pl_yylval.opval = PL_lex_op;
2519         PL_lex_op = NULL;
2520         return PMFUNC;
2521     }
2522     else
2523         return FUNC;
2524 }
2525
2526 /*
2527  * S_sublex_push
2528  * Create a new scope to save the lexing state.  The scope will be
2529  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2530  * to the uc, lc, etc. found before.
2531  * Sets PL_lex_state to LEX_INTERPCONCAT.
2532  */
2533
2534 STATIC I32
2535 S_sublex_push(pTHX)
2536 {
2537     LEXSHARED *shared;
2538     const bool is_heredoc = PL_multi_close == '<';
2539     ENTER;
2540
2541     PL_lex_state = PL_parser->lex_super_state;
2542     SAVEI8(PL_lex_dojoin);
2543     SAVEI32(PL_lex_brackets);
2544     SAVEI32(PL_lex_allbrackets);
2545     SAVEI32(PL_lex_formbrack);
2546     SAVEI8(PL_lex_fakeeof);
2547     SAVEI32(PL_lex_casemods);
2548     SAVEI32(PL_lex_starts);
2549     SAVEI8(PL_lex_state);
2550     SAVESPTR(PL_lex_repl);
2551     SAVEVPTR(PL_lex_inpat);
2552     SAVEI16(PL_lex_inwhat);
2553     if (is_heredoc)
2554     {
2555         SAVECOPLINE(PL_curcop);
2556         SAVEI32(PL_multi_end);
2557         SAVEI32(PL_parser->herelines);
2558         PL_parser->herelines = 0;
2559     }
2560     SAVEIV(PL_multi_close);
2561     SAVEPPTR(PL_bufptr);
2562     SAVEPPTR(PL_bufend);
2563     SAVEPPTR(PL_oldbufptr);
2564     SAVEPPTR(PL_oldoldbufptr);
2565     SAVEPPTR(PL_last_lop);
2566     SAVEPPTR(PL_last_uni);
2567     SAVEPPTR(PL_linestart);
2568     SAVESPTR(PL_linestr);
2569     SAVEGENERICPV(PL_lex_brackstack);
2570     SAVEGENERICPV(PL_lex_casestack);
2571     SAVEGENERICPV(PL_parser->lex_shared);
2572     SAVEBOOL(PL_parser->lex_re_reparsing);
2573     SAVEI32(PL_copline);
2574
2575     /* The here-doc parser needs to be able to peek into outer lexing
2576        scopes to find the body of the here-doc.  So we put PL_linestr and
2577        PL_bufptr into lex_shared, to ‘share’ those values.
2578      */
2579     PL_parser->lex_shared->ls_linestr = PL_linestr;
2580     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2581
2582     PL_linestr = PL_lex_stuff;
2583     PL_lex_repl = PL_parser->lex_sub_repl;
2584     PL_lex_stuff = NULL;
2585     PL_parser->lex_sub_repl = NULL;
2586
2587     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2588        set for an inner quote-like operator and then an error causes scope-
2589        popping.  We must not have a PL_lex_stuff value left dangling, as
2590        that breaks assumptions elsewhere.  See bug #123617.  */
2591     SAVEGENERICSV(PL_lex_stuff);
2592     SAVEGENERICSV(PL_parser->lex_sub_repl);
2593
2594     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2595         = SvPVX(PL_linestr);
2596     PL_bufend += SvCUR(PL_linestr);
2597     PL_last_lop = PL_last_uni = NULL;
2598     SAVEFREESV(PL_linestr);
2599     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2600
2601     PL_lex_dojoin = FALSE;
2602     PL_lex_brackets = PL_lex_formbrack = 0;
2603     PL_lex_allbrackets = 0;
2604     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2605     Newx(PL_lex_brackstack, 120, char);
2606     Newx(PL_lex_casestack, 12, char);
2607     PL_lex_casemods = 0;
2608     *PL_lex_casestack = '\0';
2609     PL_lex_starts = 0;
2610     PL_lex_state = LEX_INTERPCONCAT;
2611     if (is_heredoc)
2612         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2613     PL_copline = NOLINE;
2614
2615     Newxz(shared, 1, LEXSHARED);
2616     shared->ls_prev = PL_parser->lex_shared;
2617     PL_parser->lex_shared = shared;
2618
2619     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2620     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2621     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2622         PL_lex_inpat = PL_parser->lex_sub_op;
2623     else
2624         PL_lex_inpat = NULL;
2625
2626     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2627     PL_in_eval &= ~EVAL_RE_REPARSING;
2628
2629     return SUBLEXSTART;
2630 }
2631
2632 /*
2633  * S_sublex_done
2634  * Restores lexer state after a S_sublex_push.
2635  */
2636
2637 STATIC I32
2638 S_sublex_done(pTHX)
2639 {
2640     if (!PL_lex_starts++) {
2641         SV * const sv = newSVpvs("");
2642         if (SvUTF8(PL_linestr))
2643             SvUTF8_on(sv);
2644         PL_expect = XOPERATOR;
2645         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2646         return THING;
2647     }
2648
2649     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2650         PL_lex_state = LEX_INTERPCASEMOD;
2651         return yylex();
2652     }
2653
2654     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2655     assert(PL_lex_inwhat != OP_TRANSR);
2656     if (PL_lex_repl) {
2657         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2658         PL_linestr = PL_lex_repl;
2659         PL_lex_inpat = 0;
2660         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2661         PL_bufend += SvCUR(PL_linestr);
2662         PL_last_lop = PL_last_uni = NULL;
2663         PL_lex_dojoin = FALSE;
2664         PL_lex_brackets = 0;
2665         PL_lex_allbrackets = 0;
2666         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2667         PL_lex_casemods = 0;
2668         *PL_lex_casestack = '\0';
2669         PL_lex_starts = 0;
2670         if (SvEVALED(PL_lex_repl)) {
2671             PL_lex_state = LEX_INTERPNORMAL;
2672             PL_lex_starts++;
2673             /*  we don't clear PL_lex_repl here, so that we can check later
2674                 whether this is an evalled subst; that means we rely on the
2675                 logic to ensure sublex_done() is called again only via the
2676                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2677         }
2678         else {
2679             PL_lex_state = LEX_INTERPCONCAT;
2680             PL_lex_repl = NULL;
2681         }
2682         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2683             CopLINE(PL_curcop) +=
2684                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2685                  + PL_parser->herelines;
2686             PL_parser->herelines = 0;
2687         }
2688         return PERLY_SLASH;
2689     }
2690     else {
2691         const line_t l = CopLINE(PL_curcop);
2692         LEAVE;
2693         if (PL_parser->sub_error_count != PL_error_count) {
2694             if (PL_parser->sub_no_recover) {
2695                 yyquit();
2696                 NOT_REACHED;
2697             }
2698         }
2699         if (PL_multi_close == '<')
2700             PL_parser->herelines += l - PL_multi_end;
2701         PL_bufend = SvPVX(PL_linestr);
2702         PL_bufend += SvCUR(PL_linestr);
2703         PL_expect = XOPERATOR;
2704         return SUBLEXEND;
2705     }
2706 }
2707
2708 HV *
2709 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2710                           const STRLEN context_len, const char ** error_msg)
2711 {
2712     /* Load the official _charnames module if not already there.  The
2713      * parameters are just to give info for any error messages generated:
2714      *  char_name   a name to look up which is the reason for loading this
2715      *  context     'char_name' in the context in the input in which it appears
2716      *  context_len how many bytes 'context' occupies
2717      *  error_msg   *error_msg will be set to any error
2718      *
2719      *  Returns the ^H table if success; otherwise NULL */
2720
2721     unsigned int i;
2722     HV * table;
2723     SV **cvp;
2724     SV * res;
2725
2726     PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2727
2728     /* This loop is executed 1 1/2 times.  On the first time through, if it
2729      * isn't already loaded, try loading it, and iterate just once to see if it
2730      * worked.  */
2731     for (i = 0; i < 2; i++) {
2732         table = GvHV(PL_hintgv);                 /* ^H */
2733
2734         if (    table
2735             && (PL_hints & HINT_LOCALIZE_HH)
2736             && (cvp = hv_fetchs(table, "charnames", FALSE))
2737             &&  SvOK(*cvp))
2738         {
2739             return table;   /* Quit if already loaded */
2740         }
2741
2742         if (i == 0) {
2743             Perl_load_module(aTHX_
2744                 0,
2745                 newSVpvs("_charnames"),
2746
2747                 /* version parameter; no need to specify it, as if we get too early
2748                 * a version, will fail anyway, not being able to find 'charnames'
2749                 * */
2750                 NULL,
2751                 newSVpvs(":full"),
2752                 newSVpvs(":short"),
2753                 NULL);
2754         }
2755     }
2756
2757     /* Here, it failed; new_constant will give appropriate error messages */
2758     *error_msg = NULL;
2759     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2760                         context, context_len, error_msg);
2761     SvREFCNT_dec(res);
2762
2763     return NULL;
2764 }
2765
2766 STATIC SV*
2767 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2768 {
2769     /* This justs wraps get_and_check_backslash_N_name() to output any error
2770      * message it returns. */
2771
2772     const char * error_msg = NULL;
2773     SV * result;
2774
2775     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2776
2777     /* charnames doesn't work well if there have been errors found */
2778     if (PL_error_count > 0) {
2779         return NULL;
2780     }
2781
2782     result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2783
2784     if (error_msg) {
2785         yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2786     }
2787
2788     return result;
2789 }
2790
2791 SV*
2792 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2793                                           const char* e,
2794                                           const bool is_utf8,
2795                                           const char ** error_msg)
2796 {
2797     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2798      * interior, hence to the "}".  Finds what the name resolves to, returning
2799      * an SV* containing it; NULL if no valid one found.
2800      *
2801      * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2802      * doesn't have to be. */
2803
2804     SV* char_name;
2805     SV* res;
2806     HV * table;
2807     SV **cvp;
2808     SV *cv;
2809     SV *rv;
2810     HV *stash;
2811
2812     /* Points to the beginning of the \N{... so that any messages include the
2813      * context of what's failing*/
2814     const char* context = s - 3;
2815     STRLEN context_len = e - context + 1; /* include all of \N{...} */
2816
2817
2818     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2819
2820     assert(e >= s);
2821     assert(s > (char *) 3);
2822
2823     while (s < e && isBLANK(*s)) {
2824         s++;
2825     }
2826
2827     while (s < e && isBLANK(*(e - 1))) {
2828         e--;
2829     }
2830
2831     char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2832
2833     if (!SvCUR(char_name)) {
2834         SvREFCNT_dec_NN(char_name);
2835         /* diag_listed_as: Unknown charname '%s' */
2836         *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2837         return NULL;
2838     }
2839
2840     /* Autoload the charnames module */
2841
2842     table = load_charnames(char_name, context, context_len, error_msg);
2843     if (table == NULL) {
2844         return NULL;
2845     }
2846
2847     *error_msg = NULL;
2848     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2849                         context, context_len, error_msg);
2850     if (*error_msg) {
2851         *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2852
2853         SvREFCNT_dec(res);
2854         return NULL;
2855     }
2856
2857     /* See if the charnames handler is the Perl core's, and if so, we can skip
2858      * the validation needed for a user-supplied one, as Perl's does its own
2859      * validation. */
2860     cvp = hv_fetchs(table, "charnames", FALSE);
2861     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2862         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2863     {
2864         const char * const name = HvNAME(stash);
2865          if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2866            return res;
2867        }
2868     }
2869
2870     /* Here, it isn't Perl's charname handler.  We can't rely on a
2871      * user-supplied handler to validate the input name.  For non-ut8 input,
2872      * look to see that the first character is legal.  Then loop through the
2873      * rest checking that each is a continuation */
2874
2875     /* This code makes the reasonable assumption that the only Latin1-range
2876      * characters that begin a character name alias are alphabetic, otherwise
2877      * would have to create a isCHARNAME_BEGIN macro */
2878
2879     if (! is_utf8) {
2880         if (! isALPHAU(*s)) {
2881             goto bad_charname;
2882         }
2883         s++;
2884         while (s < e) {
2885             if (! isCHARNAME_CONT(*s)) {
2886                 goto bad_charname;
2887             }
2888             if (*s == ' ' && *(s-1) == ' ') {
2889                 goto multi_spaces;
2890             }
2891             s++;
2892         }
2893     }
2894     else {
2895         /* Similarly for utf8.  For invariants can check directly; for other
2896          * Latin1, can calculate their code point and check; otherwise  use an
2897          * inversion list */
2898         if (UTF8_IS_INVARIANT(*s)) {
2899             if (! isALPHAU(*s)) {
2900                 goto bad_charname;
2901             }
2902             s++;
2903         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2904             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2905                 goto bad_charname;
2906             }
2907             s += 2;
2908         }
2909         else {
2910             if (! _invlist_contains_cp(PL_utf8_charname_begin,
2911                                        utf8_to_uvchr_buf((U8 *) s,
2912                                                          (U8 *) e,
2913                                                          NULL)))
2914             {
2915                 goto bad_charname;
2916             }
2917             s += UTF8SKIP(s);
2918         }
2919
2920         while (s < e) {
2921             if (UTF8_IS_INVARIANT(*s)) {
2922                 if (! isCHARNAME_CONT(*s)) {
2923                     goto bad_charname;
2924                 }
2925                 if (*s == ' ' && *(s-1) == ' ') {
2926                     goto multi_spaces;
2927                 }
2928                 s++;
2929             }
2930             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2931                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2932                 {
2933                     goto bad_charname;
2934                 }
2935                 s += 2;
2936             }
2937             else {
2938                 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2939                                            utf8_to_uvchr_buf((U8 *) s,
2940                                                              (U8 *) e,
2941                                                              NULL)))
2942                 {
2943                     goto bad_charname;
2944                 }
2945                 s += UTF8SKIP(s);
2946             }
2947         }
2948     }
2949     if (*(s-1) == ' ') {
2950         /* diag_listed_as: charnames alias definitions may not contain
2951                            trailing white-space; marked by <-- HERE in %s
2952          */
2953         *error_msg = Perl_form(aTHX_
2954             "charnames alias definitions may not contain trailing "
2955             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2956             (int)(s - context + 1), context,
2957             (int)(e - s + 1), s + 1);
2958         return NULL;
2959     }
2960
2961     if (SvUTF8(res)) { /* Don't accept malformed charname value */
2962         const U8* first_bad_char_loc;
2963         STRLEN len;
2964         const char* const str = SvPV_const(res, len);
2965         if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2966                                           &first_bad_char_loc)))
2967         {
2968             _force_out_malformed_utf8_message(first_bad_char_loc,
2969                                               (U8 *) PL_parser->bufend,
2970                                               0,
2971                                               0 /* 0 means don't die */ );
2972             /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2973                                immediately after '%s' */
2974             *error_msg = Perl_form(aTHX_
2975                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2976                  (int) context_len, context,
2977                  (int) ((char *) first_bad_char_loc - str), str);
2978             return NULL;
2979         }
2980     }
2981
2982     return res;
2983
2984   bad_charname: {
2985
2986         /* The final %.*s makes sure that should the trailing NUL be missing
2987          * that this print won't run off the end of the string */
2988         /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2989                            in \N{%s} */
2990         *error_msg = Perl_form(aTHX_
2991             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2992             (int)(s - context + 1), context,
2993             (int)(e - s + 1), s + 1);
2994         return NULL;
2995     }
2996
2997   multi_spaces:
2998         /* diag_listed_as: charnames alias definitions may not contain a
2999                            sequence of multiple spaces; marked by <-- HERE
3000                            in %s */
3001         *error_msg = Perl_form(aTHX_
3002             "charnames alias definitions may not contain a sequence of "
3003             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
3004             (int)(s - context + 1), context,
3005             (int)(e - s + 1), s + 1);
3006         return NULL;
3007 }
3008
3009 /*
3010   scan_const
3011
3012   Extracts the next constant part of a pattern, double-quoted string,
3013   or transliteration.  This is terrifying code.
3014
3015   For example, in parsing the double-quoted string "ab\x63$d", it would
3016   stop at the '$' and return an OP_CONST containing 'abc'.
3017
3018   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3019   processing a pattern (PL_lex_inpat is true), a transliteration
3020   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3021
3022   Returns a pointer to the character scanned up to. If this is
3023   advanced from the start pointer supplied (i.e. if anything was
3024   successfully parsed), will leave an OP_CONST for the substring scanned
3025   in pl_yylval. Caller must intuit reason for not parsing further
3026   by looking at the next characters herself.
3027
3028   In patterns:
3029     expand:
3030       \N{FOO}  => \N{U+hex_for_character_FOO}
3031       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3032
3033     pass through:
3034         all other \-char, including \N and \N{ apart from \N{ABC}
3035
3036     stops on:
3037         @ and $ where it appears to be a var, but not for $ as tail anchor
3038         \l \L \u \U \Q \E
3039         (?{  or  (??{
3040
3041   In transliterations:
3042     characters are VERY literal, except for - not at the start or end
3043     of the string, which indicates a range.  However some backslash sequences
3044     are recognized: \r, \n, and the like
3045                     \007 \o{}, \x{}, \N{}
3046     If all elements in the transliteration are below 256,
3047     scan_const expands the range to the full set of intermediate
3048     characters. If the range is in utf8, the hyphen is replaced with
3049     a certain range mark which will be handled by pmtrans() in op.c.
3050
3051   In double-quoted strings:
3052     backslashes:
3053       all those recognized in transliterations
3054       deprecated backrefs: \1 (in substitution replacements)
3055       case and quoting: \U \Q \E
3056     stops on @ and $
3057
3058   scan_const does *not* construct ops to handle interpolated strings.
3059   It stops processing as soon as it finds an embedded $ or @ variable
3060   and leaves it to the caller to work out what's going on.
3061
3062   embedded arrays (whether in pattern or not) could be:
3063       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3064
3065   $ in double-quoted strings must be the symbol of an embedded scalar.
3066
3067   $ in pattern could be $foo or could be tail anchor.  Assumption:
3068   it's a tail anchor if $ is the last thing in the string, or if it's
3069   followed by one of "()| \r\n\t"
3070
3071   \1 (backreferences) are turned into $1 in substitutions
3072
3073   The structure of the code is
3074       while (there's a character to process) {
3075           handle transliteration ranges
3076           skip regexp comments /(?#comment)/ and codes /(?{code})/
3077           skip #-initiated comments in //x patterns
3078           check for embedded arrays
3079           check for embedded scalars
3080           if (backslash) {
3081               deprecate \1 in substitution replacements
3082               handle string-changing backslashes \l \U \Q \E, etc.
3083               switch (what was escaped) {
3084                   handle \- in a transliteration (becomes a literal -)
3085                   if a pattern and not \N{, go treat as regular character
3086                   handle \132 (octal characters)
3087                   handle \x15 and \x{1234} (hex characters)
3088                   handle \N{name} (named characters, also \N{3,5} in a pattern)
3089                   handle \cV (control characters)
3090                   handle printf-style backslashes (\f, \r, \n, etc)
3091               } (end switch)
3092               continue
3093           } (end if backslash)
3094           handle regular character
3095     } (end while character to read)
3096
3097 */
3098
3099 STATIC char *
3100 S_scan_const(pTHX_ char *start)
3101 {
3102     const char * const send = PL_bufend;/* end of the constant */
3103     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
3104                                            on sizing. */
3105     char *s = start;                    /* start of the constant */
3106     char *d = SvPVX(sv);                /* destination for copies */
3107     bool dorange = FALSE;               /* are we in a translit range? */
3108     bool didrange = FALSE;              /* did we just finish a range? */
3109     bool in_charclass = FALSE;          /* within /[...]/ */
3110     const bool s_is_utf8 = cBOOL(UTF);  /* Is the source string assumed to be
3111                                            UTF8?  But, this can show as true
3112                                            when the source isn't utf8, as for
3113                                            example when it is entirely composed
3114                                            of hex constants */
3115     bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
3116     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
3117                                            number of characters found so far
3118                                            that will expand (into 2 bytes)
3119                                            should we have to convert to
3120                                            UTF-8) */
3121     SV *res;                            /* result from charnames */
3122     STRLEN offset_to_max = 0;   /* The offset in the output to where the range
3123                                    high-end character is temporarily placed */
3124
3125     /* Does something require special handling in tr/// ?  This avoids extra
3126      * work in a less likely case.  As such, khw didn't feel it was worth
3127      * adding any branches to the more mainline code to handle this, which
3128      * means that this doesn't get set in some circumstances when things like
3129      * \x{100} get expanded out.  As a result there needs to be extra testing
3130      * done in the tr code */
3131     bool has_above_latin1 = FALSE;
3132
3133     /* Note on sizing:  The scanned constant is placed into sv, which is
3134      * initialized by newSV() assuming one byte of output for every byte of
3135      * input.  This routine expects newSV() to allocate an extra byte for a
3136      * trailing NUL, which this routine will append if it gets to the end of
3137      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3138      * CAPITAL LETTER A}), or more output than input if the constant ends up
3139      * recoded to utf8, but each time a construct is found that might increase
3140      * the needed size, SvGROW() is called.  Its size parameter each time is
3141      * based on the best guess estimate at the time, namely the length used so
3142      * far, plus the length the current construct will occupy, plus room for
3143      * the trailing NUL, plus one byte for every input byte still unscanned */
3144
3145     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3146                        before set */
3147 #ifdef EBCDIC
3148     int backslash_N = 0;            /* ? was the character from \N{} */
3149     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
3150                                        platform-specific like \x65 */
3151 #endif
3152
3153     PERL_ARGS_ASSERT_SCAN_CONST;
3154
3155     assert(PL_lex_inwhat != OP_TRANSR);
3156
3157     /* Protect sv from errors and fatal warnings. */
3158     ENTER_with_name("scan_const");
3159     SAVEFREESV(sv);
3160
3161     /* A bunch of code in the loop below assumes that if s[n] exists and is not
3162      * NUL, then s[n+1] exists.  This assertion makes sure that assumption is
3163      * valid */
3164     assert(*send == '\0');
3165
3166     while (s < send
3167            || dorange   /* Handle tr/// range at right edge of input */
3168     ) {
3169
3170         /* get transliterations out of the way (they're most literal) */
3171         if (PL_lex_inwhat == OP_TRANS) {
3172
3173             /* But there isn't any special handling necessary unless there is a
3174              * range, so for most cases we just drop down and handle the value
3175              * as any other.  There are two exceptions.
3176              *
3177              * 1.  A hyphen indicates that we are actually going to have a
3178              *     range.  In this case, skip the '-', set a flag, then drop
3179              *     down to handle what should be the end range value.
3180              * 2.  After we've handled that value, the next time through, that
3181              *     flag is set and we fix up the range.
3182              *
3183              * Ranges entirely within Latin1 are expanded out entirely, in
3184              * order to make the transliteration a simple table look-up.
3185              * Ranges that extend above Latin1 have to be done differently, so
3186              * there is no advantage to expanding them here, so they are
3187              * stored here as Min, RANGE_INDICATOR, Max.  'RANGE_INDICATOR' is
3188              * a byte that can't occur in legal UTF-8, and hence can signify a
3189              * hyphen without any possible ambiguity.  On EBCDIC machines, if
3190              * the range is expressed as Unicode, the Latin1 portion is
3191              * expanded out even if the range extends above Latin1.  This is
3192              * because each code point in it has to be processed here
3193              * individually to get its native translation */
3194
3195             if (! dorange) {
3196
3197                 /* Here, we don't think we're in a range.  If the new character
3198                  * is not a hyphen; or if it is a hyphen, but it's too close to
3199                  * either edge to indicate a range, or if we haven't output any
3200                  * characters yet then it's a regular character. */
3201                 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3202                 {
3203
3204                     /* A regular character.  Process like any other, but first
3205                      * clear any flags */
3206                     didrange = FALSE;
3207                     dorange = FALSE;
3208 #ifdef EBCDIC
3209                     non_portable_endpoint = 0;
3210                     backslash_N = 0;
3211 #endif
3212                     /* The tests here for being above Latin1 and similar ones
3213                      * in the following 'else' suffice to find all such
3214                      * occurences in the constant, except those added by a
3215                      * backslash escape sequence, like \x{100}.  Mostly, those
3216                      * set 'has_above_latin1' as appropriate */
3217                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3218                         has_above_latin1 = TRUE;
3219                     }
3220
3221                     /* Drops down to generic code to process current byte */
3222                 }
3223                 else {  /* Is a '-' in the context where it means a range */
3224                     if (didrange) { /* Something like y/A-C-Z// */
3225                         Perl_croak(aTHX_ "Ambiguous range in transliteration"
3226                                          " operator");
3227                     }
3228
3229                     dorange = TRUE;
3230
3231                     s++;    /* Skip past the hyphen */
3232
3233                     /* d now points to where the end-range character will be
3234                      * placed.  Drop down to get that character.  We'll finish
3235                      * processing the range the next time through the loop */
3236
3237                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3238                         has_above_latin1 = TRUE;
3239                     }
3240
3241                     /* Drops down to generic code to process current byte */
3242                 }
3243             }  /* End of not a range */
3244             else {
3245                 /* Here we have parsed a range.  Now must handle it.  At this
3246                  * point:
3247                  * 'sv' is a SV* that contains the output string we are
3248                  *      constructing.  The final two characters in that string
3249                  *      are the range start and range end, in order.
3250                  * 'd'  points to just beyond the range end in the 'sv' string,
3251                  *      where we would next place something
3252                  */
3253                 char * max_ptr;
3254                 char * min_ptr;
3255                 IV range_min;
3256                 IV range_max;   /* last character in range */
3257                 STRLEN grow;
3258                 Size_t offset_to_min = 0;
3259                 Size_t extras = 0;
3260 #ifdef EBCDIC
3261                 bool convert_unicode;
3262                 IV real_range_max = 0;
3263 #endif
3264                 /* Get the code point values of the range ends. */
3265                 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3266                 offset_to_max = max_ptr - SvPVX_const(sv);
3267                 if (d_is_utf8) {
3268                     /* We know the utf8 is valid, because we just constructed
3269                      * it ourselves in previous loop iterations */
3270                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3271                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3272                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3273
3274                     /* This compensates for not all code setting
3275                      * 'has_above_latin1', so that we don't skip stuff that
3276                      * should be executed */
3277                     if (range_max > 255) {
3278                         has_above_latin1 = TRUE;
3279                     }
3280                 }
3281                 else {
3282                     min_ptr = max_ptr - 1;
3283                     range_min = * (U8*) min_ptr;
3284                     range_max = * (U8*) max_ptr;
3285                 }
3286
3287                 /* If the range is just a single code point, like tr/a-a/.../,
3288                  * that code point is already in the output, twice.  We can
3289                  * just back up over the second instance and avoid all the rest
3290                  * of the work.  But if it is a variant character, it's been
3291                  * counted twice, so decrement.  (This unlikely scenario is
3292                  * special cased, like the one for a range of 2 code points
3293                  * below, only because the main-line code below needs a range
3294                  * of 3 or more to work without special casing.  Might as well
3295                  * get it out of the way now.) */
3296                 if (UNLIKELY(range_max == range_min)) {
3297                     d = max_ptr;
3298                     if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3299                         utf8_variant_count--;
3300                     }
3301                     goto range_done;
3302                 }
3303
3304 #ifdef EBCDIC
3305                 /* On EBCDIC platforms, we may have to deal with portable
3306                  * ranges.  These happen if at least one range endpoint is a
3307                  * Unicode value (\N{...}), or if the range is a subset of
3308                  * [A-Z] or [a-z], and both ends are literal characters,
3309                  * like 'A', and not like \x{C1} */
3310                 convert_unicode =
3311                                cBOOL(backslash_N)   /* \N{} forces Unicode,
3312                                                        hence portable range */
3313                     || (     ! non_portable_endpoint
3314                         && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3315                            || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3316                 if (convert_unicode) {
3317
3318                     /* Special handling is needed for these portable ranges.
3319                      * They are defined to be in Unicode terms, which includes
3320                      * all the Unicode code points between the end points.
3321                      * Convert to Unicode to get the Unicode range.  Later we
3322                      * will convert each code point in the range back to
3323                      * native.  */
3324                     range_min = NATIVE_TO_UNI(range_min);
3325                     range_max = NATIVE_TO_UNI(range_max);
3326                 }
3327 #endif
3328
3329                 if (range_min > range_max) {
3330 #ifdef EBCDIC
3331                     if (convert_unicode) {
3332                         /* Need to convert back to native for meaningful
3333                          * messages for this platform */
3334                         range_min = UNI_TO_NATIVE(range_min);
3335                         range_max = UNI_TO_NATIVE(range_max);
3336                     }
3337 #endif
3338                     /* Use the characters themselves for the error message if
3339                      * ASCII printables; otherwise some visible representation
3340                      * of them */
3341                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3342                         Perl_croak(aTHX_
3343                          "Invalid range \"%c-%c\" in transliteration operator",
3344                          (char)range_min, (char)range_max);
3345                     }
3346 #ifdef EBCDIC
3347                     else if (convert_unicode) {
3348         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3349                         Perl_croak(aTHX_
3350                            "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3351                            UVXf "}\" in transliteration operator",
3352                            range_min, range_max);
3353                     }
3354 #endif
3355                     else {
3356         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3357                         Perl_croak(aTHX_
3358                            "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3359                            " in transliteration operator",
3360                            range_min, range_max);
3361                     }
3362                 }
3363
3364                 /* If the range is exactly two code points long, they are
3365                  * already both in the output */
3366                 if (UNLIKELY(range_min + 1 == range_max)) {
3367                     goto range_done;
3368                 }
3369
3370                 /* Here the range contains at least 3 code points */
3371
3372                 if (d_is_utf8) {
3373
3374                     /* If everything in the transliteration is below 256, we
3375                      * can avoid special handling later.  A translation table
3376                      * for each of those bytes is created by op.c.  So we
3377                      * expand out all ranges to their constituent code points.
3378                      * But if we've encountered something above 255, the
3379                      * expanding won't help, so skip doing that.  But if it's
3380                      * EBCDIC, we may have to look at each character below 256
3381                      * if we have to convert to/from Unicode values */
3382                     if (   has_above_latin1
3383 #ifdef EBCDIC
3384                         && (range_min > 255 || ! convert_unicode)
3385 #endif
3386                     ) {
3387                         const STRLEN off = d - SvPVX(sv);
3388                         const STRLEN extra = 1 + (send - s) + 1;
3389                         char *e;
3390
3391                         /* Move the high character one byte to the right; then
3392                          * insert between it and the range begin, an illegal
3393                          * byte which serves to indicate this is a range (using
3394                          * a '-' would be ambiguous). */
3395
3396                         if (off + extra > SvLEN(sv)) {
3397                             d = off + SvGROW(sv, off + extra);
3398                             max_ptr = d - off + offset_to_max;
3399                         }
3400
3401                         e = d++;
3402                         while (e-- > max_ptr) {
3403                             *(e + 1) = *e;
3404                         }
3405                         *(e + 1) = (char) RANGE_INDICATOR;
3406                         goto range_done;
3407                     }
3408
3409                     /* Here, we're going to expand out the range.  For EBCDIC
3410                      * the range can extend above 255 (not so in ASCII), so
3411                      * for EBCDIC, split it into the parts above and below
3412                      * 255/256 */
3413 #ifdef EBCDIC
3414                     if (range_max > 255) {
3415                         real_range_max = range_max;
3416                         range_max = 255;
3417                     }
3418 #endif
3419                 }
3420
3421                 /* Here we need to expand out the string to contain each
3422                  * character in the range.  Grow the output to handle this.
3423                  * For non-UTF8, we need a byte for each code point in the
3424                  * range, minus the three that we've already allocated for: the
3425                  * hyphen, the min, and the max.  For UTF-8, we need this
3426                  * plus an extra byte for each code point that occupies two
3427                  * bytes (is variant) when in UTF-8 (except we've already
3428                  * allocated for the end points, including if they are
3429                  * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3430                  * platforms, it's easy to calculate a precise number.  To
3431                  * start, we count the variants in the range, which we need
3432                  * elsewhere in this function anyway.  (For the case where it
3433                  * isn't easy to calculate, 'extras' has been initialized to 0,
3434                  * and the calculation is done in a loop further down.) */
3435 #ifdef EBCDIC
3436                 if (convert_unicode)
3437 #endif
3438                 {
3439                     /* This is executed unconditionally on ASCII, and for
3440                      * Unicode ranges on EBCDIC.  Under these conditions, all
3441                      * code points above a certain value are variant; and none
3442                      * under that value are.  We just need to find out how much
3443                      * of the range is above that value.  We don't count the
3444                      * end points here, as they will already have been counted
3445                      * as they were parsed. */
3446                     if (range_min >= UTF_CONTINUATION_MARK) {
3447
3448                         /* The whole range is made up of variants */
3449                         extras = (range_max - 1) - (range_min + 1) + 1;
3450                     }
3451                     else if (range_max >= UTF_CONTINUATION_MARK) {
3452
3453                         /* Only the higher portion of the range is variants */
3454                         extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3455                     }
3456
3457                     utf8_variant_count += extras;
3458                 }
3459
3460                 /* The base growth is the number of code points in the range,
3461                  * not including the endpoints, which have already been sized
3462                  * for (and output).  We don't subtract for the hyphen, as it
3463                  * has been parsed but not output, and the SvGROW below is
3464                  * based only on what's been output plus what's left to parse.
3465                  * */
3466                 grow = (range_max - 1) - (range_min + 1) + 1;
3467
3468                 if (d_is_utf8) {
3469 #ifdef EBCDIC
3470                     /* In some cases in EBCDIC, we haven't yet calculated a
3471                      * precise amount needed for the UTF-8 variants.  Just
3472                      * assume the worst case, that everything will expand by a
3473                      * byte */
3474                     if (! convert_unicode) {
3475                         grow *= 2;
3476                     }
3477                     else
3478 #endif
3479                     {
3480                         /* Otherwise we know exactly how many variants there
3481                          * are in the range. */
3482                         grow += extras;
3483                     }
3484                 }
3485
3486                 /* Grow, but position the output to overwrite the range min end
3487                  * point, because in some cases we overwrite that */
3488                 SvCUR_set(sv, d - SvPVX_const(sv));
3489                 offset_to_min = min_ptr - SvPVX_const(sv);
3490
3491                 /* See Note on sizing above. */
3492                 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3493                                              + (send - s)
3494                                              + grow
3495                                              + 1 /* Trailing NUL */ );
3496
3497                 /* Now, we can expand out the range. */
3498 #ifdef EBCDIC
3499                 if (convert_unicode) {
3500                     SSize_t i;
3501
3502                     /* Recall that the min and max are now in Unicode terms, so
3503                      * we have to convert each character to its native
3504                      * equivalent */
3505                     if (d_is_utf8) {
3506                         for (i = range_min; i <= range_max; i++) {
3507                             append_utf8_from_native_byte(
3508                                                     LATIN1_TO_NATIVE((U8) i),
3509                                                     (U8 **) &d);
3510                         }
3511                     }
3512                     else {
3513                         for (i = range_min; i <= range_max; i++) {
3514                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3515                         }
3516                     }
3517                 }
3518                 else
3519 #endif
3520                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3521                 {
3522                     /* Here, no conversions are necessary, which means that the
3523                      * first character in the range is already in 'd' and
3524                      * valid, so we can skip overwriting it */
3525                     if (d_is_utf8) {
3526                         SSize_t i;
3527                         d += UTF8SKIP(d);
3528                         for (i = range_min + 1; i <= range_max; i++) {
3529                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3530                         }
3531                     }
3532                     else {
3533                         SSize_t i;
3534                         d++;
3535                         assert(range_min + 1 <= range_max);
3536                         for (i = range_min + 1; i < range_max; i++) {
3537 #ifdef EBCDIC
3538                             /* In this case on EBCDIC, we haven't calculated
3539                              * the variants.  Do it here, as we go along */
3540                             if (! UVCHR_IS_INVARIANT(i)) {
3541                                 utf8_variant_count++;
3542                             }
3543 #endif
3544                             *d++ = (char)i;
3545                         }
3546
3547                         /* The range_max is done outside the loop so as to
3548                          * avoid having to special case not incrementing
3549                          * 'utf8_variant_count' on EBCDIC (it's already been
3550                          * counted when originally parsed) */
3551                         *d++ = (char) range_max;
3552                     }
3553                 }
3554
3555 #ifdef EBCDIC
3556                 /* If the original range extended above 255, add in that
3557                  * portion. */
3558                 if (real_range_max) {
3559                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3560                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3561                     if (real_range_max > 0x100) {
3562                         if (real_range_max > 0x101) {
3563                             *d++ = (char) RANGE_INDICATOR;
3564                         }
3565                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3566                     }
3567                 }
3568 #endif
3569
3570               range_done:
3571                 /* mark the range as done, and continue */
3572                 didrange = TRUE;
3573                 dorange = FALSE;
3574 #ifdef EBCDIC
3575                 non_portable_endpoint = 0;
3576                 backslash_N = 0;
3577 #endif
3578                 continue;
3579             } /* End of is a range */
3580         } /* End of transliteration.  Joins main code after these else's */
3581         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3582             char *s1 = s-1;
3583             int esc = 0;
3584             while (s1 >= start && *s1-- == '\\')
3585                 esc = !esc;
3586             if (!esc)
3587                 in_charclass = TRUE;
3588         }
3589         else if (*s == ']' && PL_lex_inpat && in_charclass) {
3590             char *s1 = s-1;
3591             int esc = 0;
3592             while (s1 >= start && *s1-- == '\\')
3593                 esc = !esc;
3594             if (!esc)
3595                 in_charclass = FALSE;
3596         }
3597             /* skip for regexp comments /(?#comment)/, except for the last
3598              * char, which will be done separately.  Stop on (?{..}) and
3599              * friends */
3600         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3601             if (s[2] == '#') {
3602                 if (s_is_utf8) {
3603                     PERL_UINT_FAST8_T  len = UTF8SKIP(s);
3604
3605                     while (s + len < send && *s != ')') {
3606                         Copy(s, d, len, U8);
3607                         d += len;
3608                         s += len;
3609                         len = UTF8_SAFE_SKIP(s, send);
3610                     }
3611                 }
3612                 else while (s+1 < send && *s != ')') {
3613                     *d++ = *s++;
3614                 }
3615             }
3616             else if (!PL_lex_casemods
3617                      && (    s[2] == '{' /* This should match regcomp.c */
3618                          || (s[2] == '?' && s[3] == '{')))
3619             {
3620                 break;
3621             }
3622         }
3623             /* likewise skip #-initiated comments in //x patterns */
3624         else if (*s == '#'
3625                  && PL_lex_inpat
3626                  && !in_charclass
3627                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3628         {
3629             while (s < send && *s != '\n')
3630                 *d++ = *s++;
3631         }
3632             /* no further processing of single-quoted regex */
3633         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3634             goto default_action;
3635
3636             /* check for embedded arrays
3637              * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3638              */
3639         else if (*s == '@' && s[1]) {
3640             if (UTF
3641                ? isIDFIRST_utf8_safe(s+1, send)
3642                : isWORDCHAR_A(s[1]))
3643             {
3644                 break;
3645             }
3646             if (memCHRs(":'{$", s[1]))
3647                 break;
3648             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3649                 break; /* in regexp, neither @+ nor @- are interpolated */
3650         }
3651             /* check for embedded scalars.  only stop if we're sure it's a
3652              * variable.  */
3653         else if (*s == '$') {
3654             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3655                 break;
3656             if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3657                 if (s[1] == '\\') {
3658                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3659                                    "Possible unintended interpolation of $\\ in regex");
3660                 }
3661                 break;          /* in regexp, $ might be tail anchor */
3662             }
3663         }
3664
3665         /* End of else if chain - OP_TRANS rejoin rest */
3666
3667         if (UNLIKELY(s >= send)) {
3668             assert(s == send);
3669             break;
3670         }
3671
3672         /* backslashes */
3673         if (*s == '\\' && s+1 < send) {
3674             char* bslash = s;   /* point to beginning \ */
3675             char* rbrace;       /* point to ending '}' */
3676             char* e;            /* 1 past the meat (non-blanks) before the
3677                                    brace */
3678             s++;
3679
3680             /* warn on \1 - \9 in substitution replacements, but note that \11
3681              * is an octal; and \19 is \1 followed by '9' */
3682             if (PL_lex_inwhat == OP_SUBST
3683                 && !PL_lex_inpat
3684                 && isDIGIT(*s)
3685                 && *s != '0'
3686                 && !isDIGIT(s[1]))
3687             {
3688                 /* diag_listed_as: \%d better written as $%d */
3689                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3690                 s = bslash;
3691                 *s = '$';
3692                 break;
3693             }
3694
3695             /* string-change backslash escapes */
3696             if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3697                 s = bslash;
3698                 break;
3699             }
3700             /* In a pattern, process \N, but skip any other backslash escapes.
3701              * This is because we don't want to translate an escape sequence
3702              * into a meta symbol and have the regex compiler use the meta
3703              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3704              * in spite of this, we do have to process \N here while the proper
3705              * charnames handler is in scope.  See bugs #56444 and #62056.
3706              *
3707              * There is a complication because \N in a pattern may also stand
3708              * for 'match a non-nl', and not mean a charname, in which case its
3709              * processing should be deferred to the regex compiler.  To be a
3710              * charname it must be followed immediately by a '{', and not look
3711              * like \N followed by a curly quantifier, i.e., not something like
3712              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3713              * quantifier */
3714             else if (PL_lex_inpat
3715                     && (*s != 'N'
3716                         || s[1] != '{'
3717                         || regcurly(s + 1, send, NULL)))
3718             {
3719                 *d++ = '\\';
3720                 goto default_action;
3721             }
3722
3723             switch (*s) {
3724             default:
3725                 {
3726                     if ((isALPHANUMERIC(*s)))
3727                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3728                                        "Unrecognized escape \\%c passed through",
3729                                        *s);
3730                     /* default action is to copy the quoted character */
3731                     goto default_action;
3732                 }
3733
3734             /* eg. \132 indicates the octal constant 0132 */
3735             case '0': case '1': case '2': case '3':
3736             case '4': case '5': case '6': case '7':
3737                 {
3738                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3739                               | PERL_SCAN_NOTIFY_ILLDIGIT;
3740                     STRLEN len = 3;
3741                     uv = grok_oct(s, &len, &flags, NULL);
3742                     s += len;
3743                     if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3744                         && s < send
3745                         && isDIGIT(*s)  /* like \08, \178 */
3746                         && ckWARN(WARN_MISC))
3747                     {
3748                         Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3749                             form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3750                     }
3751                 }
3752                 goto NUM_ESCAPE_INSERT;
3753
3754             /* eg. \o{24} indicates the octal constant \024 */
3755             case 'o':
3756                 {
3757                     const char* error;
3758
3759                     if (! grok_bslash_o(&s, send,
3760                                                &uv, &error,
3761                                                NULL,
3762                                                FALSE, /* Not strict */
3763                                                FALSE, /* No illegal cp's */
3764                                                UTF))
3765                     {
3766                         yyerror(error);
3767                         uv = 0; /* drop through to ensure range ends are set */
3768                     }
3769                     goto NUM_ESCAPE_INSERT;
3770                 }
3771
3772             /* eg. \x24 indicates the hex constant 0x24 */
3773             case 'x':
3774                 {
3775                     const char* error;
3776
3777                     if (! grok_bslash_x(&s, send,
3778                                                &uv, &error,
3779                                                NULL,
3780                                                FALSE, /* Not strict */
3781                                                FALSE, /* No illegal cp's */
3782                                                UTF))
3783                     {
3784                         yyerror(error);
3785                         uv = 0; /* drop through to ensure range ends are set */
3786                     }
3787                 }
3788
3789               NUM_ESCAPE_INSERT:
3790                 /* Insert oct or hex escaped character. */
3791
3792                 /* Here uv is the ordinal of the next character being added */
3793                 if (UVCHR_IS_INVARIANT(uv)) {
3794                     *d++ = (char) uv;
3795                 }
3796                 else {
3797                     if (!d_is_utf8 && uv > 255) {
3798
3799                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3800                          * If we've only seen invariants so far, all we have to
3801                          * do is turn on the flag */
3802                         if (utf8_variant_count == 0) {
3803                             SvUTF8_on(sv);
3804                         }
3805                         else {
3806                             SvCUR_set(sv, d - SvPVX_const(sv));
3807                             SvPOK_on(sv);
3808                             *d = '\0';
3809
3810                             sv_utf8_upgrade_flags_grow(
3811                                            sv,
3812                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3813
3814                                            /* Since we're having to grow here,
3815                                             * make sure we have enough room for
3816                                             * this escape and a NUL, so the
3817                                             * code immediately below won't have
3818                                             * to actually grow again */
3819                                           UVCHR_SKIP(uv)
3820                                         + (STRLEN)(send - s) + 1);
3821                             d = SvPVX(sv) + SvCUR(sv);
3822                         }
3823
3824                         has_above_latin1 = TRUE;
3825                         d_is_utf8 = TRUE;
3826                     }
3827
3828                     if (! d_is_utf8) {
3829                         *d++ = (char)uv;
3830                         utf8_variant_count++;
3831                     }
3832                     else {
3833                        /* Usually, there will already be enough room in 'sv'
3834                         * since such escapes are likely longer than any UTF-8
3835                         * sequence they can end up as.  This isn't the case on
3836                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3837                         * UTF-8 for it contains 14.  And, we have to allow for
3838                         * a trailing NUL.  It probably can't happen on ASCII
3839                         * platforms, but be safe.  See Note on sizing above. */
3840                         const STRLEN needed = d - SvPVX(sv)
3841                                             + UVCHR_SKIP(uv)
3842                                             + (send - s)
3843                                             + 1;
3844                         if (UNLIKELY(needed > SvLEN(sv))) {
3845                             SvCUR_set(sv, d - SvPVX_const(sv));
3846                             d = SvCUR(sv) + SvGROW(sv, needed);
3847                         }
3848
3849                         d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3850                                                    (ckWARN(WARN_PORTABLE))
3851                                                    ? UNICODE_WARN_PERL_EXTENDED
3852                                                    : 0);
3853                     }
3854                 }
3855 #ifdef EBCDIC
3856                 non_portable_endpoint++;
3857 #endif
3858                 continue;
3859
3860             case 'N':
3861                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3862                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3863                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3864                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3865                  * convenience all three forms are referred to as "named
3866                  * characters" below.
3867                  *
3868                  * For patterns, \N also can mean to match a non-newline.  Code
3869                  * before this 'switch' statement should already have handled
3870                  * this situation, and hence this code only has to deal with
3871                  * the named character cases.
3872                  *
3873                  * For non-patterns, the named characters are converted to
3874                  * their string equivalents.  In patterns, named characters are
3875                  * not converted to their ultimate forms for the same reasons
3876                  * that other escapes aren't (mainly that the ultimate
3877                  * character could be considered a meta-symbol by the regex
3878                  * compiler).  Instead, they are converted to the \N{U+...}
3879                  * form to get the value from the charnames that is in effect
3880                  * right now, while preserving the fact that it was a named
3881                  * character, so that the regex compiler knows this.
3882                  *
3883                  * The structure of this section of code (besides checking for
3884                  * errors and upgrading to utf8) is:
3885                  *    If the named character is of the form \N{U+...}, pass it
3886                  *      through if a pattern; otherwise convert the code point
3887                  *      to utf8
3888                  *    Otherwise must be some \N{NAME}: convert to
3889                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3890                  *
3891                  * Transliteration is an exception.  The conversion to utf8 is
3892                  * only done if the code point requires it to be representable.
3893                  *
3894                  * Here, 's' points to the 'N'; the test below is guaranteed to
3895                  * succeed if we are being called on a pattern, as we already
3896                  * know from a test above that the next character is a '{'.  A
3897                  * non-pattern \N must mean 'named character', which requires
3898                  * braces */
3899                 s++;
3900                 if (*s != '{') {
3901                     yyerror("Missing braces on \\N{}");
3902                     *d++ = '\0';
3903                     continue;
3904                 }
3905                 s++;
3906
3907                 /* If there is no matching '}', it is an error. */
3908                 if (! (rbrace = (char *) memchr(s, '}', send - s))) {
3909                     if (! PL_lex_inpat) {
3910                         yyerror("Missing right brace on \\N{}");
3911                     } else {
3912                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3913                     }
3914                     yyquit(); /* Have exhausted the input. */
3915                 }
3916
3917                 /* Here it looks like a named character */
3918                 while (s < rbrace && isBLANK(*s)) {
3919                     s++;
3920                 }
3921
3922                 e = rbrace;
3923                 while (s < e && isBLANK(*(e - 1))) {
3924                     e--;
3925                 }
3926
3927                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3928                     s += 2;         /* Skip to next char after the 'U+' */
3929                     if (PL_lex_inpat) {
3930
3931                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3932                         /* Check the syntax.  */
3933                         if (!isXDIGIT(*s)) {
3934                           bad_NU:
3935                             yyerror(
3936                                 "Invalid hexadecimal number in \\N{U+...}"
3937                             );
3938                             s = rbrace + 1;
3939                             *d++ = '\0';
3940                             continue;
3941                         }
3942                         while (++s < e) {
3943                             if (isXDIGIT(*s))
3944                                 continue;
3945                             else if ((*s == '.' || *s == '_')
3946                                   && isXDIGIT(s[1]))
3947                                 continue;
3948                             goto bad_NU;
3949                         }
3950
3951                         /* Pass everything through unchanged.
3952                          * +1 is to include the '}' */
3953                         Copy(bslash, d, rbrace - bslash + 1, char);
3954                         d += rbrace - bslash + 1;
3955                     }
3956                     else {  /* Not a pattern: convert the hex to string */
3957                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3958                                   | PERL_SCAN_SILENT_ILLDIGIT
3959                                   | PERL_SCAN_SILENT_OVERFLOW
3960                                   | PERL_SCAN_DISALLOW_PREFIX;
3961                         STRLEN len = e - s;
3962
3963                         uv = grok_hex(s, &len, &flags, NULL);
3964                         if (len == 0 || (len != (STRLEN)(e - s)))
3965                             goto bad_NU;
3966
3967                         if (    uv > MAX_LEGAL_CP
3968                             || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
3969                         {
3970                             yyerror(form_cp_too_large_msg(16, s, len, 0));
3971                             uv = 0; /* drop through to ensure range ends are
3972                                        set */
3973                         }
3974
3975                          /* For non-tr///, if the destination is not in utf8,
3976                           * unconditionally recode it to be so.  This is
3977                           * because \N{} implies Unicode semantics, and scalars
3978                           * have to be in utf8 to guarantee those semantics.
3979                           * tr/// doesn't care about Unicode rules, so no need
3980                           * there to upgrade to UTF-8 for small enough code
3981                           * points */
3982                         if (! d_is_utf8 && (   uv > 0xFF
3983                                            || PL_lex_inwhat != OP_TRANS))
3984                         {
3985                             /* See Note on sizing above.  */
3986                             const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1;
3987
3988                             SvCUR_set(sv, d - SvPVX_const(sv));
3989                             SvPOK_on(sv);
3990                             *d = '\0';
3991
3992                             if (utf8_variant_count == 0) {
3993                                 SvUTF8_on(sv);
3994                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3995                             }
3996                             else {
3997                                 sv_utf8_upgrade_flags_grow(
3998                                                sv,
3999                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4000                                                extra);
4001                                 d = SvPVX(sv) + SvCUR(sv);
4002                             }
4003
4004                             d_is_utf8 = TRUE;
4005                             has_above_latin1 = TRUE;
4006                         }
4007
4008                         /* Add the (Unicode) code point to the output. */
4009                         if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
4010                             *d++ = (char) LATIN1_TO_NATIVE(uv);
4011                         }
4012                         else {
4013                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
4014                                                    (ckWARN(WARN_PORTABLE))
4015                                                    ? UNICODE_WARN_PERL_EXTENDED
4016                                                    : 0);
4017                         }
4018                     }
4019                 }
4020                 else     /* Here is \N{NAME} but not \N{U+...}. */
4021                      if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
4022                 {   /* Failed.  We should die eventually, but for now use a NUL
4023                        to keep parsing */
4024                     *d++ = '\0';
4025                 }
4026                 else {  /* Successfully evaluated the name */
4027                     STRLEN len;
4028                     const char *str = SvPV_const(res, len);
4029                     if (PL_lex_inpat) {
4030
4031                         if (! len) { /* The name resolved to an empty string */
4032                             const char empty_N[] = "\\N{_}";
4033                             Copy(empty_N, d, sizeof(empty_N) - 1, char);
4034                             d += sizeof(empty_N) - 1;
4035                         }
4036                         else {
4037                             /* In order to not lose information for the regex
4038                             * compiler, pass the result in the specially made
4039                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
4040                             * the code points in hex of each character
4041                             * returned by charnames */
4042
4043                             const char *str_end = str + len;
4044                             const STRLEN off = d - SvPVX_const(sv);
4045
4046                             if (! SvUTF8(res)) {
4047                                 /* For the non-UTF-8 case, we can determine the
4048                                  * exact length needed without having to parse
4049                                  * through the string.  Each character takes up
4050                                  * 2 hex digits plus either a trailing dot or
4051                                  * the "}" */
4052                                 const char initial_text[] = "\\N{U+";
4053                                 const STRLEN initial_len = sizeof(initial_text)
4054                                                            - 1;
4055                                 d = off + SvGROW(sv, off
4056                                                     + 3 * len
4057
4058                                                     /* +1 for trailing NUL */
4059                                                     + initial_len + 1
4060
4061                                                     + (STRLEN)(send - rbrace));
4062                                 Copy(initial_text, d, initial_len, char);
4063                                 d += initial_len;
4064                                 while (str < str_end) {
4065                                     char hex_string[4];
4066                                     int len =
4067                                         my_snprintf(hex_string,
4068                                                   sizeof(hex_string),
4069                                                   "%02X.",
4070
4071                                                   /* The regex compiler is
4072                                                    * expecting Unicode, not
4073                                                    * native */
4074                                                   NATIVE_TO_LATIN1(*str));
4075                                     PERL_MY_SNPRINTF_POST_GUARD(len,
4076                                                            sizeof(hex_string));
4077                                     Copy(hex_string, d, 3, char);
4078                                     d += 3;
4079                                     str++;
4080                                 }
4081                                 d--;    /* Below, we will overwrite the final
4082                                            dot with a right brace */
4083                             }
4084                             else {
4085                                 STRLEN char_length; /* cur char's byte length */
4086
4087                                 /* and the number of bytes after this is
4088                                  * translated into hex digits */
4089                                 STRLEN output_length;
4090
4091                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
4092                                  * for max('U+', '.'); and 1 for NUL */
4093                                 char hex_string[2 * UTF8_MAXBYTES + 5];
4094
4095                                 /* Get the first character of the result. */
4096                                 U32 uv = utf8n_to_uvchr((U8 *) str,
4097                                                         len,
4098                                                         &char_length,
4099                                                         UTF8_ALLOW_ANYUV);
4100                                 /* Convert first code point to Unicode hex,
4101                                  * including the boiler plate before it. */
4102                                 output_length =
4103                                     my_snprintf(hex_string, sizeof(hex_string),
4104                                              "\\N{U+%X",
4105                                              (unsigned int) NATIVE_TO_UNI(uv));
4106
4107                                 /* Make sure there is enough space to hold it */
4108                                 d = off + SvGROW(sv, off
4109                                                     + output_length
4110                                                     + (STRLEN)(send - rbrace)
4111                                                     + 2);       /* '}' + NUL */
4112                                 /* And output it */
4113                                 Copy(hex_string, d, output_length, char);
4114                                 d += output_length;
4115
4116                                 /* For each subsequent character, append dot and
4117                                 * its Unicode code point in hex */
4118                                 while ((str += char_length) < str_end) {
4119                                     const STRLEN off = d - SvPVX_const(sv);
4120                                     U32 uv = utf8n_to_uvchr((U8 *) str,
4121                                                             str_end - str,
4122                                                             &char_length,
4123                                                             UTF8_ALLOW_ANYUV);
4124                                     output_length =
4125                                         my_snprintf(hex_string,
4126                                              sizeof(hex_string),
4127                                              ".%X",
4128                                              (unsigned int) NATIVE_TO_UNI(uv));
4129
4130                                     d = off + SvGROW(sv, off
4131                                                         + output_length
4132                                                         + (STRLEN)(send - rbrace)
4133                                                         + 2);   /* '}' +  NUL */
4134                                     Copy(hex_string, d, output_length, char);
4135                                     d += output_length;
4136                                 }
4137                             }
4138
4139                             *d++ = '}'; /* Done.  Add the trailing brace */
4140                         }
4141                     }
4142                     else { /* Here, not in a pattern.  Convert the name to a
4143                             * string. */
4144
4145                         if (PL_lex_inwhat == OP_TRANS) {
4146                             str = SvPV_const(res, len);
4147                             if (len > ((SvUTF8(res))
4148                                        ? UTF8SKIP(str)
4149                                        : 1U))
4150                             {
4151                                 yyerror(Perl_form(aTHX_
4152                                     "%.*s must not be a named sequence"
4153                                     " in transliteration operator",
4154                                         /*  +1 to include the "}" */
4155                                     (int) (rbrace + 1 - start), start));
4156                                 *d++ = '\0';
4157                                 goto end_backslash_N;
4158                             }
4159
4160                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4161                                 has_above_latin1 = TRUE;
4162                             }
4163
4164                         }
4165                         else if (! SvUTF8(res)) {
4166                             /* Make sure \N{} return is UTF-8.  This is because
4167                              * \N{} implies Unicode semantics, and scalars have
4168                              * to be in utf8 to guarantee those semantics; but
4169                              * not needed in tr/// */
4170                             sv_utf8_upgrade_flags(res, 0);
4171                             str = SvPV_const(res, len);
4172                         }
4173
4174                          /* Upgrade destination to be utf8 if this new
4175                           * component is */
4176                         if (! d_is_utf8 && SvUTF8(res)) {
4177                             /* See Note on sizing above.  */
4178                             const STRLEN extra = len + (send - s) + 1;
4179
4180                             SvCUR_set(sv, d - SvPVX_const(sv));
4181                             SvPOK_on(sv);
4182                             *d = '\0';
4183
4184                             if (utf8_variant_count == 0) {
4185                                 SvUTF8_on(sv);
4186                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4187                             }
4188                             else {
4189                                 sv_utf8_upgrade_flags_grow(sv,
4190                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4191                                                 extra);
4192                                 d = SvPVX(sv) + SvCUR(sv);
4193                             }
4194                             d_is_utf8 = TRUE;
4195                         } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */
4196
4197                             /* See Note on sizing above.  (NOTE: SvCUR() is not
4198                              * set correctly here). */
4199                             const STRLEN extra = len + (send - rbrace) + 1;
4200                             const STRLEN off = d - SvPVX_const(sv);
4201                             d = off + SvGROW(sv, off + extra);
4202                         }
4203                         Copy(str, d, len, char);
4204                         d += len;
4205                     }
4206
4207                     SvREFCNT_dec(res);
4208
4209                 } /* End \N{NAME} */
4210
4211               end_backslash_N:
4212 #ifdef EBCDIC
4213                 backslash_N++; /* \N{} is defined to be Unicode */
4214 #endif
4215                 s = rbrace + 1;  /* Point to just after the '}' */
4216                 continue;
4217
4218             /* \c is a control character */
4219             case 'c':
4220                 s++;
4221                 if (s < send) {
4222                     const char * message;
4223
4224                     if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4225                         yyerror(message);
4226                         yyquit();   /* Have always immediately croaked on
4227                                        errors in this */
4228                     }
4229                     d++;
4230                 }
4231                 else {
4232                     yyerror("Missing control char name in \\c");
4233                     yyquit();   /* Are at end of input, no sense continuing */
4234                 }
4235 #ifdef EBCDIC
4236                 non_portable_endpoint++;
4237 #endif
4238                 break;
4239
4240             /* printf-style backslashes, formfeeds, newlines, etc */
4241             case 'b':
4242                 *d++ = '\b';
4243                 break;
4244             case 'n':
4245                 *d++ = '\n';
4246                 break;
4247             case 'r':
4248                 *d++ = '\r';
4249                 break;
4250             case 'f':
4251                 *d++ = '\f';
4252                 break;
4253             case 't':
4254                 *d++ = '\t';
4255                 break;
4256             case 'e':
4257                 *d++ = ESC_NATIVE;
4258                 break;
4259             case 'a':
4260                 *d++ = '\a';
4261                 break;
4262             } /* end switch */
4263
4264             s++;
4265             continue;
4266         } /* end if (backslash) */
4267
4268     default_action:
4269         /* Just copy the input to the output, though we may have to convert
4270          * to/from UTF-8.
4271          *
4272          * If the input has the same representation in UTF-8 as not, it will be
4273          * a single byte, and we don't care about UTF8ness; just copy the byte */
4274         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4275             *d++ = *s++;
4276         }
4277         else if (! s_is_utf8 && ! d_is_utf8) {
4278             /* If neither source nor output is UTF-8, is also a single byte,
4279              * just copy it; but this byte counts should we later have to
4280              * convert to UTF-8 */
4281             *d++ = *s++;
4282             utf8_variant_count++;
4283         }
4284         else if (s_is_utf8 && d_is_utf8) {   /* Both UTF-8, can just copy */
4285             const STRLEN len = UTF8SKIP(s);
4286
4287             /* We expect the source to have already been checked for
4288              * malformedness */
4289             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4290
4291             Copy(s, d, len, U8);
4292             d += len;
4293             s += len;
4294         }
4295         else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4296             STRLEN need = send - s + 1; /* See Note on sizing above. */
4297
4298             SvCUR_set(sv, d - SvPVX_const(sv));
4299             SvPOK_on(sv);
4300             *d = '\0';
4301
4302             if (utf8_variant_count == 0) {
4303                 SvUTF8_on(sv);
4304                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4305             }
4306             else {
4307                 sv_utf8_upgrade_flags_grow(sv,
4308                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4309                                            need);
4310                 d = SvPVX(sv) + SvCUR(sv);
4311             }
4312             d_is_utf8 = TRUE;
4313             goto default_action; /* Redo, having upgraded so both are UTF-8 */
4314         }
4315         else {  /* UTF8ness matters: convert this non-UTF8 source char to
4316                    UTF-8 for output.  It will occupy 2 bytes, but don't include
4317                    the input byte since we haven't incremented 's' yet. See
4318                    Note on sizing above. */
4319             const STRLEN off = d - SvPVX(sv);
4320             const STRLEN extra = 2 + (send - s - 1) + 1;
4321             if (off + extra > SvLEN(sv)) {
4322                 d = off + SvGROW(sv, off + extra);
4323             }
4324             *d++ = UTF8_EIGHT_BIT_HI(*s);
4325             *d++ = UTF8_EIGHT_BIT_LO(*s);
4326             s++;
4327         }
4328     } /* while loop to process each character */
4329
4330     {
4331         const STRLEN off = d - SvPVX(sv);
4332
4333         /* See if room for the terminating NUL */
4334         if (UNLIKELY(off >= SvLEN(sv))) {
4335
4336 #ifndef DEBUGGING
4337
4338             if (off > SvLEN(sv))
4339 #endif
4340                 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4341                         " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4342
4343             /* Whew!  Here we don't have room for the terminating NUL, but
4344              * everything else so far has fit.  It's not too late to grow
4345              * to fit the NUL and continue on.  But it is a bug, as the code
4346              * above was supposed to have made room for this, so under
4347              * DEBUGGING builds, we panic anyway.  */
4348             d = off + SvGROW(sv, off + 1);
4349         }
4350     }
4351
4352     /* terminate the string and set up the sv */
4353     *d = '\0';
4354     SvCUR_set(sv, d - SvPVX_const(sv));
4355
4356     SvPOK_on(sv);
4357     if (d_is_utf8) {
4358         SvUTF8_on(sv);
4359     }
4360
4361     /* shrink the sv if we allocated more than we used */
4362     if (SvCUR(sv) + 5 < SvLEN(sv)) {
4363         SvPV_shrink_to_cur(sv);
4364     }
4365
4366     /* return the substring (via pl_yylval) only if we parsed anything */
4367     if (s > start) {
4368         char *s2 = start;
4369         for (; s2 < s; s2++) {
4370             if (*s2 == '\n')
4371                 COPLINE_INC_WITH_HERELINES;
4372         }
4373         SvREFCNT_inc_simple_void_NN(sv);
4374         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4375             && ! PL_parser->lex_re_reparsing)
4376         {
4377             const char *const key = PL_lex_inpat ? "qr" : "q";
4378             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4379             const char *type;
4380             STRLEN typelen;
4381
4382             if (PL_lex_inwhat == OP_TRANS) {
4383                 type = "tr";
4384                 typelen = 2;
4385             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4386                 type = "s";
4387                 typelen = 1;
4388             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4389                 type = "q";
4390                 typelen = 1;
4391             } else {
4392                 type = "qq";
4393                 typelen = 2;
4394             }
4395
4396             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4397                                 type, typelen, NULL);
4398         }
4399         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4400     }
4401     LEAVE_with_name("scan_const");
4402     return s;
4403 }
4404
4405 /* S_intuit_more
4406  * Returns TRUE if there's more to the expression (e.g., a subscript),
4407  * FALSE otherwise.
4408  *
4409  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4410  *
4411  * ->[ and ->{ return TRUE
4412  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4413  * { and [ outside a pattern are always subscripts, so return TRUE
4414  * if we're outside a pattern and it's not { or [, then return FALSE
4415  * if we're in a pattern and the first char is a {
4416  *   {4,5} (any digits around the comma) returns FALSE
4417  * if we're in a pattern and the first char is a [
4418  *   [] returns FALSE
4419  *   [SOMETHING] has a funky algorithm to decide whether it's a
4420  *      character class or not.  It has to deal with things like
4421  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4422  * anything else returns TRUE
4423  */
4424
4425 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4426
4427 STATIC int
4428 S_intuit_more(pTHX_ char *s, char *e)
4429 {
4430     PERL_ARGS_ASSERT_INTUIT_MORE;
4431
4432     if (PL_lex_brackets)
4433         return TRUE;
4434     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4435         return TRUE;
4436     if (*s == '-' && s[1] == '>'
4437      && FEATURE_POSTDEREF_QQ_IS_ENABLED
4438      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4439         ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
4440         return TRUE;
4441     if (*s != '{' && *s != '[')
4442         return FALSE;
4443     PL_parser->sub_no_recover = TRUE;
4444     if (!PL_lex_inpat)
4445         return TRUE;
4446
4447     /* In a pattern, so maybe we have {n,m}. */
4448     if (*s == '{') {
4449         if (regcurly(s, e, NULL)) {
4450             return FALSE;
4451         }
4452         return TRUE;
4453     }
4454
4455     /* On the other hand, maybe we have a character class */
4456
4457     s++;
4458     if (*s == ']' || *s == '^')
4459         return FALSE;
4460     else {
4461         /* this is terrifying, and it works */
4462         int weight;
4463         char seen[256];
4464         const char * const send = (char *) memchr(s, ']', e - s);
4465         unsigned char un_char, last_un_char;
4466         char tmpbuf[sizeof PL_tokenbuf * 4];
4467
4468         if (!send)              /* has to be an expression */
4469             return TRUE;
4470         weight = 2;             /* let's weigh the evidence */
4471
4472         if (*s == '$')
4473             weight -= 3;
4474         else if (isDIGIT(*s)) {
4475             if (s[1] != ']') {
4476                 if (isDIGIT(s[1]) && s[2] == ']')
4477                     weight -= 10;
4478             }
4479             else
4480                 weight -= 100;
4481         }
4482         Zero(seen,256,char);
4483         un_char = 255;
4484         for (; s < send; s++) {
4485             last_un_char = un_char;
4486             un_char = (unsigned char)*s;
4487             switch (*s) {
4488             case '@':
4489             case '&':
4490             case '$':
4491                 weight -= seen[un_char] * 10;
4492                 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4493                     int len;
4494                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4495                     len = (int)strlen(tmpbuf);
4496                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4497                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4498                         weight -= 100;
4499                     else
4500                         weight -= 10;
4501                 }
4502                 else if (*s == '$'
4503                          && s[1]
4504                          && memCHRs("[#!%*<>()-=",s[1]))
4505                 {
4506                     if (/*{*/ memCHRs("])} =",s[2]))
4507                         weight -= 10;
4508                     else
4509                         weight -= 1;
4510                 }
4511                 break;
4512             case '\\':
4513                 un_char = 254;
4514                 if (s[1]) {
4515                     if (memCHRs("wds]",s[1]))
4516                         weight += 100;
4517                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4518                         weight += 1;
4519                     else if (memCHRs("rnftbxcav",s[1]))
4520                         weight += 40;
4521                     else if (isDIGIT(s[1])) {
4522                         weight += 40;
4523                         while (s[1] && isDIGIT(s[1]))
4524                             s++;
4525                     }
4526                 }
4527                 else
4528                     weight += 100;
4529                 break;
4530             case '-':
4531                 if (s[1] == '\\')
4532                     weight += 50;
4533                 if (memCHRs("aA01! ",last_un_char))
4534                     weight += 30;
4535                 if (memCHRs("zZ79~",s[1]))
4536                     weight += 30;
4537                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4538                     weight -= 5;        /* cope with negative subscript */
4539                 break;
4540             default:
4541                 if (!isWORDCHAR(last_un_char)
4542                     && !(last_un_char == '$' || last_un_char == '@'
4543                          || last_un_char == '&')
4544                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4545                     char *d = s;
4546                     while (isALPHA(*s))
4547                         s++;
4548                     if (keyword(d, s - d, 0))
4549                         weight -= 150;
4550                 }
4551                 if (un_char == last_un_char + 1)
4552                     weight += 5;
4553                 weight -= seen[un_char];
4554                 break;
4555             }
4556             seen[un_char]++;
4557         }
4558         if (weight >= 0)        /* probably a character class */
4559             return FALSE;
4560     }
4561
4562     return TRUE;
4563 }
4564
4565 /*
4566  * S_intuit_method
4567  *
4568  * Does all the checking to disambiguate
4569  *   foo bar
4570  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4571  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4572  *
4573  * First argument is the stuff after the first token, e.g. "bar".
4574  *
4575  * Not a method if foo is a filehandle.
4576  * Not a method if foo is a subroutine prototyped to take a filehandle.
4577  * Not a method if it's really "Foo $bar"
4578  * Method if it's "foo $bar"
4579  * Not a method if it's really "print foo $bar"
4580  * Method if it's really "foo package::" (interpreted as package->foo)
4581  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4582  * Not a method if bar is a filehandle or package, but is quoted with
4583  *   =>
4584  */
4585
4586 STATIC int
4587 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4588 {
4589     char *s = start + (*start == '$');
4590     char tmpbuf[sizeof PL_tokenbuf];
4591     STRLEN len;
4592     GV* indirgv;
4593         /* Mustn't actually add anything to a symbol table.
4594            But also don't want to "initialise" any placeholder
4595            constants that might already be there into full
4596            blown PVGVs with attached PVCV.  */
4597     GV * const gv =
4598         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4599
4600     PERL_ARGS_ASSERT_INTUIT_METHOD;
4601
4602     if (!FEATURE_INDIRECT_IS_ENABLED)
4603         return 0;
4604
4605     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4606             return 0;
4607     if (cv && SvPOK(cv)) {
4608         const char *proto = CvPROTO(cv);
4609         if (proto) {
4610             while (*proto && (isSPACE(*proto) || *proto == ';'))
4611                 proto++;
4612             if (*proto == '*')
4613                 return 0;
4614         }
4615     }
4616
4617     if (*start == '$') {
4618         SSize_t start_off = start - SvPVX(PL_linestr);
4619         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4620             || isUPPER(*PL_tokenbuf))
4621             return 0;
4622         /* this could be $# */
4623         if (isSPACE(*s))
4624             s = skipspace(s);
4625         PL_bufptr = SvPVX(PL_linestr) + start_off;
4626         PL_expect = XREF;
4627         return *s == '(' ? FUNCMETH : METHOD;
4628     }
4629
4630     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4631     /* start is the beginning of the possible filehandle/object,
4632      * and s is the end of it
4633      * tmpbuf is a copy of it (but with single quotes as double colons)
4634      */
4635
4636     if (!keyword(tmpbuf, len, 0)) {
4637         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4638             len -= 2;
4639             tmpbuf[len] = '\0';
4640             goto bare_package;
4641         }
4642         indirgv = gv_fetchpvn_flags(tmpbuf, len,
4643                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4644                                     SVt_PVCV);
4645         if (indirgv && SvTYPE(indirgv) != SVt_NULL
4646          && (!isGV(indirgv) || GvCVu(indirgv)))
4647             return 0;
4648         /* filehandle or package name makes it a method */
4649         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4650             s = skipspace(s);
4651             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4652                 return 0;       /* no assumptions -- "=>" quotes bareword */
4653       bare_package:
4654             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4655                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4656             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4657             PL_expect = XTERM;
4658             force_next(BAREWORD);
4659             PL_bufptr = s;
4660             return *s == '(' ? FUNCMETH : METHOD;
4661         }
4662     }
4663     return 0;
4664 }
4665
4666 /* Encoded script support. filter_add() effectively inserts a
4667  * 'pre-processing' function into the current source input stream.
4668  * Note that the filter function only applies to the current source file
4669  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4670  *
4671  * The datasv parameter (which may be NULL) can be used to pass
4672  * private data to this instance of the filter. The filter function
4673  * can recover the SV using the FILTER_DATA macro and use it to
4674  * store private buffers and state information.
4675  *
4676  * The supplied datasv parameter is upgraded to a PVIO type
4677  * and the IoDIRP/IoANY field is used to store the function pointer,
4678  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4679  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4680  * private use must be set using malloc'd pointers.
4681  */
4682
4683 SV *
4684 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4685 {
4686     if (!funcp)
4687         return NULL;
4688
4689     if (!PL_parser)
4690         return NULL;
4691
4692     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4693         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4694
4695     if (!PL_rsfp_filters)
4696         PL_rsfp_filters = newAV();
4697     if (!datasv)
4698         datasv = newSV(0);
4699     SvUPGRADE(datasv, SVt_PVIO);
4700     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4701     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4702     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4703                           FPTR2DPTR(void *, IoANY(datasv)),
4704                           SvPV_nolen(datasv)));
4705     av_unshift(PL_rsfp_filters, 1);
4706     av_store(PL_rsfp_filters, 0, datasv) ;
4707     if (
4708         !PL_parser->filtered
4709      && PL_parser->lex_flags & LEX_EVALBYTES
4710      && PL_bufptr < PL_bufend
4711     ) {
4712         const char *s = PL_bufptr;
4713         while (s < PL_bufend) {
4714             if (*s == '\n') {
4715                 SV *linestr = PL_parser->linestr;
4716                 char *buf = SvPVX(linestr);
4717                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4718                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4719                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4720                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4721                 STRLEN const last_uni_pos =
4722                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4723                 STRLEN const last_lop_pos =
4724                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4725                 av_push(PL_rsfp_filters, linestr);
4726                 PL_parser->linestr =
4727                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4728                 buf = SvPVX(PL_parser->linestr);
4729                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4730                 PL_parser->bufptr = buf + bufptr_pos;
4731                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4732                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4733                 PL_parser->linestart = buf + linestart_pos;
4734                 if (PL_parser->last_uni)
4735                     PL_parser->last_uni = buf + last_uni_pos;
4736                 if (PL_parser->last_lop)
4737                     PL_parser->last_lop = buf + last_lop_pos;
4738                 SvLEN_set(linestr, SvCUR(linestr));
4739                 SvCUR_set(linestr, s - SvPVX(linestr));
4740                 PL_parser->filtered = 1;
4741                 break;
4742             }
4743             s++;
4744         }
4745     }
4746     return(datasv);
4747 }
4748
4749
4750 /* Delete most recently added instance of this filter function. */
4751 void
4752 Perl_filter_del(pTHX_ filter_t funcp)
4753 {
4754     SV *datasv;
4755
4756     PERL_ARGS_ASSERT_FILTER_DEL;
4757
4758 #ifdef DEBUGGING
4759     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4760                           FPTR2DPTR(void*, funcp)));
4761 #endif
4762     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4763         return;
4764     /* if filter is on top of stack (usual case) just pop it off */
4765     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4766     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4767         sv_free(av_pop(PL_rsfp_filters));
4768
4769         return;
4770     }
4771     /* we need to search for the correct entry and clear it     */
4772     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4773 }
4774
4775
4776 /* Invoke the idxth filter function for the current rsfp.        */
4777 /* maxlen 0 = read one text line */
4778 I32
4779 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4780 {
4781     filter_t funcp;
4782     I32 ret;
4783     SV *datasv = NULL;
4784     /* This API is bad. It should have been using unsigned int for maxlen.
4785        Not sure if we want to change the API, but if not we should sanity
4786        check the value here.  */
4787     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4788
4789     PERL_ARGS_ASSERT_FILTER_READ;
4790
4791     if (!PL_parser || !PL_rsfp_filters)
4792         return -1;
4793     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4794         /* Provide a default input filter to make life easy.    */
4795         /* Note that we append to the line. This is handy.      */
4796         DEBUG_P(PerlIO_printf(Perl_debug_log,
4797                               "filter_read %d: from rsfp\n", idx));
4798         if (correct_length) {
4799             /* Want a block */
4800             int len ;
4801             const int old_len = SvCUR(buf_sv);
4802
4803             /* ensure buf_sv is large enough */
4804             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4805             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4806                                    correct_length)) <= 0) {
4807                 if (PerlIO_error(PL_rsfp))
4808                     return -1;          /* error */
4809                 else
4810                     return 0 ;          /* end of file */
4811             }
4812             SvCUR_set(buf_sv, old_len + len) ;
4813             SvPVX(buf_sv)[old_len + len] = '\0';
4814         } else {
4815             /* Want a line */
4816             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4817                 if (PerlIO_error(PL_rsfp))
4818                     return -1;          /* error */
4819                 else
4820                     return 0 ;          /* end of file */
4821             }
4822         }
4823         return SvCUR(buf_sv);
4824     }
4825     /* Skip this filter slot if filter has been deleted */
4826     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4827         DEBUG_P(PerlIO_printf(Perl_debug_log,
4828                               "filter_read %d: skipped (filter deleted)\n",
4829                               idx));
4830         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4831     }
4832     if (SvTYPE(datasv) != SVt_PVIO) {
4833         if (correct_length) {
4834             /* Want a block */
4835             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4836             if (!remainder) return 0; /* eof */
4837             if (correct_length > remainder) correct_length = remainder;
4838             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4839             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4840         } else {
4841             /* Want a line */
4842             const char *s = SvEND(datasv);
4843             const char *send = SvPVX(datasv) + SvLEN(datasv);
4844             while (s < send) {
4845                 if (*s == '\n') {
4846                     s++;
4847                     break;
4848                 }
4849                 s++;
4850             }
4851             if (s == send) return 0; /* eof */
4852             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4853             SvCUR_set(datasv, s-SvPVX(datasv));
4854         }
4855         return SvCUR(buf_sv);
4856     }
4857     /* Get function pointer hidden within datasv        */
4858     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4859     DEBUG_P(PerlIO_printf(Perl_debug_log,
4860                           "filter_read %d: via function %p (%s)\n",
4861                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4862     /* Call function. The function is expected to       */
4863     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4864     /* Return: <0:error, =0:eof, >0:not eof             */
4865     ENTER;
4866     save_scalar(PL_errgv);
4867     ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4868     LEAVE;
4869     return ret;
4870 }
4871
4872 STATIC char *
4873 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4874 {
4875     PERL_ARGS_ASSERT_FILTER_GETS;
4876
4877 #ifdef PERL_CR_FILTER
4878     if (!PL_rsfp_filters) {
4879         filter_add(S_cr_textfilter,NULL);
4880     }
4881 #endif
4882     if (PL_rsfp_filters) {
4883         if (!append)
4884             SvCUR_set(sv, 0);   /* start with empty line        */
4885         if (FILTER_READ(0, sv, 0) > 0)
4886             return ( SvPVX(sv) ) ;
4887         else
4888             return NULL ;
4889     }
4890     else
4891         return (sv_gets(sv, PL_rsfp, append));
4892 }
4893
4894 STATIC HV *
4895 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4896 {
4897     GV *gv;
4898
4899     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4900
4901     if (memEQs(pkgname, len, "__PACKAGE__"))
4902         return PL_curstash;
4903
4904     if (len > 2
4905         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4906         && (gv = gv_fetchpvn_flags(pkgname,
4907                                    len,
4908                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4909     {
4910         return GvHV(gv);                        /* Foo:: */
4911     }
4912
4913     /* use constant CLASS => 'MyClass' */
4914     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4915     if (gv && GvCV(gv)) {
4916         SV * const sv = cv_const_sv(GvCV(gv));
4917         if (sv)
4918             return gv_stashsv(sv, 0);
4919     }
4920
4921     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4922 }
4923
4924
4925 STATIC char *
4926 S_tokenize_use(pTHX_ int is_use, char *s) {
4927     PERL_ARGS_ASSERT_TOKENIZE_USE;
4928
4929     if (PL_expect != XSTATE)
4930         /* diag_listed_as: "use" not allowed in expression */
4931         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4932                     is_use ? "use" : "no"));
4933     PL_expect = XTERM;
4934     s = skipspace(s);
4935     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4936         s = force_version(s, TRUE);
4937         if (*s == ';' || *s == '}'
4938                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4939             NEXTVAL_NEXTTOKE.opval = NULL;
4940             force_next(BAREWORD);
4941         }
4942         else if (*s == 'v') {
4943             s = force_word(s,BAREWORD,FALSE,TRUE);
4944             s = force_version(s, FALSE);
4945         }
4946     }
4947     else {
4948         s = force_word(s,BAREWORD,FALSE,TRUE);
4949         s = force_version(s, FALSE);
4950     }
4951     pl_yylval.ival = is_use;
4952     return s;
4953 }
4954 #ifdef DEBUGGING
4955     static const char* const exp_name[] =
4956         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4957           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4958           "SIGVAR", "TERMORDORDOR"
4959         };
4960 #endif
4961
4962 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4963 STATIC bool
4964 S_word_takes_any_delimiter(char *p, STRLEN len)
4965 {
4966     return (len == 1 && memCHRs("msyq", p[0]))
4967             || (len == 2
4968                 && ((p[0] == 't' && p[1] == 'r')
4969                     || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
4970 }
4971
4972 static void
4973 S_check_scalar_slice(pTHX_ char *s)
4974 {
4975     s++;
4976     while (SPACE_OR_TAB(*s)) s++;
4977     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4978                                                              PL_bufend,
4979                                                              UTF))
4980     {
4981         return;
4982     }
4983     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4984            || (*s && memCHRs(" \t$#+-'\"", *s)))
4985     {
4986         s += UTF ? UTF8SKIP(s) : 1;
4987     }
4988     if (*s == '}' || *s == ']')
4989         pl_yylval.ival = OPpSLICEWARNING;
4990 }
4991
4992 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4993 static void
4994 S_lex_token_boundary(pTHX)
4995 {
4996     PL_oldoldbufptr = PL_oldbufptr;
4997     PL_oldbufptr = PL_bufptr;
4998 }
4999
5000 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
5001 static char *
5002 S_vcs_conflict_marker(pTHX_ char *s)
5003 {
5004     lex_token_boundary();
5005     PL_bufptr = s;
5006     yyerror("Version control conflict marker");
5007     while (s < PL_bufend && *s != '\n')
5008         s++;
5009     return s;
5010 }
5011
5012 static int
5013 yyl_sigvar(pTHX_ char *s)
5014 {
5015     /* we expect the sigil and optional var name part of a
5016      * signature element here. Since a '$' is not necessarily
5017      * followed by a var name, handle it specially here; the general
5018      * yylex code would otherwise try to interpret whatever follows
5019      * as a var; e.g. ($, ...) would be seen as the var '$,'
5020      */
5021
5022     U8 sigil;
5023
5024     s = skipspace(s);
5025     sigil = *s++;
5026     PL_bufptr = s; /* for error reporting */
5027     switch (sigil) {
5028     case '$':
5029     case '@':
5030     case '%':
5031         /* spot stuff that looks like an prototype */
5032         if (memCHRs("$:@%&*;\\[]", *s)) {
5033             yyerror("Illegal character following sigil in a subroutine signature");
5034             break;
5035         }
5036         /* '$#' is banned, while '$ # comment' isn't */
5037         if (*s == '#') {
5038             yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5039             break;
5040         }
5041         s = skipspace(s);
5042         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5043             char *dest = PL_tokenbuf + 1;
5044             /* read var name, including sigil, into PL_tokenbuf */
5045             PL_tokenbuf[0] = sigil;
5046             parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5047                 0, cBOOL(UTF), FALSE, FALSE);
5048             *dest = '\0';
5049             assert(PL_tokenbuf[1]); /* we have a variable name */
5050         }
5051         else {
5052             *PL_tokenbuf = 0;
5053             PL_in_my = 0;
5054         }
5055
5056         s = skipspace(s);
5057         /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5058          * as the ASSIGNOP, and exclude other tokens that start with =
5059          */
5060         if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
5061             /* save now to report with the same context as we did when
5062              * all ASSIGNOPS were accepted */
5063             PL_oldbufptr = s;
5064
5065             ++s;
5066             NEXTVAL_NEXTTOKE.ival = 0;
5067             force_next(ASSIGNOP);
5068             PL_expect = XTERM;
5069         }
5070         else if (*s == ',' || *s == ')') {
5071             PL_expect = XOPERATOR;
5072         }
5073         else {
5074             /* make sure the context shows the unexpected character and
5075              * hopefully a bit more */
5076             if (*s) ++s;
5077             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5078                 s++;
5079             PL_bufptr = s; /* for error reporting */
5080             yyerror("Illegal operator following parameter in a subroutine signature");
5081             PL_in_my = 0;
5082         }
5083         if (*PL_tokenbuf) {
5084             NEXTVAL_NEXTTOKE.ival = sigil;
5085             force_next('p'); /* force a signature pending identifier */
5086         }
5087         break;
5088
5089     case ')':
5090         PL_expect = XBLOCK;
5091         break;
5092     case ',': /* handle ($a,,$b) */
5093         break;
5094
5095     default:
5096         PL_in_my = 0;
5097         yyerror("A signature parameter must start with '$', '@' or '%'");
5098         /* very crude error recovery: skip to likely next signature
5099          * element */
5100         while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5101             s++;
5102         break;
5103     }
5104
5105     switch (sigil) {
5106         case ',': TOKEN (PERLY_COMMA);
5107         case '$': TOKEN (PERLY_DOLLAR);
5108         case '@': TOKEN (PERLY_SNAIL);
5109         case '%': TOKEN (PERLY_PERCENT_SIGN);
5110         case ')': TOKEN (PERLY_PAREN_CLOSE);
5111         default:  TOKEN (sigil);
5112     }
5113 }
5114
5115 static int
5116 yyl_dollar(pTHX_ char *s)
5117 {
5118     CLINE;
5119
5120     if (PL_expect == XPOSTDEREF) {
5121         if (s[1] == '#') {
5122             s++;
5123             POSTDEREF(DOLSHARP);
5124         }
5125         POSTDEREF(PERLY_DOLLAR);
5126     }
5127
5128     if (   s[1] == '#'
5129         && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5130             || memCHRs("{$:+-@", s[2])))
5131     {
5132         PL_tokenbuf[0] = '@';
5133         s = scan_ident(s + 1, PL_tokenbuf + 1,
5134                        sizeof PL_tokenbuf - 1, FALSE);
5135         if (PL_expect == XOPERATOR) {
5136             char *d = s;
5137             if (PL_bufptr > s) {
5138                 d = PL_bufptr-1;
5139                 PL_bufptr = PL_oldbufptr;
5140             }
5141             no_op("Array length", d);
5142         }
5143         if (!PL_tokenbuf[1])
5144             PREREF(DOLSHARP);
5145         PL_expect = XOPERATOR;
5146         force_ident_maybe_lex('#');
5147         TOKEN(DOLSHARP);
5148     }
5149
5150     PL_tokenbuf[0] = '$';
5151     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5152     if (PL_expect == XOPERATOR) {
5153         char *d = s;
5154         if (PL_bufptr > s) {
5155             d = PL_bufptr-1;
5156             PL_bufptr = PL_oldbufptr;
5157         }
5158         no_op("Scalar", d);
5159     }
5160     if (!PL_tokenbuf[1]) {
5161         if (s == PL_bufend)
5162             yyerror("Final $ should be \\$ or $name");
5163         PREREF(PERLY_DOLLAR);
5164     }
5165
5166     {
5167         const char tmp = *s;
5168         if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5169             s = skipspace(s);
5170
5171         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5172             && intuit_more(s, PL_bufend)) {
5173             if (*s == '[') {
5174                 PL_tokenbuf[0] = '@';
5175                 if (ckWARN(WARN_SYNTAX)) {
5176                     char *t = s+1;
5177
5178                     while ( t < PL_bufend ) {
5179                         if (isSPACE(*t)) {
5180                             do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5181                             /* consumed one or more space chars */
5182                         } else if (*t == '$' || *t == '@') {
5183                             /* could be more than one '$' like $$ref or @$ref */
5184                             do { t++; } while (t < PL_bufend && *t == '$');
5185
5186                             /* could be an abigail style identifier like $ foo */
5187                             while (t < PL_bufend && *t == ' ') t++;
5188
5189                             /* strip off the name of the var */
5190                             while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5191                                 t += UTF ? UTF8SKIP(t) : 1;
5192                             /* consumed a varname */
5193                         } else if (isDIGIT(*t)) {
5194                             /* deal with hex constants like 0x11 */
5195                             if (t[0] == '0' && t[1] == 'x') {
5196                                 t += 2;
5197                                 while (t < PL_bufend && isXDIGIT(*t)) t++;
5198                             } else {
5199                                 /* deal with decimal/octal constants like 1 and 0123 */
5200                                 do { t++; } while (isDIGIT(*t));
5201                                 if (t<PL_bufend && *t == '.') {
5202                                     do { t++; } while (isDIGIT(*t));
5203                                 }
5204                             }
5205                             /* consumed a number */
5206                         } else {
5207                             /* not a var nor a space nor a number */
5208                             break;
5209                         }
5210                     }
5211                     if (t < PL_bufend && *t++ == ',') {
5212                         PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5213                         while (t < PL_bufend && *t != ']')
5214                             t++;
5215                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5216                                     "Multidimensional syntax %" UTF8f " not supported",
5217                                     UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5218                     }
5219                 }
5220             }
5221             else if (*s == '{') {
5222                 char *t;
5223                 PL_tokenbuf[0] = '%';
5224                 if (    strEQ(PL_tokenbuf+1, "SIG")
5225                     && ckWARN(WARN_SYNTAX)
5226                     && (t = (char *) memchr(s, '}', PL_bufend - s))
5227                     && (t = (char *) memchr(t, '=', PL_bufend - t)))
5228                 {
5229                     char tmpbuf[sizeof PL_tokenbuf];
5230                     do {
5231                         t++;
5232                     } while (isSPACE(*t));
5233                     if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5234                         STRLEN len;
5235                         t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5236                                         &len);
5237                         while (isSPACE(*t))
5238                             t++;
5239                         if (  *t == ';'
5240                             && get_cvn_flags(tmpbuf, len, UTF
5241                                                             ? SVf_UTF8
5242                                                             : 0))
5243                         {
5244                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5245                                 "You need to quote \"%" UTF8f "\"",
5246                                     UTF8fARG(UTF, len, tmpbuf));
5247                         }
5248                     }
5249                 }
5250             }
5251         }
5252
5253         PL_expect = XOPERATOR;
5254         if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5255             const bool islop = (PL_last_lop == PL_oldoldbufptr);
5256             if (!islop || PL_last_lop_op == OP_GREPSTART)
5257                 PL_expect = XOPERATOR;
5258             else if (memCHRs("$@\"'`q", *s))
5259                 PL_expect = XTERM;              /* e.g. print $fh "foo" */
5260             else if (   memCHRs("&*<%", *s)
5261                      && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5262             {
5263                 PL_expect = XTERM;              /* e.g. print $fh &sub */
5264             }
5265             else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5266                 char tmpbuf[sizeof PL_tokenbuf];
5267                 int t2;
5268                 STRLEN len;
5269                 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5270                 if ((t2 = keyword(tmpbuf, len, 0))) {
5271                     /* binary operators exclude handle interpretations */
5272                     switch (t2) {
5273                     case -KEY_x:
5274                     case -KEY_eq:
5275                     case -KEY_ne:
5276                     case -KEY_gt:
5277                     case -KEY_lt:
5278                     case -KEY_ge:
5279                     case -KEY_le:
5280                     case -KEY_cmp:
5281                         break;
5282                     default:
5283                         PL_expect = XTERM;      /* e.g. print $fh length() */
5284                         break;
5285                     }
5286                 }
5287                 else {
5288                     PL_expect = XTERM;  /* e.g. print $fh subr() */
5289                 }
5290             }
5291             else if (isDIGIT(*s))
5292                 PL_expect = XTERM;              /* e.g. print $fh 3 */
5293             else if (*s == '.' && isDIGIT(s[1]))
5294                 PL_expect = XTERM;              /* e.g. print $fh .3 */
5295             else if ((*s == '?' || *s == '-' || *s == '+')
5296                 && !isSPACE(s[1]) && s[1] != '=')
5297                 PL_expect = XTERM;              /* e.g. print $fh -1 */
5298             else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5299                      && s[1] != '/')
5300                 PL_expect = XTERM;              /* e.g. print $fh /.../
5301                                                XXX except DORDOR operator
5302                                             */
5303             else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5304                      && s[2] != '=')
5305                 PL_expect = XTERM;              /* print $fh <<"EOF" */
5306         }
5307     }
5308     force_ident_maybe_lex('$');
5309     TOKEN(PERLY_DOLLAR);
5310 }
5311
5312 static int
5313 yyl_sub(pTHX_ char *s, const int key)
5314 {
5315     char * const tmpbuf = PL_tokenbuf + 1;
5316     bool have_name, have_proto;
5317     STRLEN len;
5318     SV *format_name = NULL;
5319     bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
5320
5321     SSize_t off = s-SvPVX(PL_linestr);
5322     char *d;
5323
5324     s = skipspace(s); /* can move PL_linestr */
5325
5326     d = SvPVX(PL_linestr)+off;
5327
5328     SAVEBOOL(PL_parser->sig_seen);
5329     PL_parser->sig_seen = FALSE;
5330
5331     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5332         || *s == '\''
5333         || (*s == ':' && s[1] == ':'))
5334     {
5335
5336         PL_expect = XATTRBLOCK;
5337         d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5338                       &len);
5339         if (key == KEY_format)
5340             format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5341         *PL_tokenbuf = '&';
5342         if (memchr(tmpbuf, ':', len) || key != KEY_sub
5343          || pad_findmy_pvn(
5344                 PL_tokenbuf, len + 1, 0
5345             ) != NOT_IN_PAD)
5346             sv_setpvn(PL_subname, tmpbuf, len);
5347         else {
5348             sv_setsv(PL_subname,PL_curstname);
5349             sv_catpvs(PL_subname,"::");
5350             sv_catpvn(PL_subname,tmpbuf,len);
5351         }
5352         if (SvUTF8(PL_linestr))
5353             SvUTF8_on(PL_subname);
5354         have_name = TRUE;
5355
5356         s = skipspace(d);
5357     }
5358     else {
5359         if (key == KEY_my || key == KEY_our || key==KEY_state) {
5360             *d = '\0';
5361             /* diag_listed_as: Missing name in "%s sub" */
5362             Perl_croak(aTHX_
5363                       "Missing name in \"%s\"", PL_bufptr);
5364         }
5365         PL_expect = XATTRTERM;
5366         sv_setpvs(PL_subname,"?");
5367         have_name = FALSE;
5368     }
5369
5370     if (key == KEY_format) {
5371         if (format_name) {
5372             NEXTVAL_NEXTTOKE.opval
5373                 = newSVOP(OP_CONST,0, format_name);
5374             NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5375             force_next(BAREWORD);
5376         }
5377         PREBLOCK(FORMAT);
5378     }
5379
5380     /* Look for a prototype */
5381     if (*s == '(' && !is_sigsub) {
5382         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5383         if (!s)
5384             Perl_croak(aTHX_ "Prototype not terminated");
5385         COPLINE_SET_FROM_MULTI_END;
5386         (void)validate_proto(PL_subname, PL_lex_stuff,
5387                              ckWARN(WARN_ILLEGALPROTO), 0);
5388         have_proto = TRUE;
5389
5390         s = skipspace(s);
5391     }
5392     else
5393         have_proto = FALSE;
5394
5395     if (  !(*s == ':' && s[1] != ':')
5396         && (*s != '{' && *s != '(') && key != KEY_format)
5397     {
5398         assert(key == KEY_sub || key == KEY_AUTOLOAD ||
5399                key == KEY_DESTROY || key == KEY_BEGIN ||
5400                key == KEY_UNITCHECK || key == KEY_CHECK ||
5401                key == KEY_INIT || key == KEY_END ||
5402                key == KEY_my || key == KEY_state ||
5403                key == KEY_our);
5404         if (!have_name)
5405             Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5406         else if (*s != ';' && *s != '}')
5407             Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5408     }
5409
5410     if (have_proto) {
5411         NEXTVAL_NEXTTOKE.opval =
5412             newSVOP(OP_CONST, 0, PL_lex_stuff);
5413         PL_lex_stuff = NULL;
5414         force_next(THING);
5415     }
5416     if (!have_name) {
5417         if (PL_curstash)
5418             sv_setpvs(PL_subname, "__ANON__");
5419         else
5420             sv_setpvs(PL_subname, "__ANON__::__ANON__");
5421         if (is_sigsub)
5422             TOKEN(ANON_SIGSUB);
5423         else
5424             TOKEN(ANONSUB);
5425     }
5426     force_ident_maybe_lex('&');
5427     if (is_sigsub)
5428         TOKEN(SIGSUB);
5429     else
5430         TOKEN(SUB);
5431 }
5432
5433 static int
5434 yyl_interpcasemod(pTHX_ char *s)
5435 {
5436 #ifdef DEBUGGING
5437     if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5438         Perl_croak(aTHX_
5439                    "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5440                    PL_bufptr, PL_bufend, *PL_bufptr);
5441 #endif
5442
5443     if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5444         /* if at a \E */
5445         if (PL_lex_casemods) {
5446             const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5447             PL_lex_casestack[PL_lex_casemods] = '\0';
5448
5449             if (PL_bufptr != PL_bufend
5450                 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5451                     || oldmod == 'F')) {
5452                 PL_bufptr += 2;
5453                 PL_lex_state = LEX_INTERPCONCAT;
5454             }
5455             PL_lex_allbrackets--;
5456             return REPORT(PERLY_PAREN_CLOSE);
5457         }
5458         else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5459            /* Got an unpaired \E */
5460            Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5461                     "Useless use of \\E");
5462         }
5463         if (PL_bufptr != PL_bufend)
5464             PL_bufptr += 2;
5465         PL_lex_state = LEX_INTERPCONCAT;
5466         return yylex();
5467     }
5468     else {
5469         DEBUG_T({
5470             PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5471         });
5472         s = PL_bufptr + 1;
5473         if (s[1] == '\\' && s[2] == 'E') {
5474             PL_bufptr = s + 3;
5475             PL_lex_state = LEX_INTERPCONCAT;
5476             return yylex();
5477         }
5478         else {
5479             I32 tmp;
5480             if (   memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5481                 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5482             {
5483                 tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
5484             }
5485             if ((*s == 'L' || *s == 'U' || *s == 'F')
5486                 && (strpbrk(PL_lex_casestack, "LUF")))
5487             {
5488                 PL_lex_casestack[--PL_lex_casemods] = '\0';
5489                 PL_lex_allbrackets--;
5490                 return REPORT(PERLY_PAREN_CLOSE);
5491             }
5492             if (PL_lex_casemods > 10)
5493                 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5494             PL_lex_casestack[PL_lex_casemods++] = *s;
5495             PL_lex_casestack[PL_lex_casemods] = '\0';
5496             PL_lex_state = LEX_INTERPCONCAT;
5497             NEXTVAL_NEXTTOKE.ival = 0;
5498             force_next((2<<24)|PERLY_PAREN_OPEN);
5499             if (*s == 'l')
5500                 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5501             else if (*s == 'u')
5502                 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5503             else if (*s == 'L')
5504                 NEXTVAL_NEXTTOKE.ival = OP_LC;
5505             else if (*s == 'U')
5506                 NEXTVAL_NEXTTOKE.ival = OP_UC;
5507             else if (*s == 'Q')
5508                 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5509             else if (*s == 'F')
5510                 NEXTVAL_NEXTTOKE.ival = OP_FC;
5511             else
5512                 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5513             PL_bufptr = s + 1;
5514         }
5515         force_next(FUNC);
5516         if (PL_lex_starts) {
5517             s = PL_bufptr;
5518             PL_lex_starts = 0;
5519             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5520             if (PL_lex_casemods == 1 && PL_lex_inpat)
5521                 TOKEN(PERLY_COMMA);
5522             else
5523                 AopNOASSIGN(OP_CONCAT);
5524         }
5525         else
5526             return yylex();
5527     }
5528 }
5529
5530 static int
5531 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5532                         GV **pgv, GV ***pgvp)
5533 {
5534     GV *ogv = NULL;     /* override (winner) */
5535     GV *hgv = NULL;     /* hidden (loser) */
5536     GV *gv = *pgv;
5537
5538     if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5539         CV *cv;
5540         if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5541                                     (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5542                                     SVt_PVCV))
5543             && (cv = GvCVu(gv)))
5544         {
5545             if (GvIMPORTED_CV(gv))
5546                 ogv = gv;
5547             else if (! CvMETHOD(cv))
5548                 hgv = gv;
5549         }
5550         if (!ogv
5551             && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5552             && (gv = **pgvp)
5553             && (isGV_with_GP(gv)
5554                 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5555                 :   SvPCS_IMPORTED(gv)
5556                 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5557                                                          len, 0), 1)))
5558         {
5559             ogv = gv;
5560         }
5561     }
5562
5563     *pgv = gv;
5564
5565     if (ogv) {
5566         *orig_keyword = key;
5567         return 0;               /* overridden by import or by GLOBAL */
5568     }
5569     else if (gv && !*pgvp
5570              && -key==KEY_lock  /* XXX generalizable kludge */
5571              && GvCVu(gv))
5572     {
5573         return 0;               /* any sub overrides "weak" keyword */
5574     }
5575     else {                      /* no override */
5576         key = -key;
5577         if (key == KEY_dump) {
5578             Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5579         }
5580         *pgv = NULL;
5581         *pgvp = 0;
5582         if (hgv && key != KEY_x)        /* never ambiguous */
5583             Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5584                            "Ambiguous call resolved as CORE::%s(), "
5585                            "qualify as such or use &",
5586                            GvENAME(hgv));
5587         return key;
5588     }
5589 }
5590
5591 static int
5592 yyl_qw(pTHX_ char *s, STRLEN len)
5593 {
5594     OP *words = NULL;
5595
5596     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5597     if (!s)
5598         missingterm(NULL, 0);
5599
5600     COPLINE_SET_FROM_MULTI_END;
5601     PL_expect = XOPERATOR;
5602     if (SvCUR(PL_lex_stuff)) {
5603         int warned_comma = !ckWARN(WARN_QW);
5604         int warned_comment = warned_comma;
5605         char *d = SvPV_force(PL_lex_stuff, len);
5606         while (len) {
5607             for (; isSPACE(*d) && len; --len, ++d)
5608                 /**/;
5609             if (len) {
5610                 SV *sv;
5611                 const char *b = d;
5612                 if (!warned_comma || !warned_comment) {
5613                     for (; !isSPACE(*d) && len; --len, ++d) {
5614                         if (!warned_comma && *d == ',') {
5615                             Perl_warner(aTHX_ packWARN(WARN_QW),
5616                                 "Possible attempt to separate words with commas");
5617                             ++warned_comma;
5618                         }
5619                         else if (!warned_comment && *d == '#') {
5620                             Perl_warner(aTHX_ packWARN(WARN_QW),
5621                                 "Possible attempt to put comments in qw() list");
5622                             ++warned_comment;
5623                         }
5624                     }
5625                 }
5626                 else {
5627                     for (; !isSPACE(*d) && len; --len, ++d)
5628                         /**/;
5629                 }
5630                 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5631                 words = op_append_elem(OP_LIST, words,
5632                                        newSVOP(OP_CONST, 0, tokeq(sv)));
5633             }
5634         }
5635     }
5636     if (!words)
5637         words = newNULLLIST();
5638     SvREFCNT_dec_NN(PL_lex_stuff);
5639     PL_lex_stuff = NULL;
5640     PL_expect = XOPERATOR;
5641     pl_yylval.opval = sawparens(words);
5642     TOKEN(QWLIST);
5643 }
5644
5645 static int
5646 yyl_hyphen(pTHX_ char *s)
5647 {
5648     if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5649         I32 ftst = 0;
5650         char tmp;
5651
5652         s++;
5653         PL_bufptr = s;
5654         tmp = *s++;
5655
5656         while (s < PL_bufend && SPACE_OR_TAB(*s))
5657             s++;
5658
5659         if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5660             s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5661             DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5662             OPERATOR(PERLY_MINUS);              /* unary minus */
5663         }
5664         switch (tmp) {
5665         case 'r': ftst = OP_FTEREAD;    break;
5666         case 'w': ftst = OP_FTEWRITE;   break;
5667         case 'x': ftst = OP_FTEEXEC;    break;
5668         case 'o': ftst = OP_FTEOWNED;   break;
5669         case 'R': ftst = OP_FTRREAD;    break;
5670         case 'W': ftst = OP_FTRWRITE;   break;
5671         case 'X': ftst = OP_FTREXEC;    break;
5672         case 'O': ftst = OP_FTROWNED;   break;
5673         case 'e': ftst = OP_FTIS;       break;
5674         case 'z': ftst = OP_FTZERO;     break;
5675         case 's': ftst = OP_FTSIZE;     break;
5676         case 'f': ftst = OP_FTFILE;     break;
5677         case 'd': ftst = OP_FTDIR;      break;
5678         case 'l': ftst = OP_FTLINK;     break;
5679         case 'p': ftst = OP_FTPIPE;     break;
5680         case 'S': ftst = OP_FTSOCK;     break;
5681         case 'u': ftst = OP_FTSUID;     break;
5682         case 'g': ftst = OP_FTSGID;     break;
5683         case 'k': ftst = OP_FTSVTX;     break;
5684         case 'b': ftst = OP_FTBLK;      break;
5685         case 'c': ftst = OP_FTCHR;      break;
5686         case 't': ftst = OP_FTTTY;      break;
5687         case 'T': ftst = OP_FTTEXT;     break;
5688         case 'B': ftst = OP_FTBINARY;   break;
5689         case 'M': case 'A': case 'C':
5690             gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5691             switch (tmp) {
5692             case 'M': ftst = OP_FTMTIME; break;
5693             case 'A': ftst = OP_FTATIME; break;
5694             case 'C': ftst = OP_FTCTIME; break;
5695             default:                     break;
5696             }
5697             break;
5698         default:
5699             break;
5700         }
5701         if (ftst) {
5702             PL_last_uni = PL_oldbufptr;
5703             PL_last_lop_op = (OPCODE)ftst;
5704             DEBUG_T( {
5705                 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5706             } );
5707             FTST(ftst);
5708         }
5709         else {
5710             /* Assume it was a minus followed by a one-letter named
5711              * subroutine call (or a -bareword), then. */
5712             DEBUG_T( {
5713                 PerlIO_printf(Perl_debug_log,
5714                     "### '-%c' looked like a file test but was not\n",
5715                     (int) tmp);
5716             } );
5717             s = --PL_bufptr;
5718         }
5719     }
5720     {
5721         const char tmp = *s++;
5722         if (*s == tmp) {
5723             s++;
5724             if (PL_expect == XOPERATOR)
5725                 TERM(POSTDEC);
5726             else
5727                 OPERATOR(PREDEC);
5728         }
5729         else if (*s == '>') {
5730             s++;
5731             s = skipspace(s);
5732             if (((*s == '$' || *s == '&') && s[1] == '*')
5733               ||(*s == '$' && s[1] == '#' && s[2] == '*')
5734               ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5735               ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5736              )
5737             {
5738                 PL_expect = XPOSTDEREF;
5739                 TOKEN(ARROW);
5740             }
5741             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5742                 s = force_word(s,METHOD,FALSE,TRUE);
5743                 TOKEN(ARROW);
5744             }
5745             else if (*s == '$')
5746                 OPERATOR(ARROW);
5747             else
5748                 TERM(ARROW);
5749         }
5750         if (PL_expect == XOPERATOR) {
5751             if (*s == '='
5752                 && !PL_lex_allbrackets
5753                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5754             {
5755                 s--;
5756                 TOKEN(0);
5757             }
5758             Aop(OP_SUBTRACT);
5759         }
5760         else {
5761             if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5762                 check_uni();
5763             OPERATOR(PERLY_MINUS);              /* unary minus */
5764         }
5765     }
5766 }
5767
5768 static int
5769 yyl_plus(pTHX_ char *s)
5770 {
5771     const char tmp = *s++;
5772     if (*s == tmp) {
5773         s++;
5774         if (PL_expect == XOPERATOR)
5775             TERM(POSTINC);
5776         else
5777             OPERATOR(PREINC);
5778     }
5779     if (PL_expect == XOPERATOR) {
5780         if (*s == '='
5781             && !PL_lex_allbrackets
5782             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5783         {
5784             s--;
5785             TOKEN(0);
5786         }
5787         Aop(OP_ADD);
5788     }
5789     else {
5790         if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5791             check_uni();
5792         OPERATOR(PERLY_PLUS);
5793     }
5794 }
5795
5796 static int
5797 yyl_star(pTHX_ char *s)
5798 {
5799     if (PL_expect == XPOSTDEREF)
5800         POSTDEREF(PERLY_STAR);
5801
5802     if (PL_expect != XOPERATOR) {
5803         s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5804         PL_expect = XOPERATOR;
5805         force_ident(PL_tokenbuf, PERLY_STAR);
5806         if (!*PL_tokenbuf)
5807             PREREF(PERLY_STAR);
5808         TERM(PERLY_STAR);
5809     }
5810
5811     s++;
5812     if (*s == '*') {
5813         s++;
5814         if (*s == '=' && !PL_lex_allbrackets
5815             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5816         {
5817             s -= 2;
5818             TOKEN(0);
5819         }
5820         PWop(OP_POW);
5821     }
5822
5823     if (*s == '='
5824         && !PL_lex_allbrackets
5825         && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5826     {
5827         s--;
5828         TOKEN(0);
5829     }
5830
5831     Mop(OP_MULTIPLY);
5832 }
5833
5834 static int
5835 yyl_percent(pTHX_ char *s)
5836 {
5837     if (PL_expect == XOPERATOR) {
5838         if (s[1] == '='
5839             && !PL_lex_allbrackets
5840             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5841         {
5842             TOKEN(0);
5843         }
5844         ++s;
5845         Mop(OP_MODULO);
5846     }
5847     else if (PL_expect == XPOSTDEREF)
5848         POSTDEREF(PERLY_PERCENT_SIGN);
5849
5850     PL_tokenbuf[0] = '%';
5851     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5852     pl_yylval.ival = 0;
5853     if (!PL_tokenbuf[1]) {
5854         PREREF(PERLY_PERCENT_SIGN);
5855     }
5856     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5857         && intuit_more(s, PL_bufend)) {
5858         if (*s == '[')
5859             PL_tokenbuf[0] = '@';
5860     }
5861     PL_expect = XOPERATOR;
5862     force_ident_maybe_lex('%');
5863     TERM(PERLY_PERCENT_SIGN);
5864 }
5865
5866 static int
5867 yyl_caret(pTHX_ char *s)
5868 {
5869     char *d = s;
5870     const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5871     if (bof && s[1] == '.')
5872         s++;
5873     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5874             (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5875     {
5876         s = d;
5877         TOKEN(0);
5878     }
5879     s++;
5880     BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5881 }
5882
5883 static int
5884 yyl_colon(pTHX_ char *s)
5885 {
5886     OP *attrs;
5887
5888     switch (PL_expect) {
5889     case XOPERATOR:
5890         if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5891             break;
5892         PL_bufptr = s;  /* update in case we back off */
5893         if (*s == '=') {
5894             Perl_croak(aTHX_
5895                        "Use of := for an empty attribute list is not allowed");
5896         }
5897         goto grabattrs;
5898     case XATTRBLOCK:
5899         PL_expect = XBLOCK;
5900         goto grabattrs;
5901     case XATTRTERM:
5902         PL_expect = XTERMBLOCK;
5903      grabattrs:
5904         /* NB: as well as parsing normal attributes, we also end up
5905          * here if there is something looking like attributes
5906          * following a signature (which is illegal, but used to be
5907          * legal in 5.20..5.26). If the latter, we still parse the
5908          * attributes so that error messages(s) are less confusing,
5909          * but ignore them (parser->sig_seen).
5910          */
5911         s = skipspace(s);
5912         attrs = NULL;
5913         while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5914             bool sig = PL_parser->sig_seen;
5915             I32 tmp;
5916             SV *sv;
5917             STRLEN len;
5918             char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5919             if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5920                 if (tmp < 0) tmp = -tmp;
5921                 switch (tmp) {
5922                 case KEY_or:
5923                 case KEY_and:
5924                 case KEY_for:
5925                 case KEY_foreach:
5926                 case KEY_unless:
5927                 case KEY_if:
5928                 case KEY_while:
5929                 case KEY_until:
5930                     goto got_attrs;
5931                 default:
5932                     break;
5933                 }
5934             }
5935             sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5936             if (*d == '(') {
5937                 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5938                 if (!d) {
5939                     if (attrs)
5940                         op_free(attrs);
5941                     sv_free(sv);
5942                     Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5943                 }
5944                 COPLINE_SET_FROM_MULTI_END;
5945             }
5946             if (PL_lex_stuff) {
5947                 sv_catsv(sv, PL_lex_stuff);
5948                 attrs = op_append_elem(OP_LIST, attrs,
5949                                     newSVOP(OP_CONST, 0, sv));
5950                 SvREFCNT_dec_NN(PL_lex_stuff);
5951                 PL_lex_stuff = NULL;
5952             }
5953             else {
5954                 /* NOTE: any CV attrs applied here need to be part of
5955                    the CVf_BUILTIN_ATTRS define in cv.h! */
5956                 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5957                     sv_free(sv);
5958                     if (!sig)
5959                         CvLVALUE_on(PL_compcv);
5960                 }
5961                 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5962                     sv_free(sv);
5963                     if (!sig)
5964                         CvMETHOD_on(PL_compcv);
5965                 }
5966                 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) {
5967                     sv_free(sv);
5968                     if (!sig) {
5969                         Perl_ck_warner_d(aTHX_
5970                             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5971                            ":const is experimental"
5972                         );
5973                         CvANONCONST_on(PL_compcv);
5974                         if (!CvANON(PL_compcv))
5975                             yyerror(":const is not permitted on named "
5976                                     "subroutines");
5977                     }
5978                 }
5979                 /* After we've set the flags, it could be argued that
5980                    we don't need to do the attributes.pm-based setting
5981                    process, and shouldn't bother appending recognized
5982                    flags.  To experiment with that, uncomment the
5983                    following "else".  (Note that's already been
5984                    uncommented.  That keeps the above-applied built-in
5985                    attributes from being intercepted (and possibly
5986                    rejected) by a package's attribute routines, but is
5987                    justified by the performance win for the common case
5988                    of applying only built-in attributes.) */
5989                 else
5990                     attrs = op_append_elem(OP_LIST, attrs,
5991                                         newSVOP(OP_CONST, 0,
5992                                                 sv));
5993             }
5994             s = skipspace(d);
5995             if (*s == ':' && s[1] != ':')
5996                 s = skipspace(s+1);
5997             else if (s == d)
5998                 break;  /* require real whitespace or :'s */
5999             /* XXX losing whitespace on sequential attributes here */
6000         }
6001
6002         if (*s != ';'
6003             && *s != '}'
6004             && !(PL_expect == XOPERATOR
6005                  ? (*s == '=' ||  *s == ')')
6006                  : (*s == '{' ||  *s == '(')))
6007         {
6008             const char q = ((*s == '\'') ? '"' : '\'');
6009             /* If here for an expression, and parsed no attrs, back off. */
6010             if (PL_expect == XOPERATOR && !attrs) {
6011                 s = PL_bufptr;
6012                 break;
6013             }
6014             /* MUST advance bufptr here to avoid bogus "at end of line"
6015                context messages from yyerror().
6016             */
6017             PL_bufptr = s;
6018             yyerror( (const char *)
6019                      (*s
6020                       ? Perl_form(aTHX_ "Invalid separator character "
6021                                   "%c%c%c in attribute list", q, *s, q)
6022                       : "Unterminated attribute list" ) );
6023             if (attrs)
6024                 op_free(attrs);
6025             OPERATOR(PERLY_COLON);
6026         }
6027
6028     got_attrs:
6029         if (PL_parser->sig_seen) {
6030             /* see comment about about sig_seen and parser error
6031              * handling */
6032             if (attrs)
6033                 op_free(attrs);
6034             Perl_croak(aTHX_ "Subroutine attributes must come "
6035                              "before the signature");
6036         }
6037         if (attrs) {
6038             NEXTVAL_NEXTTOKE.opval = attrs;
6039             force_next(THING);
6040         }
6041         TOKEN(COLONATTR);
6042     }
6043
6044     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6045         s--;
6046         TOKEN(0);
6047     }
6048
6049     PL_lex_allbrackets--;
6050     OPERATOR(PERLY_COLON);
6051 }
6052
6053 static int
6054 yyl_subproto(pTHX_ char *s, CV *cv)
6055 {
6056     STRLEN protolen = CvPROTOLEN(cv);
6057     const char *proto = CvPROTO(cv);
6058     bool optional;
6059
6060     proto = S_strip_spaces(aTHX_ proto, &protolen);
6061     if (!protolen)
6062         TERM(FUNC0SUB);
6063     if ((optional = *proto == ';')) {
6064         do {
6065             proto++;
6066         } while (*proto == ';');
6067     }
6068
6069     if (
6070         (
6071             (
6072                 *proto == '$' || *proto == '_'
6073              || *proto == '*' || *proto == '+'
6074             )
6075          && proto[1] == '\0'
6076         )
6077      || (
6078          *proto == '\\' && proto[1] && proto[2] == '\0'
6079         )
6080     ) {
6081         UNIPROTO(UNIOPSUB,optional);
6082     }
6083
6084     if (*proto == '\\' && proto[1] == '[') {
6085         const char *p = proto + 2;
6086         while(*p && *p != ']')
6087             ++p;
6088         if(*p == ']' && !p[1])
6089             UNIPROTO(UNIOPSUB,optional);
6090     }
6091
6092     if (*proto == '&' && *s == '{') {
6093         if (PL_curstash)
6094             sv_setpvs(PL_subname, "__ANON__");
6095         else
6096             sv_setpvs(PL_subname, "__ANON__::__ANON__");
6097         if (!PL_lex_allbrackets
6098             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6099         {
6100             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6101         }
6102         PREBLOCK(LSTOPSUB);
6103     }
6104
6105     return KEY_NULL;
6106 }
6107
6108 static int
6109 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
6110 {
6111     char *d;
6112     if (PL_lex_brackets > 100) {
6113         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6114     }
6115
6116     switch (PL_expect) {
6117     case XTERM:
6118     case XTERMORDORDOR:
6119         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6120         PL_lex_allbrackets++;
6121         OPERATOR(HASHBRACK);
6122     case XOPERATOR:
6123         while (s < PL_bufend && SPACE_OR_TAB(*s))
6124             s++;
6125         d = s;
6126         PL_tokenbuf[0] = '\0';
6127         if (d < PL_bufend && *d == '-') {
6128             PL_tokenbuf[0] = '-';
6129             d++;
6130             while (d < PL_bufend && SPACE_OR_TAB(*d))
6131                 d++;
6132         }
6133         if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6134             STRLEN len;
6135             d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6136                           FALSE, &len);
6137             while (d < PL_bufend && SPACE_OR_TAB(*d))
6138                 d++;
6139             if (*d == '}') {
6140                 const char minus = (PL_tokenbuf[0] == '-');
6141                 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6142                 if (minus)
6143                     force_next(PERLY_MINUS);
6144             }
6145         }
6146         /* FALLTHROUGH */
6147     case XATTRTERM:
6148     case XTERMBLOCK:
6149         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6150         PL_lex_allbrackets++;
6151         PL_expect = XSTATE;
6152         break;
6153     case XATTRBLOCK:
6154     case XBLOCK:
6155         PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6156         PL_lex_allbrackets++;
6157         PL_expect = XSTATE;
6158         break;
6159     case XBLOCKTERM:
6160         PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6161         PL_lex_allbrackets++;
6162         PL_expect = XSTATE;
6163         break;
6164     default: {
6165             const char *t;
6166             if (PL_oldoldbufptr == PL_last_lop)
6167                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6168             else
6169                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6170             PL_lex_allbrackets++;
6171             s = skipspace(s);
6172             if (*s == '}') {
6173                 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6174                     PL_expect = XTERM;
6175                     /* This hack is to get the ${} in the message. */
6176                     PL_bufptr = s+1;
6177                     yyerror("syntax error");
6178                     break;
6179                 }
6180                 OPERATOR(HASHBRACK);
6181             }
6182             if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6183                 /* ${...} or @{...} etc., but not print {...}
6184                  * Skip the disambiguation and treat this as a block.
6185                  */
6186                 goto block_expectation;
6187             }
6188             /* This hack serves to disambiguate a pair of curlies
6189              * as being a block or an anon hash.  Normally, expectation
6190              * determines that, but in cases where we're not in a
6191              * position to expect anything in particular (like inside
6192              * eval"") we have to resolve the ambiguity.  This code
6193              * covers the case where the first term in the curlies is a
6194              * quoted string.  Most other cases need to be explicitly
6195              * disambiguated by prepending a "+" before the opening
6196              * curly in order to force resolution as an anon hash.
6197              *
6198              * XXX should probably propagate the outer expectation
6199              * into eval"" to rely less on this hack, but that could
6200              * potentially break current behavior of eval"".
6201              * GSAR 97-07-21
6202              */
6203             t = s;
6204             if (*s == '\'' || *s == '"' || *s == '`') {
6205                 /* common case: get past first string, handling escapes */
6206                 for (t++; t < PL_bufend && *t != *s;)
6207                     if (*t++ == '\\')
6208                         t++;
6209                 t++;
6210             }
6211             else if (*s == 'q') {
6212                 if (++t < PL_bufend
6213                     && (!isWORDCHAR(*t)
6214                         || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6215                             && !isWORDCHAR(*t))))
6216                 {
6217                     /* skip q//-like construct */
6218                     const char *tmps;
6219                     char open, close, term;
6220                     I32 brackets = 1;
6221
6222                     while (t < PL_bufend && isSPACE(*t))
6223                         t++;
6224                     /* check for q => */
6225                     if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6226                         OPERATOR(HASHBRACK);
6227                     }
6228                     term = *t;
6229                     open = term;
6230                     if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6231                         term = tmps[5];
6232                     close = term;
6233                     if (open == close)
6234                         for (t++; t < PL_bufend; t++) {
6235                             if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6236                                 t++;
6237                             else if (*t == open)
6238                                 break;
6239                         }
6240                     else {
6241                         for (t++; t < PL_bufend; t++) {
6242                             if (*t == '\\' && t+1 < PL_bufend)
6243                                 t++;
6244                             else if (*t == close && --brackets <= 0)
6245                                 break;
6246                             else if (*t == open)
6247                                 brackets++;
6248                         }
6249                     }
6250                     t++;
6251                 }
6252                 else
6253                     /* skip plain q word */
6254                     while (   t < PL_bufend
6255                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6256                     {
6257                         t += UTF ? UTF8SKIP(t) : 1;
6258                     }
6259             }
6260             else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6261                 t += UTF ? UTF8SKIP(t) : 1;
6262                 while (   t < PL_bufend
6263                        && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6264                 {
6265                     t += UTF ? UTF8SKIP(t) : 1;
6266                 }
6267             }
6268             while (t < PL_bufend && isSPACE(*t))
6269                 t++;
6270             /* if comma follows first term, call it an anon hash */
6271             /* XXX it could be a comma expression with loop modifiers */
6272             if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6273                                || (*t == '=' && t[1] == '>')))
6274                 OPERATOR(HASHBRACK);
6275             if (PL_expect == XREF) {
6276               block_expectation:
6277                 /* If there is an opening brace or 'sub:', treat it
6278                    as a term to make ${{...}}{k} and &{sub:attr...}
6279                    dwim.  Otherwise, treat it as a statement, so
6280                    map {no strict; ...} works.
6281                  */
6282                 s = skipspace(s);
6283                 if (*s == '{') {
6284                     PL_expect = XTERM;
6285                     break;
6286                 }
6287                 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6288                     PL_bufptr = s;
6289                     d = s + 3;
6290                     d = skipspace(d);
6291                     s = PL_bufptr;
6292                     if (*d == ':') {
6293                         PL_expect = XTERM;
6294                         break;
6295                     }
6296                 }
6297                 PL_expect = XSTATE;
6298             }
6299             else {
6300                 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6301                 PL_expect = XSTATE;
6302             }
6303         }
6304         break;
6305     }
6306
6307     pl_yylval.ival = CopLINE(PL_curcop);
6308     PL_copline = NOLINE;   /* invalidate current command line number */
6309     TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN);
6310 }
6311
6312 static int
6313 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6314 {
6315     assert(s != PL_bufend);
6316     s++;
6317
6318     if (PL_lex_brackets <= 0)
6319         /* diag_listed_as: Unmatched right %s bracket */
6320         yyerror("Unmatched right curly bracket");
6321     else
6322         PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6323
6324     PL_lex_allbrackets--;
6325
6326     if (PL_lex_state == LEX_INTERPNORMAL) {
6327         if (PL_lex_brackets == 0) {
6328             if (PL_expect & XFAKEBRACK) {
6329                 PL_expect &= XENUMMASK;
6330                 PL_lex_state = LEX_INTERPEND;
6331                 PL_bufptr = s;
6332                 return yylex(); /* ignore fake brackets */
6333             }
6334             if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6335              && SvEVALED(PL_lex_repl))
6336                 PL_lex_state = LEX_INTERPEND;
6337             else if (*s == '-' && s[1] == '>')
6338                 PL_lex_state = LEX_INTERPENDMAYBE;
6339             else if (*s != '[' && *s != '{')
6340                 PL_lex_state = LEX_INTERPEND;
6341         }
6342     }
6343
6344     if (PL_expect & XFAKEBRACK) {
6345         PL_expect &= XENUMMASK;
6346         PL_bufptr = s;
6347         return yylex();         /* ignore fake brackets */
6348     }
6349
6350     force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE);
6351     if (formbrack) LEAVE_with_name("lex_format");
6352     if (formbrack == 2) { /* means . where arguments were expected */
6353         force_next(PERLY_SEMICOLON);
6354         TOKEN(FORMRBRACK);
6355     }
6356
6357     TOKEN(PERLY_SEMICOLON);
6358 }
6359
6360 static int
6361 yyl_ampersand(pTHX_ char *s)
6362 {
6363     if (PL_expect == XPOSTDEREF)
6364         POSTDEREF(PERLY_AMPERSAND);
6365
6366     s++;
6367     if (*s++ == '&') {
6368         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6369                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6370             s -= 2;
6371             TOKEN(0);
6372         }
6373         AOPERATOR(ANDAND);
6374     }
6375     s--;
6376
6377     if (PL_expect == XOPERATOR) {
6378         char *d;
6379         bool bof;
6380         if (   PL_bufptr == PL_linestart
6381             && ckWARN(WARN_SEMICOLON)
6382             && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6383         {
6384             CopLINE_dec(PL_curcop);
6385             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6386             CopLINE_inc(PL_curcop);
6387         }
6388         d = s;
6389         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6390             s++;
6391         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6392                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6393             s = d;
6394             s--;
6395             TOKEN(0);
6396         }
6397         if (d == s)
6398             BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6399         else
6400             BAop(OP_SBIT_AND);
6401     }
6402
6403     PL_tokenbuf[0] = '&';
6404     s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6405     pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6406
6407     if (PL_tokenbuf[1])
6408         force_ident_maybe_lex('&');
6409     else
6410         PREREF(PERLY_AMPERSAND);
6411
6412     TERM(PERLY_AMPERSAND);
6413 }
6414
6415 static int
6416 yyl_verticalbar(pTHX_ char *s)
6417 {
6418     char *d;
6419     bool bof;
6420
6421     s++;
6422     if (*s++ == '|') {
6423         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6424                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6425             s -= 2;
6426             TOKEN(0);
6427         }
6428         AOPERATOR(OROR);
6429     }
6430
6431     s--;
6432     d = s;
6433     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6434         s++;
6435
6436     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6437             (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6438         s = d - 1;
6439         TOKEN(0);
6440     }
6441
6442     BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6443 }
6444
6445 static int
6446 yyl_bang(pTHX_ char *s)
6447 {
6448     const char tmp = *s++;
6449     if (tmp == '=') {
6450         /* was this !=~ where !~ was meant?
6451          * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6452
6453         if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6454             const char *t = s+1;
6455
6456             while (t < PL_bufend && isSPACE(*t))
6457                 ++t;
6458
6459             if (*t == '/' || *t == '?'
6460                 || ((*t == 'm' || *t == 's' || *t == 'y')
6461                     && !isWORDCHAR(t[1]))
6462                 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6463                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6464                             "!=~ should be !~");
6465         }
6466
6467         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6468             s -= 2;
6469             TOKEN(0);
6470         }
6471
6472         ChEop(OP_NE);
6473     }
6474
6475     if (tmp == '~')
6476         PMop(OP_NOT);
6477
6478     s--;
6479     OPERATOR(PERLY_EXCLAMATION_MARK);
6480 }
6481
6482 static int
6483 yyl_snail(pTHX_ char *s)
6484 {
6485     if (PL_expect == XPOSTDEREF)
6486         POSTDEREF(PERLY_SNAIL);
6487     PL_tokenbuf[0] = '@';
6488     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6489     if (PL_expect == XOPERATOR) {
6490         char *d = s;
6491         if (PL_bufptr > s) {
6492             d = PL_bufptr-1;
6493             PL_bufptr = PL_oldbufptr;
6494         }
6495         no_op("Array", d);
6496     }
6497     pl_yylval.ival = 0;
6498     if (!PL_tokenbuf[1]) {
6499         PREREF(PERLY_SNAIL);
6500     }
6501     if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6502         s = skipspace(s);
6503     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6504         && intuit_more(s, PL_bufend))
6505     {
6506         if (*s == '{')
6507             PL_tokenbuf[0] = '%';
6508
6509         /* Warn about @ where they meant $. */
6510         if (*s == '[' || *s == '{') {
6511             if (ckWARN(WARN_SYNTAX)) {
6512                 S_check_scalar_slice(aTHX_ s);
6513             }
6514         }
6515     }
6516     PL_expect = XOPERATOR;
6517     force_ident_maybe_lex('@');
6518     TERM(PERLY_SNAIL);
6519 }
6520
6521 static int
6522 yyl_slash(pTHX_ char *s)
6523 {
6524     if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6525         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6526                 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6527             TOKEN(0);
6528         s += 2;
6529         AOPERATOR(DORDOR);
6530     }
6531     else if (PL_expect == XOPERATOR) {
6532         s++;
6533         if (*s == '=' && !PL_lex_allbrackets
6534             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6535         {
6536             s--;
6537             TOKEN(0);
6538         }
6539         Mop(OP_DIVIDE);
6540     }
6541     else {
6542         /* Disable warning on "study /blah/" */
6543         if (    PL_oldoldbufptr == PL_last_uni
6544             && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6545                 || memNE(PL_last_uni, "study", 5)
6546                 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6547          ))
6548             check_uni();
6549         s = scan_pat(s,OP_MATCH);
6550         TERM(sublex_start());
6551     }
6552 }
6553
6554 static int
6555 yyl_leftsquare(pTHX_ char *s)
6556 {
6557     if (PL_lex_brackets > 100)
6558         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6559     PL_lex_brackstack[PL_lex_brackets++] = 0;
6560     PL_lex_allbrackets++;
6561     s++;
6562     OPERATOR(PERLY_BRACKET_OPEN);
6563 }
6564
6565 static int
6566 yyl_rightsquare(pTHX_ char *s)
6567 {
6568     if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6569         TOKEN(0);
6570     s++;
6571     if (PL_lex_brackets <= 0)
6572         /* diag_listed_as: Unmatched right %s bracket */
6573         yyerror("Unmatched right square bracket");
6574     else
6575         --PL_lex_brackets;
6576     PL_lex_allbrackets--;
6577     if (PL_lex_state == LEX_INTERPNORMAL) {
6578         if (PL_lex_brackets == 0) {
6579             if (*s == '-' && s[1] == '>')
6580                 PL_lex_state = LEX_INTERPENDMAYBE;
6581             else if (*s != '[' && *s != '{')
6582                 PL_lex_state = LEX_INTERPEND;
6583         }
6584     }
6585     TERM(PERLY_BRACKET_CLOSE);
6586 }
6587
6588 static int
6589 yyl_tilde(pTHX_ char *s)
6590 {
6591     bool bof;
6592     if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6593         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6594             TOKEN(0);
6595         s += 2;
6596         Perl_ck_warner_d(aTHX_
6597             packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
6598             "Smartmatch is experimental");
6599         NCEop(OP_SMARTMATCH);
6600     }
6601     s++;
6602     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6603         s++;
6604         BCop(OP_SCOMPLEMENT);
6605     }
6606     BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6607 }
6608
6609 static int
6610 yyl_leftparen(pTHX_ char *s)
6611 {
6612     if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6613         PL_oldbufptr = PL_oldoldbufptr;         /* allow print(STDOUT 123) */
6614     else
6615         PL_expect = XTERM;
6616     s = skipspace(s);
6617     PL_lex_allbrackets++;
6618     TOKEN(PERLY_PAREN_OPEN);
6619 }
6620
6621 static int
6622 yyl_rightparen(pTHX_ char *s)
6623 {
6624     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6625         TOKEN(0);
6626     s++;
6627     PL_lex_allbrackets--;
6628     s = skipspace(s);
6629     if (*s == '{')
6630         PREBLOCK(PERLY_PAREN_CLOSE);
6631     TERM(PERLY_PAREN_CLOSE);
6632 }
6633
6634 static int
6635 yyl_leftpointy(pTHX_ char *s)
6636 {
6637     char tmp;
6638
6639     if (PL_expect != XOPERATOR) {
6640         if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6641             check_uni();
6642         if (s[1] == '<' && s[2] != '>')
6643             s = scan_heredoc(s);
6644         else
6645             s = scan_inputsymbol(s);
6646         PL_expect = XOPERATOR;
6647         TOKEN(sublex_start());
6648     }
6649
6650     s++;
6651
6652     tmp = *s++;
6653     if (tmp == '<') {
6654         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6655             s -= 2;
6656             TOKEN(0);
6657         }
6658         SHop(OP_LEFT_SHIFT);
6659     }
6660     if (tmp == '=') {
6661         tmp = *s++;
6662         if (tmp == '>') {
6663             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6664                 s -= 3;
6665                 TOKEN(0);
6666             }
6667             NCEop(OP_NCMP);
6668         }
6669         s--;
6670         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6671             s -= 2;
6672             TOKEN(0);
6673         }
6674         ChRop(OP_LE);
6675     }
6676
6677     s--;
6678     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6679         s--;
6680         TOKEN(0);
6681     }
6682
6683     ChRop(OP_LT);
6684 }
6685
6686 static int
6687 yyl_rightpointy(pTHX_ char *s)
6688 {
6689     const char tmp = *s++;
6690
6691     if (tmp == '>') {
6692         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6693             s -= 2;
6694             TOKEN(0);
6695         }
6696         SHop(OP_RIGHT_SHIFT);
6697     }
6698     else if (tmp == '=') {
6699         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6700             s -= 2;
6701             TOKEN(0);
6702         }
6703         ChRop(OP_GE);
6704     }
6705
6706     s--;
6707     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6708         s--;
6709         TOKEN(0);
6710     }
6711
6712     ChRop(OP_GT);
6713 }
6714
6715 static int
6716 yyl_sglquote(pTHX_ char *s)
6717 {
6718     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6719     if (!s)
6720         missingterm(NULL, 0);
6721     COPLINE_SET_FROM_MULTI_END;
6722     DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6723     if (PL_expect == XOPERATOR) {
6724         no_op("String",s);
6725     }
6726     pl_yylval.ival = OP_CONST;
6727     TERM(sublex_start());
6728 }
6729
6730 static int
6731 yyl_dblquote(pTHX_ char *s)
6732 {
6733     char *d;
6734     STRLEN len;
6735     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6736     DEBUG_T( {
6737         if (s)
6738             printbuf("### Saw string before %s\n", s);
6739         else
6740             PerlIO_printf(Perl_debug_log,
6741                          "### Saw unterminated string\n");
6742     } );
6743     if (PL_expect == XOPERATOR) {
6744             no_op("String",s);
6745     }
6746     if (!s)
6747         missingterm(NULL, 0);
6748     pl_yylval.ival = OP_CONST;
6749     /* FIXME. I think that this can be const if char *d is replaced by
6750        more localised variables.  */
6751     for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6752         if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6753             pl_yylval.ival = OP_STRINGIFY;
6754             break;
6755         }
6756     }
6757     if (pl_yylval.ival == OP_CONST)
6758         COPLINE_SET_FROM_MULTI_END;
6759     TERM(sublex_start());
6760 }
6761
6762 static int
6763 yyl_backtick(pTHX_ char *s)
6764 {
6765     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6766     DEBUG_T( {
6767         if (s)
6768             printbuf("### Saw backtick string before %s\n", s);
6769         else
6770             PerlIO_printf(Perl_debug_log,
6771                          "### Saw unterminated backtick string\n");
6772     } );
6773     if (PL_expect == XOPERATOR)
6774         no_op("Backticks",s);
6775     if (!s)
6776         missingterm(NULL, 0);
6777     pl_yylval.ival = OP_BACKTICK;
6778     TERM(sublex_start());
6779 }
6780
6781 static int
6782 yyl_backslash(pTHX_ char *s)
6783 {
6784     if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6785         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6786                        *s, *s);
6787     if (PL_expect == XOPERATOR)
6788         no_op("Backslash",s);
6789     OPERATOR(REFGEN);
6790 }
6791
6792 static void
6793 yyl_data_handle(pTHX)
6794 {
6795     HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6796                             ? PL_curstash
6797                             : PL_defstash;
6798     GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6799
6800     if (!isGV(gv))
6801         gv_init(gv,stash,"DATA",4,0);
6802
6803     GvMULTI_on(gv);
6804     if (!GvIO(gv))
6805         GvIOp(gv) = newIO();
6806     IoIFP(GvIOp(gv)) = PL_rsfp;
6807
6808     /* Mark this internal pseudo-handle as clean */
6809     IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6810     if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6811         IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6812     else
6813         IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6814
6815 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6816     /* if the script was opened in binmode, we need to revert
6817      * it to text mode for compatibility; but only iff it has CRs
6818      * XXX this is a questionable hack at best. */
6819     if (PL_bufend-PL_bufptr > 2
6820         && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6821     {
6822         Off_t loc = 0;
6823         if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6824             loc = PerlIO_tell(PL_rsfp);
6825             (void)PerlIO_seek(PL_rsfp, 0L, 0);
6826         }
6827         if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6828             if (loc > 0)
6829                 PerlIO_seek(PL_rsfp, loc, 0);
6830         }
6831     }
6832 #endif
6833
6834 #ifdef PERLIO_LAYERS
6835     if (!IN_BYTES) {
6836         if (UTF)
6837             PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6838     }
6839 #endif
6840
6841     PL_rsfp = NULL;
6842 }
6843
6844 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6845     __attribute__noreturn__;
6846
6847 PERL_STATIC_NO_RET void
6848 yyl_croak_unrecognised(pTHX_ char *s)
6849 {
6850     SV *dsv = newSVpvs_flags("", SVs_TEMP);
6851     const char *c;
6852     char *d;
6853     STRLEN len;
6854
6855     if (UTF) {
6856         STRLEN skiplen = UTF8SKIP(s);
6857         STRLEN stravail = PL_bufend - s;
6858         c = sv_uni_display(dsv, newSVpvn_flags(s,
6859                                                skiplen > stravail ? stravail : skiplen,
6860                                                SVs_TEMP | SVf_UTF8),
6861                            10, UNI_DISPLAY_ISPRINT);
6862     }
6863     else {
6864         c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6865     }
6866
6867     if (s >= PL_linestart) {
6868         d = PL_linestart;
6869     }
6870     else {
6871         /* somehow (probably due to a parse failure), PL_linestart has advanced
6872          * pass PL_bufptr, get a reasonable beginning of line
6873          */
6874         d = s;
6875         while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6876             --d;
6877     }
6878     len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6879     if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6880         d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6881     }
6882
6883     Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6884                       UTF8fARG(UTF, (s - d), d),
6885                      (int) len + 1);
6886 }
6887
6888 static int
6889 yyl_require(pTHX_ char *s, I32 orig_keyword)
6890 {
6891     s = skipspace(s);
6892     if (isDIGIT(*s)) {
6893         s = force_version(s, FALSE);
6894     }
6895     else if (*s != 'v' || !isDIGIT(s[1])
6896             || (s = force_version(s, TRUE), *s == 'v'))
6897     {
6898         *PL_tokenbuf = '\0';
6899         s = force_word(s,BAREWORD,TRUE,TRUE);
6900         if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6901                                    PL_tokenbuf + sizeof(PL_tokenbuf),
6902                                    UTF))
6903         {
6904             gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6905                         GV_ADD | (UTF ? SVf_UTF8 : 0));
6906         }
6907         else if (*s == '<')
6908             yyerror("<> at require-statement should be quotes");
6909     }
6910
6911     if (orig_keyword == KEY_require)
6912         pl_yylval.ival = 1;
6913     else
6914         pl_yylval.ival = 0;
6915
6916     PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6917     PL_bufptr = s;
6918     PL_last_uni = PL_oldbufptr;
6919     PL_last_lop_op = OP_REQUIRE;
6920     s = skipspace(s);
6921     return REPORT( (int)REQUIRE );
6922 }
6923
6924 static int
6925 yyl_foreach(pTHX_ char *s)
6926 {
6927     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6928         return REPORT(0);
6929     pl_yylval.ival = CopLINE(PL_curcop);
6930     s = skipspace(s);
6931     if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6932         char *p = s;
6933         SSize_t s_off = s - SvPVX(PL_linestr);
6934         bool paren_is_valid = FALSE;
6935         bool maybe_package = FALSE;
6936         bool saw_core = FALSE;
6937         bool core_valid = FALSE;
6938
6939         if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
6940             saw_core = TRUE;
6941             p += 6;
6942         }
6943         if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
6944             core_valid = TRUE;
6945             paren_is_valid = TRUE;
6946             if (isSPACE(p[2])) {
6947                 p = skipspace(p + 3);
6948                 maybe_package = TRUE;
6949             }
6950             else {
6951                 p += 2;
6952             }
6953         }
6954         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
6955             core_valid = TRUE;
6956             if (isSPACE(p[3])) {
6957                 p = skipspace(p + 4);
6958                 maybe_package = TRUE;
6959             }
6960             else {
6961                 p += 3;
6962             }
6963         }
6964         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
6965             core_valid = TRUE;
6966             if (isSPACE(p[5])) {
6967                 p = skipspace(p + 6);
6968             }
6969             else {
6970                 p += 5;
6971             }
6972         }
6973         if (saw_core && !core_valid) {
6974             Perl_croak(aTHX_ "Missing $ on loop variable");
6975         }
6976
6977         if (maybe_package && !saw_core) {
6978             /* skip optional package name, as in "for my abc $x (..)" */
6979             if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
6980                 STRLEN len;
6981                 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6982                 p = skipspace(p);
6983                 paren_is_valid = FALSE;
6984             }
6985         }
6986
6987         if (UNLIKELY(paren_is_valid && *p == '(')) {
6988             Perl_ck_warner_d(aTHX_
6989                              packWARN(WARN_EXPERIMENTAL__FOR_LIST),
6990                              "for my (...) is experimental");
6991         }
6992         else if (UNLIKELY(*p != '$' && *p != '\\')) {
6993             /* "for myfoo (" will end up here, but with p pointing at the 'f' */
6994             Perl_croak(aTHX_ "Missing $ on loop variable");
6995         }
6996         /* The buffer may have been reallocated, update s */
6997         s = SvPVX(PL_linestr) + s_off;
6998     }
6999     OPERATOR(FOR);
7000 }
7001
7002 static int
7003 yyl_do(pTHX_ char *s, I32 orig_keyword)
7004 {
7005     s = skipspace(s);
7006     if (*s == '{')
7007         PRETERMBLOCK(DO);
7008     if (*s != '\'') {
7009         char *d;
7010         STRLEN len;
7011         *PL_tokenbuf = '&';
7012         d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7013                       1, &len);
7014         if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7015          && !keyword(PL_tokenbuf + 1, len, 0)) {
7016             SSize_t off = s-SvPVX(PL_linestr);
7017             d = skipspace(d);
7018             s = SvPVX(PL_linestr)+off;
7019             if (*d == '(') {
7020                 force_ident_maybe_lex('&');
7021                 s = d;
7022             }
7023         }
7024     }
7025     if (orig_keyword == KEY_do)
7026         pl_yylval.ival = 1;
7027     else
7028         pl_yylval.ival = 0;
7029     OPERATOR(DO);
7030 }
7031
7032 static int
7033 yyl_my(pTHX_ char *s, I32 my)
7034 {
7035     if (PL_in_my) {
7036         PL_bufptr = s;
7037         yyerror(Perl_form(aTHX_
7038                           "Can't redeclare \"%s\" in \"%s\"",
7039                            my       == KEY_my    ? "my" :
7040                            my       == KEY_state ? "state" : "our",
7041                            PL_in_my == KEY_my    ? "my" :
7042                            PL_in_my == KEY_state ? "state" : "our"));
7043     }
7044     PL_in_my = (U16)my;
7045     s = skipspace(s);
7046     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7047         STRLEN len;
7048         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7049         if (memEQs(PL_tokenbuf, len, "sub"))
7050             return yyl_sub(aTHX_ s, my);
7051         PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7052         if (!PL_in_my_stash) {
7053             char tmpbuf[1024];
7054             int i;
7055             PL_bufptr = s;
7056             i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7057             PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
7058             yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7059         }
7060     }
7061     else if (*s == '\\') {
7062         if (!FEATURE_MYREF_IS_ENABLED)
7063             Perl_croak(aTHX_ "The experimental declared_refs "
7064                              "feature is not enabled");
7065         Perl_ck_warner_d(aTHX_
7066              packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
7067             "Declaring references is experimental");
7068     }
7069     OPERATOR(MY);
7070 }
7071
7072 static int yyl_try(pTHX_ char*);
7073
7074 static bool
7075 yyl_eol_needs_semicolon(pTHX_ char **ps)
7076 {
7077     char *s = *ps;
7078     if (PL_lex_state != LEX_NORMAL
7079         || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
7080     {
7081         const bool in_comment = *s == '#';
7082         char *d;
7083         if (*s == '#' && s == PL_linestart && PL_in_eval
7084          && !PL_rsfp && !PL_parser->filtered) {
7085             /* handle eval qq[#line 1 "foo"\n ...] */
7086             CopLINE_dec(PL_curcop);
7087             incline(s, PL_bufend);
7088         }
7089         d = s;
7090         while (d < PL_bufend && *d != '\n')
7091             d++;
7092         if (d < PL_bufend)
7093             d++;
7094         s = d;
7095         if (in_comment && d == PL_bufend
7096             && PL_lex_state == LEX_INTERPNORMAL
7097             && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7098             && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
7099         else
7100             incline(s, PL_bufend);
7101         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7102             PL_lex_state = LEX_FORMLINE;
7103             force_next(FORMRBRACK);
7104             *ps = s;
7105             return TRUE;
7106         }
7107     }
7108     else {
7109         while (s < PL_bufend && *s != '\n')
7110             s++;
7111         if (s < PL_bufend) {
7112             s++;
7113             if (s < PL_bufend)
7114                 incline(s, PL_bufend);
7115         }
7116     }
7117     *ps = s;
7118     return FALSE;
7119 }
7120
7121 static int
7122 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
7123 {
7124     char *d;
7125
7126     goto start;
7127
7128     do {
7129         fake_eof = 0;
7130         bof = cBOOL(PL_rsfp);
7131       start:
7132
7133         PL_bufptr = PL_bufend;
7134         COPLINE_INC_WITH_HERELINES;
7135         if (!lex_next_chunk(fake_eof)) {
7136             CopLINE_dec(PL_curcop);
7137             s = PL_bufptr;
7138             TOKEN(PERLY_SEMICOLON);     /* not infinite loop because rsfp is NULL now */
7139         }
7140         CopLINE_dec(PL_curcop);
7141         s = PL_bufptr;
7142         /* If it looks like the start of a BOM or raw UTF-16,
7143          * check if it in fact is. */
7144         if (bof && PL_rsfp
7145             && (   *s == 0
7146                 || *(U8*)s == BOM_UTF8_FIRST_BYTE
7147                 || *(U8*)s >= 0xFE
7148                 || s[1] == 0))
7149         {
7150             Off_t offset = (IV)PerlIO_tell(PL_rsfp);
7151             bof = (offset == (Off_t)SvCUR(PL_linestr));
7152 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
7153             /* offset may include swallowed CR */
7154             if (!bof)
7155                 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
7156 #endif
7157             if (bof) {
7158                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7159                 s = swallow_bom((U8*)s);
7160             }
7161         }
7162         if (PL_parser->in_pod) {
7163             /* Incest with pod. */
7164             if (    memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
7165                 && !isALPHA(s[4]))
7166             {
7167                 SvPVCLEAR(PL_linestr);
7168                 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7169                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7170                 PL_last_lop = PL_last_uni = NULL;
7171                 PL_parser->in_pod = 0;
7172             }
7173         }
7174         if (PL_rsfp || PL_parser->filtered)
7175             incline(s, PL_bufend);
7176     } while (PL_parser->in_pod);
7177
7178     PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7179     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7180     PL_last_lop = PL_last_uni = NULL;
7181     if (CopLINE(PL_curcop) == 1) {
7182         while (s < PL_bufend && isSPACE(*s))
7183             s++;
7184         if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7185             s++;
7186         d = NULL;
7187         if (!PL_in_eval) {
7188             if (*s == '#' && *(s+1) == '!')
7189                 d = s + 2;
7190 #ifdef ALTERNATE_SHEBANG
7191             else {
7192                 static char const as[] = ALTERNATE_SHEBANG;
7193                 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7194                     d = s + (sizeof(as) - 1);
7195             }
7196 #endif /* ALTERNATE_SHEBANG */
7197         }
7198         if (d) {
7199             char *ipath;
7200             char *ipathend;
7201
7202             while (isSPACE(*d))
7203                 d++;
7204             ipath = d;
7205             while (*d && !isSPACE(*d))
7206                 d++;
7207             ipathend = d;
7208
7209 #ifdef ARG_ZERO_IS_SCRIPT
7210             if (ipathend > ipath) {
7211                 /*
7212                  * HP-UX (at least) sets argv[0] to the script name,
7213                  * which makes $^X incorrect.  And Digital UNIX and Linux,
7214                  * at least, set argv[0] to the basename of the Perl
7215                  * interpreter. So, having found "#!", we'll set it right.
7216                  */
7217                 SV* copfilesv = CopFILESV(PL_curcop);
7218                 if (copfilesv) {
7219                     SV * const x =
7220                         GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7221                                          SVt_PV)); /* $^X */
7222                     assert(SvPOK(x) || SvGMAGICAL(x));
7223                     if (sv_eq(x, copfilesv)) {
7224                         sv_setpvn(x, ipath, ipathend - ipath);
7225                         SvSETMAGIC(x);
7226                     }
7227                     else {
7228                         STRLEN blen;
7229                         STRLEN llen;
7230                         const char *bstart = SvPV_const(copfilesv, blen);
7231                         const char * const lstart = SvPV_const(x, llen);
7232                         if (llen < blen) {
7233                             bstart += blen - llen;
7234                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
7235                                 sv_setpvn(x, ipath, ipathend - ipath);
7236                                 SvSETMAGIC(x);
7237                             }
7238                         }
7239                     }
7240                 }
7241                 else {
7242                     /* Anything to do if no copfilesv? */
7243                 }
7244                 TAINT_NOT;      /* $^X is always tainted, but that's OK */
7245             }
7246 #endif /* ARG_ZERO_IS_SCRIPT */
7247
7248             /*
7249              * Look for options.
7250              */
7251             d = instr(s,"perl -");
7252             if (!d) {
7253                 d = instr(s,"perl");
7254 #if defined(DOSISH)
7255                 /* avoid getting into infinite loops when shebang
7256                  * line contains "Perl" rather than "perl" */
7257                 if (!d) {
7258                     for (d = ipathend-4; d >= ipath; --d) {
7259                         if (isALPHA_FOLD_EQ(*d, 'p')
7260                             && !ibcmp(d, "perl", 4))
7261                         {
7262                             break;
7263                         }
7264                     }
7265                     if (d < ipath)
7266                         d = NULL;
7267                 }
7268 #endif
7269             }
7270 #ifdef ALTERNATE_SHEBANG
7271             /*
7272              * If the ALTERNATE_SHEBANG on this system starts with a
7273              * character that can be part of a Perl expression, then if
7274              * we see it but not "perl", we're probably looking at the
7275              * start of Perl code, not a request to hand off to some
7276              * other interpreter.  Similarly, if "perl" is there, but
7277              * not in the first 'word' of the line, we assume the line
7278              * contains the start of the Perl program.
7279              */
7280             if (d && *s != '#') {
7281                 const char *c = ipath;
7282                 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7283                     c++;
7284                 if (c < d)
7285                     d = NULL;   /* "perl" not in first word; ignore */
7286                 else
7287                     *s = '#';   /* Don't try to parse shebang line */
7288             }
7289 #endif /* ALTERNATE_SHEBANG */
7290             if (!d
7291                 && *s == '#'
7292                 && ipathend > ipath
7293                 && !PL_minus_c
7294                 && !instr(s,"indir")
7295                 && instr(PL_origargv[0],"perl"))
7296             {
7297                 char **newargv;
7298
7299                 *ipathend = '\0';
7300                 s = ipathend + 1;
7301                 while (s < PL_bufend && isSPACE(*s))
7302                     s++;
7303                 if (s < PL_bufend) {
7304                     Newx(newargv,PL_origargc+3,char*);
7305                     newargv[1] = s;
7306                     while (s < PL_bufend && !isSPACE(*s))
7307                         s++;
7308                     *s = '\0';
7309                     Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7310                 }
7311                 else
7312                     newargv = PL_origargv;
7313                 newargv[0] = ipath;
7314                 PERL_FPU_PRE_EXEC
7315                 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7316                 PERL_FPU_POST_EXEC
7317                 Perl_croak(aTHX_ "Can't exec %s", ipath);
7318             }
7319             if (d) {
7320                 while (*d && !isSPACE(*d))
7321                     d++;
7322                 while (SPACE_OR_TAB(*d))
7323                     d++;
7324
7325                 if (*d++ == '-') {
7326                     const bool switches_done = PL_doswitches;
7327                     const U32 oldpdb = PL_perldb;
7328                     const bool oldn = PL_minus_n;
7329                     const bool oldp = PL_minus_p;
7330                     const char *d1 = d;
7331
7332                     do {
7333                         bool baduni = FALSE;
7334                         if (*d1 == 'C') {
7335                             const char *d2 = d1 + 1;
7336                             if (parse_unicode_opts((const char **)&d2)
7337                                 != PL_unicode)
7338                                 baduni = TRUE;
7339                         }
7340                         if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7341                             const char * const m = d1;
7342                             while (*d1 && !isSPACE(*d1))
7343                                 d1++;
7344                             Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7345                                   (int)(d1 - m), m);
7346                         }
7347                         d1 = moreswitches(d1);
7348                     } while (d1);
7349                     if (PL_doswitches && !switches_done) {
7350                         int argc = PL_origargc;
7351                         char **argv = PL_origargv;
7352                         do {
7353                             argc--,argv++;
7354                         } while (argc && argv[0][0] == '-' && argv[0][1]);
7355                         init_argv_symbols(argc,argv);
7356                     }
7357                     if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7358                         || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7359                           /* if we have already added "LINE: while (<>) {",
7360                              we must not do it again */
7361                     {
7362                         SvPVCLEAR(PL_linestr);
7363                         PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7364                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7365                         PL_last_lop = PL_last_uni = NULL;
7366                         PL_preambled = FALSE;
7367                         if (PERLDB_LINE_OR_SAVESRC)
7368                             (void)gv_fetchfile(PL_origfilename);
7369                         return YYL_RETRY;
7370                     }
7371                 }
7372             }
7373         }
7374     }
7375
7376     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7377         PL_lex_state = LEX_FORMLINE;
7378         force_next(FORMRBRACK);
7379         TOKEN(PERLY_SEMICOLON);
7380     }
7381
7382     PL_bufptr = s;
7383     return YYL_RETRY;
7384 }
7385
7386 static int
7387 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7388 {
7389     CLINE;
7390     pl_yylval.opval
7391         = newSVOP(OP_CONST, 0,
7392                        S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7393     pl_yylval.opval->op_private = OPpCONST_BARE;
7394     TERM(BAREWORD);
7395 }
7396
7397 static int
7398 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7399 {
7400     if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7401         && PL_parser->saw_infix_sigil)
7402     {
7403         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7404                          "Operator or semicolon missing before %c%" UTF8f,
7405                          lastchar,
7406                          UTF8fARG(UTF, strlen(PL_tokenbuf),
7407                                   PL_tokenbuf));
7408         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7409                          "Ambiguous use of %c resolved as operator %c",
7410                          lastchar, lastchar);
7411     }
7412     TOKEN(BAREWORD);
7413 }
7414
7415 static int
7416 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7417 {
7418     if (sv) {
7419         op_free(rv2cv_op);
7420         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7421         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7422         if (SvTYPE(sv) == SVt_PVAV)
7423             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7424                                       pl_yylval.opval);
7425         else {
7426             pl_yylval.opval->op_private = 0;
7427             pl_yylval.opval->op_folded = 1;
7428             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7429         }
7430         TOKEN(BAREWORD);
7431     }
7432
7433     op_free(pl_yylval.opval);
7434     pl_yylval.opval =
7435         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7436     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7437     PL_last_lop = PL_oldbufptr;
7438     PL_last_lop_op = OP_ENTERSUB;
7439
7440     /* Is there a prototype? */
7441     if (SvPOK(cv)) {
7442         int k = yyl_subproto(aTHX_ s, cv);
7443         if (k != KEY_NULL)
7444             return k;
7445     }
7446
7447     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7448     PL_expect = XTERM;
7449     force_next(off ? PRIVATEREF : BAREWORD);
7450     if (!PL_lex_allbrackets
7451         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7452     {
7453         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7454     }
7455
7456     TOKEN(NOAMP);
7457 }
7458
7459 /* Honour "reserved word" warnings, and enforce strict subs */
7460 static void
7461 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7462 {
7463     /* after "print" and similar functions (corresponding to
7464      * "F? L" in opcode.pl), whatever wasn't already parsed as
7465      * a filehandle should be subject to "strict subs".
7466      * Likewise for the optional indirect-object argument to system
7467      * or exec, which can't be a bareword */
7468     if ((PL_last_lop_op == OP_PRINT
7469             || PL_last_lop_op == OP_PRTF
7470             || PL_last_lop_op == OP_SAY
7471             || PL_last_lop_op == OP_SYSTEM
7472             || PL_last_lop_op == OP_EXEC)
7473         && (PL_hints & HINT_STRICT_SUBS))
7474     {
7475         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7476     }
7477
7478     if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7479         char *d = PL_tokenbuf;
7480         while (isLOWER(*d))
7481             d++;
7482         if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7483             /* PL_warn_reserved is constant */
7484             GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7485             Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7486                         PL_tokenbuf);
7487             GCC_DIAG_RESTORE_STMT;
7488         }
7489     }
7490 }
7491
7492 static int
7493 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7494 {
7495     int pkgname = 0;
7496     const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7497     bool safebw;
7498     bool no_op_error = FALSE;
7499     /* Use this var to track whether intuit_method has been
7500        called.  intuit_method returns 0 or > 255.  */
7501     int key = 1;
7502
7503     if (PL_expect == XOPERATOR) {
7504         if (PL_bufptr == PL_linestart) {
7505             CopLINE_dec(PL_curcop);
7506             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7507             CopLINE_inc(PL_curcop);
7508         }
7509         else
7510             /* We want to call no_op with s pointing after the
7511                bareword, so defer it.  But we want it to come
7512                before the Bad name croak.  */
7513             no_op_error = TRUE;
7514     }
7515
7516     /* Get the rest if it looks like a package qualifier */
7517
7518     if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7519         STRLEN morelen;
7520         s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7521                       TRUE, &morelen);
7522         if (no_op_error) {
7523             no_op("Bareword",s);
7524             no_op_error = FALSE;
7525         }
7526         if (!morelen)
7527             Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7528                     UTF8fARG(UTF, len, PL_tokenbuf),
7529                     *s == '\'' ? "'" : "::");
7530         len += morelen;
7531         pkgname = 1;
7532     }
7533
7534     if (no_op_error)
7535         no_op("Bareword",s);
7536
7537     /* See if the name is "Foo::",
7538        in which case Foo is a bareword
7539        (and a package name). */
7540
7541     if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7542         if (ckWARN(WARN_BAREWORD)
7543             && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7544             Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7545                         "Bareword \"%" UTF8f
7546                         "\" refers to nonexistent package",
7547                         UTF8fARG(UTF, len, PL_tokenbuf));
7548         len -= 2;
7549         PL_tokenbuf[len] = '\0';
7550         c.gv = NULL;
7551         c.gvp = 0;
7552         safebw = TRUE;
7553     }
7554     else {
7555         safebw = FALSE;
7556     }
7557
7558     /* if we saw a global override before, get the right name */
7559
7560     if (!c.sv)
7561         c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7562     if (c.gvp) {
7563         SV *sv = newSVpvs("CORE::GLOBAL::");
7564         sv_catsv(sv, c.sv);
7565         SvREFCNT_dec(c.sv);
7566         c.sv = sv;
7567     }
7568
7569     /* Presume this is going to be a bareword of some sort. */
7570     CLINE;
7571     pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7572     pl_yylval.opval->op_private = OPpCONST_BARE;
7573
7574     /* And if "Foo::", then that's what it certainly is. */
7575     if (safebw)
7576         return yyl_safe_bareword(aTHX_ s, lastchar);
7577
7578     if (!c.off) {
7579         OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7580         const_op->op_private = OPpCONST_BARE;
7581         c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7582         c.cv = c.lex
7583             ? isGV(c.gv)
7584                 ? GvCV(c.gv)
7585                 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7586                     ? (CV *)SvRV(c.gv)
7587                     : ((CV *)c.gv)
7588             : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7589     }
7590
7591     /* See if it's the indirect object for a list operator. */
7592
7593     if (PL_oldoldbufptr
7594         && PL_oldoldbufptr < PL_bufptr
7595         && (PL_oldoldbufptr == PL_last_lop
7596             || PL_oldoldbufptr == PL_last_uni)
7597         && /* NO SKIPSPACE BEFORE HERE! */
7598            (PL_expect == XREF
7599             || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7600                                                    == OA_FILEREF))
7601     {
7602         bool immediate_paren = *s == '(';
7603         SSize_t s_off;
7604
7605         /* (Now we can afford to cross potential line boundary.) */
7606         s = skipspace(s);
7607
7608         /* intuit_method() can indirectly call lex_next_chunk(),
7609          * invalidating s
7610          */
7611         s_off = s - SvPVX(PL_linestr);
7612         /* Two barewords in a row may indicate method call. */
7613         if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7614                 || *s == '$')
7615             && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7616         {
7617             /* the code at method: doesn't use s */
7618             goto method;
7619         }
7620         s = SvPVX(PL_linestr) + s_off;
7621
7622         if (((PL_opargs[PL_last_lop_op] >> OASHIFT) & 7) == OA_FILEREF
7623             && !immediate_paren && !c.cv
7624             && !FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
7625             no_bareword_filehandle(PL_tokenbuf);
7626         }
7627
7628         /* If not a declared subroutine, it's an indirect object. */
7629         /* (But it's an indir obj regardless for sort.) */
7630         /* Also, if "_" follows a filetest operator, it's a bareword */
7631
7632         if (
7633             ( !immediate_paren && (PL_last_lop_op == OP_SORT
7634              || (!c.cv
7635                  && (PL_last_lop_op != OP_MAPSTART
7636                      && PL_last_lop_op != OP_GREPSTART))))
7637            || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7638                 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7639                                                 == OA_FILESTATOP))
7640            )
7641         {
7642             PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7643             yyl_strictwarn_bareword(aTHX_ lastchar);
7644             op_free(c.rv2cv_op);
7645             return yyl_safe_bareword(aTHX_ s, lastchar);
7646         }
7647     }
7648
7649     PL_expect = XOPERATOR;
7650     s = skipspace(s);
7651
7652     /* Is this a word before a => operator? */
7653     if (*s == '=' && s[1] == '>' && !pkgname) {
7654         op_free(c.rv2cv_op);
7655         CLINE;
7656         if (c.gvp || (c.lex && !c.off)) {
7657             assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7658             /* This is our own scalar, created a few lines
7659                above, so this is safe. */
7660             SvREADONLY_off(c.sv);
7661             sv_setpv(c.sv, PL_tokenbuf);
7662             if (UTF && !IN_BYTES
7663              && is_utf8_string((U8*)PL_tokenbuf, len))
7664                   SvUTF8_on(c.sv);
7665             SvREADONLY_on(c.sv);
7666         }
7667         TERM(BAREWORD);
7668     }
7669
7670     /* If followed by a paren, it's certainly a subroutine. */
7671     if (*s == '(') {
7672         CLINE;
7673         if (c.cv) {
7674             char *d = s + 1;
7675             while (SPACE_OR_TAB(*d))
7676                 d++;
7677             if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7678                 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7679         }
7680         NEXTVAL_NEXTTOKE.opval =
7681             c.off ? c.rv2cv_op : pl_yylval.opval;
7682         if (c.off)
7683              op_free(pl_yylval.opval), force_next(PRIVATEREF);
7684         else op_free(c.rv2cv_op),      force_next(BAREWORD);
7685         pl_yylval.ival = 0;
7686         TOKEN(PERLY_AMPERSAND);
7687     }
7688
7689     /* If followed by var or block, call it a method (unless sub) */
7690
7691     if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7692         op_free(c.rv2cv_op);
7693         PL_last_lop = PL_oldbufptr;
7694         PL_last_lop_op = OP_METHOD;
7695         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7696             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7697         PL_expect = XBLOCKTERM;
7698         PL_bufptr = s;
7699         return REPORT(METHOD);
7700     }
7701
7702     /* If followed by a bareword, see if it looks like indir obj. */
7703
7704     if (   key == 1
7705         && !orig_keyword
7706         && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7707         && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7708     {
7709       method:
7710         if (c.lex && !c.off) {
7711             assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7712             SvREADONLY_off(c.sv);
7713             sv_setpvn(c.sv, PL_tokenbuf, len);
7714             if (UTF && !IN_BYTES
7715              && is_utf8_string((U8*)PL_tokenbuf, len))
7716                 SvUTF8_on(c.sv);
7717             else SvUTF8_off(c.sv);
7718         }
7719         op_free(c.rv2cv_op);
7720         if (key == METHOD && !PL_lex_allbrackets
7721             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7722         {
7723             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7724         }
7725         return REPORT(key);
7726     }
7727
7728     /* Not a method, so call it a subroutine (if defined) */
7729
7730     if (c.cv) {
7731         /* Check for a constant sub */
7732         c.sv = cv_const_sv_or_av(c.cv);
7733         return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7734     }
7735
7736     /* Call it a bare word */
7737
7738     if (PL_hints & HINT_STRICT_SUBS)
7739         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7740     else
7741         yyl_strictwarn_bareword(aTHX_ lastchar);
7742
7743     op_free(c.rv2cv_op);
7744
7745     return yyl_safe_bareword(aTHX_ s, lastchar);
7746 }
7747
7748 static int
7749 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7750 {
7751     switch (key) {
7752     default:                    /* not a keyword */
7753         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7754
7755     case KEY___FILE__:
7756         FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7757
7758     case KEY___LINE__:
7759         FUN0OP(
7760             newSVOP(OP_CONST, 0,
7761                 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7762         );
7763
7764     case KEY___PACKAGE__:
7765         FUN0OP(
7766             newSVOP(OP_CONST, 0, (PL_curstash
7767                                      ? newSVhek(HvNAME_HEK(PL_curstash))
7768                                      : &PL_sv_undef))
7769         );
7770
7771     case KEY___DATA__:
7772     case KEY___END__:
7773         if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7774             yyl_data_handle(aTHX);
7775         return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7776
7777     case KEY___SUB__:
7778         FUN0OP(CvCLONE(PL_compcv)
7779                     ? newOP(OP_RUNCV, 0)
7780                     : newPVOP(OP_RUNCV,0,NULL));
7781
7782     case KEY_AUTOLOAD:
7783     case KEY_DESTROY:
7784     case KEY_BEGIN:
7785     case KEY_UNITCHECK:
7786     case KEY_CHECK:
7787     case KEY_INIT:
7788     case KEY_END:
7789         if (PL_expect == XSTATE)
7790             return yyl_sub(aTHX_ PL_bufptr, key);
7791         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7792
7793     case KEY_abs:
7794         UNI(OP_ABS);
7795
7796     case KEY_alarm:
7797         UNI(OP_ALARM);
7798
7799     case KEY_accept:
7800         LOP(OP_ACCEPT,XTERM);
7801
7802     case KEY_and:
7803         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7804             return REPORT(0);
7805         OPERATOR(ANDOP);
7806
7807     case KEY_atan2:
7808         LOP(OP_ATAN2,XTERM);
7809
7810     case KEY_bind:
7811         LOP(OP_BIND,XTERM);
7812
7813     case KEY_binmode:
7814         LOP(OP_BINMODE,XTERM);
7815
7816     case KEY_bless:
7817         LOP(OP_BLESS,XTERM);
7818
7819     case KEY_break:
7820         FUN0(OP_BREAK);
7821
7822     case KEY_catch:
7823         Perl_ck_warner_d(aTHX_
7824             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
7825         PREBLOCK(CATCH);
7826
7827     case KEY_chop:
7828         UNI(OP_CHOP);
7829
7830     case KEY_continue:
7831         /* We have to disambiguate the two senses of
7832           "continue". If the next token is a '{' then
7833           treat it as the start of a continue block;
7834           otherwise treat it as a control operator.
7835          */
7836         s = skipspace(s);
7837         if (*s == '{')
7838             PREBLOCK(CONTINUE);
7839         else
7840             FUN0(OP_CONTINUE);
7841
7842     case KEY_chdir:
7843         /* may use HOME */
7844         (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7845         UNI(OP_CHDIR);
7846
7847     case KEY_close:
7848         UNI(OP_CLOSE);
7849
7850     case KEY_closedir:
7851         UNI(OP_CLOSEDIR);
7852
7853     case KEY_cmp:
7854         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7855             return REPORT(0);
7856         NCEop(OP_SCMP);
7857
7858     case KEY_caller:
7859         UNI(OP_CALLER);
7860
7861     case KEY_crypt:
7862
7863         LOP(OP_CRYPT,XTERM);
7864
7865     case KEY_chmod:
7866         LOP(OP_CHMOD,XTERM);
7867
7868     case KEY_chown:
7869         LOP(OP_CHOWN,XTERM);
7870
7871     case KEY_connect:
7872         LOP(OP_CONNECT,XTERM);
7873
7874     case KEY_chr:
7875         UNI(OP_CHR);
7876
7877     case KEY_cos:
7878         UNI(OP_COS);
7879
7880     case KEY_chroot:
7881         UNI(OP_CHROOT);
7882
7883     case KEY_default:
7884         PREBLOCK(DEFAULT);
7885
7886     case KEY_defer:
7887         Perl_ck_warner_d(aTHX_
7888             packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental");
7889         PREBLOCK(DEFER);
7890
7891     case KEY_do:
7892         return yyl_do(aTHX_ s, orig_keyword);
7893
7894     case KEY_die:
7895         PL_hints |= HINT_BLOCK_SCOPE;
7896         LOP(OP_DIE,XTERM);
7897
7898     case KEY_defined:
7899         UNI(OP_DEFINED);
7900
7901     case KEY_delete:
7902         UNI(OP_DELETE);
7903
7904     case KEY_dbmopen:
7905         Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7906                           STR_WITH_LEN("NDBM_File::"),
7907                           STR_WITH_LEN("DB_File::"),
7908                           STR_WITH_LEN("GDBM_File::"),
7909                           STR_WITH_LEN("SDBM_File::"),
7910                           STR_WITH_LEN("ODBM_File::"),
7911                           NULL);
7912         LOP(OP_DBMOPEN,XTERM);
7913
7914     case KEY_dbmclose:
7915         UNI(OP_DBMCLOSE);
7916
7917     case KEY_dump:
7918         LOOPX(OP_DUMP);
7919
7920     case KEY_else:
7921         PREBLOCK(ELSE);
7922
7923     case KEY_elsif:
7924         pl_yylval.ival = CopLINE(PL_curcop);
7925         OPERATOR(ELSIF);
7926
7927     case KEY_eq:
7928         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7929             return REPORT(0);
7930         ChEop(OP_SEQ);
7931
7932     case KEY_exists:
7933         UNI(OP_EXISTS);
7934
7935     case KEY_exit:
7936         UNI(OP_EXIT);
7937
7938     case KEY_eval:
7939         s = skipspace(s);
7940         if (*s == '{') { /* block eval */
7941             PL_expect = XTERMBLOCK;
7942             UNIBRACK(OP_ENTERTRY);
7943         }
7944         else { /* string eval */
7945             PL_expect = XTERM;
7946             UNIBRACK(OP_ENTEREVAL);
7947         }
7948
7949     case KEY_evalbytes:
7950         PL_expect = XTERM;
7951         UNIBRACK(-OP_ENTEREVAL);
7952
7953     case KEY_eof:
7954         UNI(OP_EOF);
7955
7956     case KEY_exp:
7957         UNI(OP_EXP);
7958
7959     case KEY_each:
7960         UNI(OP_EACH);
7961
7962     case KEY_exec:
7963         LOP(OP_EXEC,XREF);
7964
7965     case KEY_endhostent:
7966         FUN0(OP_EHOSTENT);
7967
7968     case KEY_endnetent:
7969         FUN0(OP_ENETENT);
7970
7971     case KEY_endservent:
7972         FUN0(OP_ESERVENT);
7973
7974     case KEY_endprotoent:
7975         FUN0(OP_EPROTOENT);
7976
7977     case KEY_endpwent:
7978         FUN0(OP_EPWENT);
7979
7980     case KEY_endgrent:
7981         FUN0(OP_EGRENT);
7982
7983     case KEY_finally:
7984         Perl_ck_warner_d(aTHX_
7985             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental");
7986         PREBLOCK(FINALLY);
7987
7988     case KEY_for:
7989     case KEY_foreach:
7990         return yyl_foreach(aTHX_ s);
7991
7992     case KEY_formline:
7993         LOP(OP_FORMLINE,XTERM);
7994
7995     case KEY_fork:
7996         FUN0(OP_FORK);
7997
7998     case KEY_fc:
7999         UNI(OP_FC);
8000
8001     case KEY_fcntl:
8002         LOP(OP_FCNTL,XTERM);
8003
8004     case KEY_fileno:
8005         UNI(OP_FILENO);
8006
8007     case KEY_flock:
8008         LOP(OP_FLOCK,XTERM);
8009
8010     case KEY_gt:
8011         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8012             return REPORT(0);
8013         ChRop(OP_SGT);
8014
8015     case KEY_ge:
8016         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8017             return REPORT(0);
8018         ChRop(OP_SGE);
8019
8020     case KEY_grep:
8021         LOP(OP_GREPSTART, XREF);
8022
8023     case KEY_goto:
8024         LOOPX(OP_GOTO);
8025
8026     case KEY_gmtime:
8027         UNI(OP_GMTIME);
8028
8029     case KEY_getc:
8030         UNIDOR(OP_GETC);
8031
8032     case KEY_getppid:
8033         FUN0(OP_GETPPID);
8034
8035     case KEY_getpgrp:
8036         UNI(OP_GETPGRP);
8037
8038     case KEY_getpriority:
8039         LOP(OP_GETPRIORITY,XTERM);
8040
8041     case KEY_getprotobyname:
8042         UNI(OP_GPBYNAME);
8043
8044     case KEY_getprotobynumber:
8045         LOP(OP_GPBYNUMBER,XTERM);
8046
8047     case KEY_getprotoent:
8048         FUN0(OP_GPROTOENT);
8049
8050     case KEY_getpwent:
8051         FUN0(OP_GPWENT);
8052
8053     case KEY_getpwnam:
8054         UNI(OP_GPWNAM);
8055
8056     case KEY_getpwuid:
8057         UNI(OP_GPWUID);
8058
8059     case KEY_getpeername:
8060         UNI(OP_GETPEERNAME);
8061
8062     case KEY_gethostbyname:
8063         UNI(OP_GHBYNAME);
8064
8065     case KEY_gethostbyaddr:
8066         LOP(OP_GHBYADDR,XTERM);
8067
8068     case KEY_gethostent:
8069         FUN0(OP_GHOSTENT);
8070
8071     case KEY_getnetbyname:
8072         UNI(OP_GNBYNAME);
8073
8074     case KEY_getnetbyaddr:
8075         LOP(OP_GNBYADDR,XTERM);
8076
8077     case KEY_getnetent:
8078         FUN0(OP_GNETENT);
8079
8080     case KEY_getservbyname:
8081         LOP(OP_GSBYNAME,XTERM);
8082
8083     case KEY_getservbyport:
8084         LOP(OP_GSBYPORT,XTERM);
8085
8086     case KEY_getservent:
8087         FUN0(OP_GSERVENT);
8088
8089     case KEY_getsockname:
8090         UNI(OP_GETSOCKNAME);
8091
8092     case KEY_getsockopt:
8093         LOP(OP_GSOCKOPT,XTERM);
8094
8095     case KEY_getgrent:
8096         FUN0(OP_GGRENT);
8097
8098     case KEY_getgrnam:
8099         UNI(OP_GGRNAM);
8100
8101     case KEY_getgrgid:
8102         UNI(OP_GGRGID);
8103
8104     case KEY_getlogin:
8105         FUN0(OP_GETLOGIN);
8106
8107     case KEY_given:
8108         pl_yylval.ival = CopLINE(PL_curcop);
8109         Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8110                          "given is experimental");
8111         OPERATOR(GIVEN);
8112
8113     case KEY_glob:
8114         LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
8115
8116     case KEY_hex:
8117         UNI(OP_HEX);
8118
8119     case KEY_if:
8120         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8121             return REPORT(0);
8122         pl_yylval.ival = CopLINE(PL_curcop);
8123         OPERATOR(IF);
8124
8125     case KEY_index:
8126         LOP(OP_INDEX,XTERM);
8127
8128     case KEY_int:
8129         UNI(OP_INT);
8130
8131     case KEY_ioctl:
8132         LOP(OP_IOCTL,XTERM);
8133
8134     case KEY_isa:
8135         NCRop(OP_ISA);
8136
8137     case KEY_join:
8138         LOP(OP_JOIN,XTERM);
8139
8140     case KEY_keys:
8141         UNI(OP_KEYS);
8142
8143     case KEY_kill:
8144         LOP(OP_KILL,XTERM);
8145
8146     case KEY_last:
8147         LOOPX(OP_LAST);
8148
8149     case KEY_lc:
8150         UNI(OP_LC);
8151
8152     case KEY_lcfirst:
8153         UNI(OP_LCFIRST);
8154
8155     case KEY_local:
8156         OPERATOR(LOCAL);
8157
8158     case KEY_length:
8159         UNI(OP_LENGTH);
8160
8161     case KEY_lt:
8162         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8163             return REPORT(0);
8164         ChRop(OP_SLT);
8165
8166     case KEY_le:
8167         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8168             return REPORT(0);
8169         ChRop(OP_SLE);
8170
8171     case KEY_localtime:
8172         UNI(OP_LOCALTIME);
8173
8174     case KEY_log:
8175         UNI(OP_LOG);
8176
8177     case KEY_link:
8178         LOP(OP_LINK,XTERM);
8179
8180     case KEY_listen:
8181         LOP(OP_LISTEN,XTERM);
8182
8183     case KEY_lock:
8184         UNI(OP_LOCK);
8185
8186     case KEY_lstat:
8187         UNI(OP_LSTAT);
8188
8189     case KEY_m:
8190         s = scan_pat(s,OP_MATCH);
8191         TERM(sublex_start());
8192
8193     case KEY_map:
8194         LOP(OP_MAPSTART, XREF);
8195
8196     case KEY_mkdir:
8197         LOP(OP_MKDIR,XTERM);
8198
8199     case KEY_msgctl:
8200         LOP(OP_MSGCTL,XTERM);
8201
8202     case KEY_msgget:
8203         LOP(OP_MSGGET,XTERM);
8204
8205     case KEY_msgrcv:
8206         LOP(OP_MSGRCV,XTERM);
8207
8208     case KEY_msgsnd:
8209         LOP(OP_MSGSND,XTERM);
8210
8211     case KEY_our:
8212     case KEY_my:
8213     case KEY_state:
8214         return yyl_my(aTHX_ s, key);
8215
8216     case KEY_next:
8217         LOOPX(OP_NEXT);
8218
8219     case KEY_ne:
8220         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8221             return REPORT(0);
8222         ChEop(OP_SNE);
8223
8224     case KEY_no:
8225         s = tokenize_use(0, s);
8226         TOKEN(USE);
8227
8228     case KEY_not:
8229         if (*s == '(' || (s = skipspace(s), *s == '('))
8230             FUN1(OP_NOT);
8231         else {
8232             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8233                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8234             OPERATOR(NOTOP);
8235         }
8236
8237     case KEY_open:
8238         s = skipspace(s);
8239         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8240             const char *t;
8241             char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8242             for (t=d; isSPACE(*t);)
8243                 t++;
8244             if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8245                 /* [perl #16184] */
8246                 && !(t[0] == '=' && t[1] == '>')
8247                 && !(t[0] == ':' && t[1] == ':')
8248                 && !keyword(s, d-s, 0)
8249             ) {
8250                 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8251                    "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8252                     UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8253             }
8254         }
8255         LOP(OP_OPEN,XTERM);
8256
8257     case KEY_or:
8258         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8259             return REPORT(0);
8260         pl_yylval.ival = OP_OR;
8261         OPERATOR(OROP);
8262
8263     case KEY_ord:
8264         UNI(OP_ORD);
8265
8266     case KEY_oct:
8267         UNI(OP_OCT);
8268
8269     case KEY_opendir:
8270         LOP(OP_OPEN_DIR,XTERM);
8271
8272     case KEY_print:
8273         checkcomma(s,PL_tokenbuf,"filehandle");
8274         LOP(OP_PRINT,XREF);
8275
8276     case KEY_printf:
8277         checkcomma(s,PL_tokenbuf,"filehandle");
8278         LOP(OP_PRTF,XREF);
8279
8280     case KEY_prototype:
8281         UNI(OP_PROTOTYPE);
8282
8283     case KEY_push:
8284         LOP(OP_PUSH,XTERM);
8285
8286     case KEY_pop:
8287         UNIDOR(OP_POP);
8288
8289     case KEY_pos:
8290         UNIDOR(OP_POS);
8291
8292     case KEY_pack:
8293         LOP(OP_PACK,XTERM);
8294
8295     case KEY_package:
8296         s = force_word(s,BAREWORD,FALSE,TRUE);
8297         s = skipspace(s);
8298         s = force_strict_version(s);
8299         PREBLOCK(PACKAGE);
8300
8301     case KEY_pipe:
8302         LOP(OP_PIPE_OP,XTERM);
8303
8304     case KEY_q:
8305         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8306         if (!s)
8307             missingterm(NULL, 0);
8308         COPLINE_SET_FROM_MULTI_END;
8309         pl_yylval.ival = OP_CONST;
8310         TERM(sublex_start());
8311
8312     case KEY_quotemeta:
8313         UNI(OP_QUOTEMETA);
8314
8315     case KEY_qw:
8316         return yyl_qw(aTHX_ s, len);
8317
8318     case KEY_qq:
8319         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8320         if (!s)
8321             missingterm(NULL, 0);
8322         pl_yylval.ival = OP_STRINGIFY;
8323         if (SvIVX(PL_lex_stuff) == '\'')
8324             SvIV_set(PL_lex_stuff, 0);  /* qq'$foo' should interpolate */
8325         TERM(sublex_start());
8326
8327     case KEY_qr:
8328         s = scan_pat(s,OP_QR);
8329         TERM(sublex_start());
8330
8331     case KEY_qx:
8332         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8333         if (!s)
8334             missingterm(NULL, 0);
8335         pl_yylval.ival = OP_BACKTICK;
8336         TERM(sublex_start());
8337
8338     case KEY_return:
8339         OLDLOP(OP_RETURN);
8340
8341     case KEY_require:
8342         return yyl_require(aTHX_ s, orig_keyword);
8343
8344     case KEY_reset:
8345         UNI(OP_RESET);
8346
8347     case KEY_redo:
8348         LOOPX(OP_REDO);
8349
8350     case KEY_rename:
8351         LOP(OP_RENAME,XTERM);
8352
8353     case KEY_rand:
8354         UNI(OP_RAND);
8355
8356     case KEY_rmdir:
8357         UNI(OP_RMDIR);
8358
8359     case KEY_rindex:
8360         LOP(OP_RINDEX,XTERM);
8361
8362     case KEY_read:
8363         LOP(OP_READ,XTERM);
8364
8365     case KEY_readdir:
8366         UNI(OP_READDIR);
8367
8368     case KEY_readline:
8369         UNIDOR(OP_READLINE);
8370
8371     case KEY_readpipe:
8372         UNIDOR(OP_BACKTICK);
8373
8374     case KEY_rewinddir:
8375         UNI(OP_REWINDDIR);
8376
8377     case KEY_recv:
8378         LOP(OP_RECV,XTERM);
8379
8380     case KEY_reverse:
8381         LOP(OP_REVERSE,XTERM);
8382
8383     case KEY_readlink:
8384         UNIDOR(OP_READLINK);
8385
8386     case KEY_ref:
8387         UNI(OP_REF);
8388
8389     case KEY_s:
8390         s = scan_subst(s);
8391         if (pl_yylval.opval)
8392             TERM(sublex_start());
8393         else
8394             TOKEN(1);   /* force error */
8395
8396     case KEY_say:
8397         checkcomma(s,PL_tokenbuf,"filehandle");
8398         LOP(OP_SAY,XREF);
8399
8400     case KEY_chomp:
8401         UNI(OP_CHOMP);
8402
8403     case KEY_scalar:
8404         UNI(OP_SCALAR);
8405
8406     case KEY_select:
8407         LOP(OP_SELECT,XTERM);
8408
8409     case KEY_seek:
8410         LOP(OP_SEEK,XTERM);
8411
8412     case KEY_semctl:
8413         LOP(OP_SEMCTL,XTERM);
8414
8415     case KEY_semget:
8416         LOP(OP_SEMGET,XTERM);
8417
8418     case KEY_semop:
8419         LOP(OP_SEMOP,XTERM);
8420
8421     case KEY_send:
8422         LOP(OP_SEND,XTERM);
8423
8424     case KEY_setpgrp:
8425         LOP(OP_SETPGRP,XTERM);
8426
8427     case KEY_setpriority:
8428         LOP(OP_SETPRIORITY,XTERM);
8429
8430     case KEY_sethostent:
8431         UNI(OP_SHOSTENT);
8432
8433     case KEY_setnetent:
8434         UNI(OP_SNETENT);
8435
8436     case KEY_setservent:
8437         UNI(OP_SSERVENT);
8438
8439     case KEY_setprotoent:
8440         UNI(OP_SPROTOENT);
8441
8442     case KEY_setpwent:
8443         FUN0(OP_SPWENT);
8444
8445     case KEY_setgrent:
8446         FUN0(OP_SGRENT);
8447
8448     case KEY_seekdir:
8449         LOP(OP_SEEKDIR,XTERM);
8450
8451     case KEY_setsockopt:
8452         LOP(OP_SSOCKOPT,XTERM);
8453
8454     case KEY_shift:
8455         UNIDOR(OP_SHIFT);
8456
8457     case KEY_shmctl:
8458         LOP(OP_SHMCTL,XTERM);
8459
8460     case KEY_shmget:
8461         LOP(OP_SHMGET,XTERM);
8462
8463     case KEY_shmread:
8464         LOP(OP_SHMREAD,XTERM);
8465
8466     case KEY_shmwrite:
8467         LOP(OP_SHMWRITE,XTERM);
8468
8469     case KEY_shutdown:
8470         LOP(OP_SHUTDOWN,XTERM);
8471
8472     case KEY_sin:
8473         UNI(OP_SIN);
8474
8475     case KEY_sleep:
8476         UNI(OP_SLEEP);
8477
8478     case KEY_socket:
8479         LOP(OP_SOCKET,XTERM);
8480
8481     case KEY_socketpair:
8482         LOP(OP_SOCKPAIR,XTERM);
8483
8484     case KEY_sort:
8485         checkcomma(s,PL_tokenbuf,"subroutine name");
8486         s = skipspace(s);
8487         PL_expect = XTERM;
8488         s = force_word(s,BAREWORD,TRUE,TRUE);
8489         LOP(OP_SORT,XREF);
8490
8491     case KEY_split:
8492         LOP(OP_SPLIT,XTERM);
8493
8494     case KEY_sprintf:
8495         LOP(OP_SPRINTF,XTERM);
8496
8497     case KEY_splice:
8498         LOP(OP_SPLICE,XTERM);
8499
8500     case KEY_sqrt:
8501         UNI(OP_SQRT);
8502
8503     case KEY_srand:
8504         UNI(OP_SRAND);
8505
8506     case KEY_stat:
8507         UNI(OP_STAT);
8508
8509     case KEY_study:
8510         UNI(OP_STUDY);
8511
8512     case KEY_substr:
8513         LOP(OP_SUBSTR,XTERM);
8514
8515     case KEY_format:
8516     case KEY_sub:
8517         return yyl_sub(aTHX_ s, key);
8518
8519     case KEY_system:
8520         LOP(OP_SYSTEM,XREF);
8521
8522     case KEY_symlink:
8523         LOP(OP_SYMLINK,XTERM);
8524
8525     case KEY_syscall:
8526         LOP(OP_SYSCALL,XTERM);
8527
8528     case KEY_sysopen:
8529         LOP(OP_SYSOPEN,XTERM);
8530
8531     case KEY_sysseek:
8532         LOP(OP_SYSSEEK,XTERM);
8533
8534     case KEY_sysread:
8535         LOP(OP_SYSREAD,XTERM);
8536
8537     case KEY_syswrite:
8538         LOP(OP_SYSWRITE,XTERM);
8539
8540     case KEY_tr:
8541     case KEY_y:
8542         s = scan_trans(s);
8543         TERM(sublex_start());
8544
8545     case KEY_tell:
8546         UNI(OP_TELL);
8547
8548     case KEY_telldir:
8549         UNI(OP_TELLDIR);
8550
8551     case KEY_tie:
8552         LOP(OP_TIE,XTERM);
8553
8554     case KEY_tied:
8555         UNI(OP_TIED);
8556
8557     case KEY_time:
8558         FUN0(OP_TIME);
8559
8560     case KEY_times:
8561         FUN0(OP_TMS);
8562
8563     case KEY_truncate:
8564         LOP(OP_TRUNCATE,XTERM);
8565
8566     case KEY_try:
8567         pl_yylval.ival = CopLINE(PL_curcop);
8568         Perl_ck_warner_d(aTHX_
8569             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
8570         PREBLOCK(TRY);
8571
8572     case KEY_uc:
8573         UNI(OP_UC);
8574
8575     case KEY_ucfirst:
8576         UNI(OP_UCFIRST);
8577
8578     case KEY_untie:
8579         UNI(OP_UNTIE);
8580
8581     case KEY_until:
8582         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8583             return REPORT(0);
8584         pl_yylval.ival = CopLINE(PL_curcop);
8585         OPERATOR(UNTIL);
8586
8587     case KEY_unless:
8588         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8589             return REPORT(0);
8590         pl_yylval.ival = CopLINE(PL_curcop);
8591         OPERATOR(UNLESS);
8592
8593     case KEY_unlink:
8594         LOP(OP_UNLINK,XTERM);
8595
8596     case KEY_undef:
8597         UNIDOR(OP_UNDEF);
8598
8599     case KEY_unpack:
8600         LOP(OP_UNPACK,XTERM);
8601
8602     case KEY_utime:
8603         LOP(OP_UTIME,XTERM);
8604
8605     case KEY_umask:
8606         UNIDOR(OP_UMASK);
8607
8608     case KEY_unshift:
8609         LOP(OP_UNSHIFT,XTERM);
8610
8611     case KEY_use:
8612         s = tokenize_use(1, s);
8613         TOKEN(USE);
8614
8615     case KEY_values:
8616         UNI(OP_VALUES);
8617
8618     case KEY_vec:
8619         LOP(OP_VEC,XTERM);
8620
8621     case KEY_when:
8622         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8623             return REPORT(0);
8624         pl_yylval.ival = CopLINE(PL_curcop);
8625         Perl_ck_warner_d(aTHX_
8626             packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8627             "when is experimental");
8628         OPERATOR(WHEN);
8629
8630     case KEY_while:
8631         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8632             return REPORT(0);
8633         pl_yylval.ival = CopLINE(PL_curcop);
8634         OPERATOR(WHILE);
8635
8636     case KEY_warn:
8637         PL_hints |= HINT_BLOCK_SCOPE;
8638         LOP(OP_WARN,XTERM);
8639
8640     case KEY_wait:
8641         FUN0(OP_WAIT);
8642
8643     case KEY_waitpid:
8644         LOP(OP_WAITPID,XTERM);
8645
8646     case KEY_wantarray:
8647         FUN0(OP_WANTARRAY);
8648
8649     case KEY_write:
8650         /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8651          * we use the same number on EBCDIC */
8652         gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8653         UNI(OP_ENTERWRITE);
8654
8655     case KEY_x:
8656         if (PL_expect == XOPERATOR) {
8657             if (*s == '=' && !PL_lex_allbrackets
8658                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8659             {
8660                 return REPORT(0);
8661             }
8662             Mop(OP_REPEAT);
8663         }
8664         check_uni();
8665         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8666
8667     case KEY_xor:
8668         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8669             return REPORT(0);
8670         pl_yylval.ival = OP_XOR;
8671         OPERATOR(OROP);
8672     }
8673 }
8674
8675 static int
8676 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8677 {
8678     I32 key = 0;
8679     I32 orig_keyword = 0;
8680     STRLEN olen = len;
8681     char *d = s;
8682     s += 2;
8683     s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8684     if ((*s == ':' && s[1] == ':')
8685         || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8686     {
8687         Copy(PL_bufptr, PL_tokenbuf, olen, char);
8688         return yyl_just_a_word(aTHX_ d, olen, 0, c);
8689     }
8690     if (!key)
8691         Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8692                           UTF8fARG(UTF, len, PL_tokenbuf));
8693     if (key < 0)
8694         key = -key;
8695     else if (key == KEY_require || key == KEY_do
8696           || key == KEY_glob)
8697         /* that's a way to remember we saw "CORE::" */
8698         orig_keyword = key;
8699
8700     /* Known to be a reserved word at this point */
8701     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8702 }
8703
8704 static int
8705 yyl_keylookup(pTHX_ char *s, GV *gv)
8706 {
8707     STRLEN len;
8708     bool anydelim;
8709     I32 key;
8710     struct code c = no_code;
8711     I32 orig_keyword = 0;
8712     char *d;
8713
8714     c.gv = gv;
8715
8716     PL_bufptr = s;
8717     s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8718
8719     /* Some keywords can be followed by any delimiter, including ':' */
8720     anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8721
8722     /* x::* is just a word, unless x is "CORE" */
8723     if (!anydelim && *s == ':' && s[1] == ':') {
8724         if (memEQs(PL_tokenbuf, len, "CORE"))
8725             return yyl_key_core(aTHX_ s, len, c);
8726         return yyl_just_a_word(aTHX_ s, len, 0, c);
8727     }
8728
8729     d = s;
8730     while (d < PL_bufend && isSPACE(*d))
8731             d++;        /* no comments skipped here, or s### is misparsed */
8732
8733     /* Is this a word before a => operator? */
8734     if (*d == '=' && d[1] == '>') {
8735         return yyl_fatcomma(aTHX_ s, len);
8736     }
8737
8738     /* Check for plugged-in keyword */
8739     {
8740         OP *o;
8741         int result;
8742         char *saved_bufptr = PL_bufptr;
8743         PL_bufptr = s;
8744         result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8745         s = PL_bufptr;
8746         if (result == KEYWORD_PLUGIN_DECLINE) {
8747             /* not a plugged-in keyword */
8748             PL_bufptr = saved_bufptr;
8749         } else if (result == KEYWORD_PLUGIN_STMT) {
8750             pl_yylval.opval = o;
8751             CLINE;
8752             if (!PL_nexttoke) PL_expect = XSTATE;
8753             return REPORT(PLUGSTMT);
8754         } else if (result == KEYWORD_PLUGIN_EXPR) {
8755             pl_yylval.opval = o;
8756             CLINE;
8757             if (!PL_nexttoke) PL_expect = XOPERATOR;
8758             return REPORT(PLUGEXPR);
8759         } else {
8760             Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8761         }
8762     }
8763
8764     /* Is this a label? */
8765     if (!anydelim && PL_expect == XSTATE
8766           && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8767         s = d + 1;
8768         pl_yylval.opval =
8769             newSVOP(OP_CONST, 0,
8770                 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8771         CLINE;
8772         TOKEN(LABEL);
8773     }
8774
8775     /* Check for lexical sub */
8776     if (PL_expect != XOPERATOR) {
8777         char tmpbuf[sizeof PL_tokenbuf + 1];
8778         *tmpbuf = '&';
8779         Copy(PL_tokenbuf, tmpbuf+1, len, char);
8780         c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8781         if (c.off != NOT_IN_PAD) {
8782             assert(c.off); /* we assume this is boolean-true below */
8783             if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8784                 HV *  const stash = PAD_COMPNAME_OURSTASH(c.off);
8785                 HEK * const stashname = HvNAME_HEK(stash);
8786                 c.sv = newSVhek(stashname);
8787                 sv_catpvs(c.sv, "::");
8788                 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8789                                 (UTF ? SV_CATUTF8 : SV_CATBYTES));
8790                 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8791                                   SVt_PVCV);
8792                 c.off = 0;
8793                 if (!c.gv) {
8794                     sv_free(c.sv);
8795                     c.sv = NULL;
8796                     return yyl_just_a_word(aTHX_ s, len, 0, c);
8797                 }
8798             }
8799             else {
8800                 c.rv2cv_op = newOP(OP_PADANY, 0);
8801                 c.rv2cv_op->op_targ = c.off;
8802                 c.cv = find_lexical_cv(c.off);
8803             }
8804             c.lex = TRUE;
8805             return yyl_just_a_word(aTHX_ s, len, 0, c);
8806         }
8807         c.off = 0;
8808     }
8809
8810     /* Check for built-in keyword */
8811     key = keyword(PL_tokenbuf, len, 0);
8812
8813     if (key < 0)
8814         key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8815
8816     if (key && key != KEY___DATA__ && key != KEY___END__
8817      && (!anydelim || *s != '#')) {
8818         /* no override, and not s### either; skipspace is safe here
8819          * check for => on following line */
8820         bool arrow;
8821         STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8822         STRLEN   soff = s         - SvPVX(PL_linestr);
8823         s = peekspace(s);
8824         arrow = *s == '=' && s[1] == '>';
8825         PL_bufptr = SvPVX(PL_linestr) + bufoff;
8826         s         = SvPVX(PL_linestr) +   soff;
8827         if (arrow)
8828             return yyl_fatcomma(aTHX_ s, len);
8829     }
8830
8831     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8832 }
8833
8834 static int
8835 yyl_try(pTHX_ char *s)
8836 {
8837     char *d;
8838     GV *gv = NULL;
8839     int tok;
8840
8841   retry:
8842     switch (*s) {
8843     default:
8844         if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
8845             if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
8846                 return tok;
8847             goto retry_bufptr;
8848         }
8849         yyl_croak_unrecognised(aTHX_ s);
8850
8851     case 4:
8852     case 26:
8853         /* emulate EOF on ^D or ^Z */
8854         if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
8855             return tok;
8856     retry_bufptr:
8857         s = PL_bufptr;
8858         goto retry;
8859
8860     case 0:
8861         if ((!PL_rsfp || PL_lex_inwhat)
8862          && (!PL_parser->filtered || s+1 < PL_bufend)) {
8863             PL_last_uni = 0;
8864             PL_last_lop = 0;
8865             if (PL_lex_brackets
8866                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
8867             {
8868                 yyerror((const char *)
8869                         (PL_lex_formbrack
8870                          ? "Format not terminated"
8871                          : "Missing right curly or square bracket"));
8872             }
8873             DEBUG_T({
8874                 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
8875             });
8876             TOKEN(0);
8877         }
8878         if (s++ < PL_bufend)
8879             goto retry;  /* ignore stray nulls */
8880         PL_last_uni = 0;
8881         PL_last_lop = 0;
8882         if (!PL_in_eval && !PL_preambled) {
8883             PL_preambled = TRUE;
8884             if (PL_perldb) {
8885                 /* Generate a string of Perl code to load the debugger.
8886                  * If PERL5DB is set, it will return the contents of that,
8887                  * otherwise a compile-time require of perl5db.pl.  */
8888
8889                 const char * const pdb = PerlEnv_getenv("PERL5DB");
8890
8891                 if (pdb) {
8892                     sv_setpv(PL_linestr, pdb);
8893                     sv_catpvs(PL_linestr,";");
8894                 } else {
8895                     SETERRNO(0,SS_NORMAL);
8896                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
8897                 }
8898                 PL_parser->preambling = CopLINE(PL_curcop);
8899             } else
8900                 SvPVCLEAR(PL_linestr);
8901             if (PL_preambleav) {
8902                 SV **svp = AvARRAY(PL_preambleav);
8903                 SV **const end = svp + AvFILLp(PL_preambleav);
8904                 while(svp <= end) {
8905                     sv_catsv(PL_linestr, *svp);
8906                     ++svp;
8907                     sv_catpvs(PL_linestr, ";");
8908                 }
8909                 sv_free(MUTABLE_SV(PL_preambleav));
8910                 PL_preambleav = NULL;
8911             }
8912             if (PL_minus_E)
8913                 sv_catpvs(PL_linestr,
8914                           "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
8915             if (PL_minus_n || PL_minus_p) {
8916                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
8917                 if (PL_minus_l)
8918                     sv_catpvs(PL_linestr,"chomp;");
8919                 if (PL_minus_a) {
8920                     if (PL_minus_F) {
8921                         if (   (   *PL_splitstr == '/'
8922                                 || *PL_splitstr == '\''
8923                                 || *PL_splitstr == '"')
8924                             && strchr(PL_splitstr + 1, *PL_splitstr))
8925                         {
8926                             /* strchr is ok, because -F pattern can't contain
8927                              * embeddded NULs */
8928                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
8929                         }
8930                         else {
8931                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
8932                                bytes can be used as quoting characters.  :-) */
8933                             const char *splits = PL_splitstr;
8934                             sv_catpvs(PL_linestr, "our @F=split(q\0");
8935                             do {
8936                                 /* Need to \ \s  */
8937                                 if (*splits == '\\')
8938                                     sv_catpvn(PL_linestr, splits, 1);
8939                                 sv_catpvn(PL_linestr, splits, 1);
8940                             } while (*splits++);
8941                             /* This loop will embed the trailing NUL of
8942                                PL_linestr as the last thing it does before
8943                                terminating.  */
8944                             sv_catpvs(PL_linestr, ");");
8945                         }
8946                     }
8947                     else
8948                         sv_catpvs(PL_linestr,"our @F=split(' ');");
8949                 }
8950             }
8951             sv_catpvs(PL_linestr, "\n");
8952             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
8953             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8954             PL_last_lop = PL_last_uni = NULL;
8955             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
8956                 update_debugger_info(PL_linestr, NULL, 0);
8957             goto retry;
8958         }
8959         if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
8960             return tok;
8961         goto retry_bufptr;
8962
8963     case '\r':
8964 #ifdef PERL_STRICT_CR
8965         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
8966         Perl_croak(aTHX_
8967       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
8968 #endif
8969     case ' ': case '\t': case '\f': case '\v':
8970         s++;
8971         goto retry;
8972
8973     case '#':
8974     case '\n': {
8975         const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
8976         if (needs_semicolon)
8977             TOKEN(PERLY_SEMICOLON);
8978         else
8979             goto retry;
8980     }
8981
8982     case '-':
8983         return yyl_hyphen(aTHX_ s);
8984
8985     case '+':
8986         return yyl_plus(aTHX_ s);
8987
8988     case '*':
8989         return yyl_star(aTHX_ s);
8990
8991     case '%':
8992         return yyl_percent(aTHX_ s);
8993
8994     case '^':
8995         return yyl_caret(aTHX_ s);
8996
8997     case '[':
8998         return yyl_leftsquare(aTHX_ s);
8999
9000     case '~':
9001         return yyl_tilde(aTHX_ s);
9002
9003     case ',':
9004         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9005             TOKEN(0);
9006         s++;
9007         OPERATOR(PERLY_COMMA);
9008     case ':':
9009         if (s[1] == ':')
9010             return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
9011         return yyl_colon(aTHX_ s + 1);
9012
9013     case '(':
9014         return yyl_leftparen(aTHX_ s + 1);
9015
9016     case ';':
9017         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
9018             TOKEN(0);
9019         CLINE;
9020         s++;
9021         PL_expect = XSTATE;
9022         TOKEN(PERLY_SEMICOLON);
9023
9024     case ')':
9025         return yyl_rightparen(aTHX_ s);
9026
9027     case ']':
9028         return yyl_rightsquare(aTHX_ s);
9029
9030     case '{':
9031         return yyl_leftcurly(aTHX_ s + 1, 0);
9032
9033     case '}':
9034         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
9035             TOKEN(0);
9036         return yyl_rightcurly(aTHX_ s, 0);
9037
9038     case '&':
9039         return yyl_ampersand(aTHX_ s);
9040
9041     case '|':
9042         return yyl_verticalbar(aTHX_ s);
9043
9044     case '=':
9045         if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
9046             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
9047         {
9048             s = vcs_conflict_marker(s + 7);
9049             goto retry;
9050         }
9051
9052         s++;
9053         {
9054             const char tmp = *s++;
9055             if (tmp == '=') {
9056                 if (!PL_lex_allbrackets
9057                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
9058                 {
9059                     s -= 2;
9060                     TOKEN(0);
9061                 }
9062                 ChEop(OP_EQ);
9063             }
9064             if (tmp == '>') {
9065                 if (!PL_lex_allbrackets
9066                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9067                 {
9068                     s -= 2;
9069                     TOKEN(0);
9070                 }
9071                 OPERATOR(PERLY_COMMA);
9072             }
9073             if (tmp == '~')
9074                 PMop(OP_MATCH);
9075             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
9076                 && memCHRs("+-*/%.^&|<",tmp))
9077                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9078                             "Reversed %c= operator",(int)tmp);
9079             s--;
9080             if (PL_expect == XSTATE
9081                 && isALPHA(tmp)
9082                 && (s == PL_linestart+1 || s[-2] == '\n') )
9083             {
9084                 if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
9085                     || PL_lex_state != LEX_NORMAL)
9086                 {
9087                     d = PL_bufend;
9088                     while (s < d) {
9089                         if (*s++ == '\n') {
9090                             incline(s, PL_bufend);
9091                             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
9092                             {
9093                                 s = (char *) memchr(s,'\n', d - s);
9094                                 if (s)
9095                                     s++;
9096                                 else
9097                                     s = d;
9098                                 incline(s, PL_bufend);
9099                                 goto retry;
9100                             }
9101                         }
9102                     }
9103                     goto retry;
9104                 }
9105                 s = PL_bufend;
9106                 PL_parser->in_pod = 1;
9107                 goto retry;
9108             }
9109         }
9110         if (PL_expect == XBLOCK) {
9111             const char *t = s;
9112 #ifdef PERL_STRICT_CR
9113             while (SPACE_OR_TAB(*t))
9114 #else
9115             while (SPACE_OR_TAB(*t) || *t == '\r')
9116 #endif
9117                 t++;
9118             if (*t == '\n' || *t == '#') {
9119                 ENTER_with_name("lex_format");
9120                 SAVEI8(PL_parser->form_lex_state);
9121                 SAVEI32(PL_lex_formbrack);
9122                 PL_parser->form_lex_state = PL_lex_state;
9123                 PL_lex_formbrack = PL_lex_brackets + 1;
9124                 PL_parser->sub_error_count = PL_error_count;
9125                 return yyl_leftcurly(aTHX_ s, 1);
9126             }
9127         }
9128         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
9129             s--;
9130             TOKEN(0);
9131         }
9132         pl_yylval.ival = 0;
9133         OPERATOR(ASSIGNOP);
9134
9135         case '!':
9136         return yyl_bang(aTHX_ s + 1);
9137
9138     case '<':
9139         if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
9140             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
9141         {
9142             s = vcs_conflict_marker(s + 7);
9143             goto retry;
9144         }
9145         return yyl_leftpointy(aTHX_ s);
9146
9147     case '>':
9148         if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
9149             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
9150         {
9151             s = vcs_conflict_marker(s + 7);
9152             goto retry;
9153         }
9154         return yyl_rightpointy(aTHX_ s + 1);
9155
9156     case '$':
9157         return yyl_dollar(aTHX_ s);
9158
9159     case '@':
9160         return yyl_snail(aTHX_ s);
9161
9162     case '/':                   /* may be division, defined-or, or pattern */
9163         return yyl_slash(aTHX_ s);
9164
9165      case '?':                  /* conditional */
9166         s++;
9167         if (!PL_lex_allbrackets
9168             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
9169         {
9170             s--;
9171             TOKEN(0);
9172         }
9173         PL_lex_allbrackets++;
9174         OPERATOR(PERLY_QUESTION_MARK);
9175
9176     case '.':
9177         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
9178 #ifdef PERL_STRICT_CR
9179             && s[1] == '\n'
9180 #else
9181             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
9182 #endif
9183             && (s == PL_linestart || s[-1] == '\n') )
9184         {
9185             PL_expect = XSTATE;
9186             /* formbrack==2 means dot seen where arguments expected */
9187             return yyl_rightcurly(aTHX_ s, 2);
9188         }
9189         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9190             s += 3;
9191             OPERATOR(YADAYADA);
9192         }
9193         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9194             char tmp = *s++;
9195             if (*s == tmp) {
9196                 if (!PL_lex_allbrackets
9197                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9198                 {
9199                     s--;
9200                     TOKEN(0);
9201                 }
9202                 s++;
9203                 if (*s == tmp) {
9204                     s++;
9205                     pl_yylval.ival = OPf_SPECIAL;
9206                 }
9207                 else
9208                     pl_yylval.ival = 0;
9209                 OPERATOR(DOTDOT);
9210             }
9211             if (*s == '=' && !PL_lex_allbrackets
9212                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9213             {
9214                 s--;
9215                 TOKEN(0);
9216             }
9217             Aop(OP_CONCAT);
9218         }
9219         /* FALLTHROUGH */
9220     case '0': case '1': case '2': case '3': case '4':
9221     case '5': case '6': case '7': case '8': case '9':
9222         s = scan_num(s, &pl_yylval);
9223         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9224         if (PL_expect == XOPERATOR)
9225             no_op("Number",s);
9226         TERM(THING);
9227
9228     case '\'':
9229         return yyl_sglquote(aTHX_ s);
9230
9231     case '"':
9232         return yyl_dblquote(aTHX_ s);
9233
9234     case '`':
9235         return yyl_backtick(aTHX_ s);
9236
9237     case '\\':
9238         return yyl_backslash(aTHX_ s + 1);
9239
9240     case 'v':
9241         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9242             char *start = s + 2;
9243             while (isDIGIT(*start) || *start == '_')
9244                 start++;
9245             if (*start == '.' && isDIGIT(start[1])) {
9246                 s = scan_num(s, &pl_yylval);
9247                 TERM(THING);
9248             }
9249             else if ((*start == ':' && start[1] == ':')
9250                      || (PL_expect == XSTATE && *start == ':')) {
9251                 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9252                     return tok;
9253                 goto retry_bufptr;
9254             }
9255             else if (PL_expect == XSTATE) {
9256                 d = start;
9257                 while (d < PL_bufend && isSPACE(*d)) d++;
9258                 if (*d == ':') {
9259                     if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9260                         return tok;
9261                     goto retry_bufptr;
9262                 }
9263             }
9264             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9265             if (!isALPHA(*start) && (PL_expect == XTERM
9266                         || PL_expect == XREF || PL_expect == XSTATE
9267                         || PL_expect == XTERMORDORDOR)) {
9268                 GV *const gv = gv_fetchpvn_flags(s, start - s,
9269                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
9270                 if (!gv) {
9271                     s = scan_num(s, &pl_yylval);
9272                     TERM(THING);
9273                 }
9274             }
9275         }
9276         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9277             return tok;
9278         goto retry_bufptr;
9279
9280     case 'x':
9281         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9282             s++;
9283             Mop(OP_REPEAT);
9284         }
9285         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9286             return tok;
9287         goto retry_bufptr;
9288
9289     case '_':
9290     case 'a': case 'A':
9291     case 'b': case 'B':
9292     case 'c': case 'C':
9293     case 'd': case 'D':
9294     case 'e': case 'E':
9295     case 'f': case 'F':
9296     case 'g': case 'G':
9297     case 'h': case 'H':
9298     case 'i': case 'I':
9299     case 'j': case 'J':
9300     case 'k': case 'K':
9301     case 'l': case 'L':
9302     case 'm': case 'M':
9303     case 'n': case 'N':
9304     case 'o': case 'O':
9305     case 'p': case 'P':
9306     case 'q': case 'Q':
9307     case 'r': case 'R':
9308     case 's': case 'S':
9309     case 't': case 'T':
9310     case 'u': case 'U':
9311               case 'V':
9312     case 'w': case 'W':
9313               case 'X':
9314     case 'y': case 'Y':
9315     case 'z': case 'Z':
9316         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9317             return tok;
9318         goto retry_bufptr;
9319     }
9320 }
9321
9322
9323 /*
9324   yylex
9325
9326   Works out what to call the token just pulled out of the input
9327   stream.  The yacc parser takes care of taking the ops we return and
9328   stitching them into a tree.
9329
9330   Returns:
9331     The type of the next token
9332
9333   Structure:
9334       Check if we have already built the token; if so, use it.
9335       Switch based on the current state:
9336           - if we have a case modifier in a string, deal with that
9337           - handle other cases of interpolation inside a string
9338           - scan the next line if we are inside a format
9339       In the normal state, switch on the next character:
9340           - default:
9341             if alphabetic, go to key lookup
9342             unrecognized character - croak
9343           - 0/4/26: handle end-of-line or EOF
9344           - cases for whitespace
9345           - \n and #: handle comments and line numbers
9346           - various operators, brackets and sigils
9347           - numbers
9348           - quotes
9349           - 'v': vstrings (or go to key lookup)
9350           - 'x' repetition operator (or go to key lookup)
9351           - other ASCII alphanumerics (key lookup begins here):
9352               word before => ?
9353               keyword plugin
9354               scan built-in keyword (but do nothing with it yet)
9355               check for statement label
9356               check for lexical subs
9357                   return yyl_just_a_word if there is one
9358               see whether built-in keyword is overridden
9359               switch on keyword number:
9360                   - default: return yyl_just_a_word:
9361                       not a built-in keyword; handle bareword lookup
9362                       disambiguate between method and sub call
9363                       fall back to bareword
9364                   - cases for built-in keywords
9365 */
9366
9367 int
9368 Perl_yylex(pTHX)
9369 {
9370     char *s = PL_bufptr;
9371
9372     if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9373         const U8* first_bad_char_loc;
9374         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9375                                                         PL_bufend - PL_bufptr,
9376                                                         &first_bad_char_loc)))
9377         {
9378             _force_out_malformed_utf8_message(first_bad_char_loc,
9379                                               (U8 *) PL_bufend,
9380                                               0,
9381                                               1 /* 1 means die */ );
9382             NOT_REACHED; /* NOTREACHED */
9383         }
9384         PL_parser->recheck_utf8_validity = FALSE;
9385     }
9386     DEBUG_T( {
9387         SV* tmp = newSVpvs("");
9388         PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
9389             (IV)CopLINE(PL_curcop),
9390             lex_state_names[PL_lex_state],
9391             exp_name[PL_expect],
9392             pv_display(tmp, s, strlen(s), 0, 60));
9393         SvREFCNT_dec(tmp);
9394     } );
9395
9396     /* when we've already built the next token, just pull it out of the queue */
9397     if (PL_nexttoke) {
9398         PL_nexttoke--;
9399         pl_yylval = PL_nextval[PL_nexttoke];
9400         {
9401             I32 next_type;
9402             next_type = PL_nexttype[PL_nexttoke];
9403             if (next_type & (7<<24)) {
9404                 if (next_type & (1<<24)) {
9405                     if (PL_lex_brackets > 100)
9406                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9407                     PL_lex_brackstack[PL_lex_brackets++] =
9408                         (char) ((U8) (next_type >> 16));
9409                 }
9410                 if (next_type & (2<<24))
9411                     PL_lex_allbrackets++;
9412                 if (next_type & (4<<24))
9413                     PL_lex_allbrackets--;
9414                 next_type &= 0xffff;
9415             }
9416             return REPORT(next_type == 'p' ? pending_ident() : next_type);
9417         }
9418     }
9419
9420     switch (PL_lex_state) {
9421     case LEX_NORMAL:
9422     case LEX_INTERPNORMAL:
9423         break;
9424
9425     /* interpolated case modifiers like \L \U, including \Q and \E.
9426        when we get here, PL_bufptr is at the \
9427     */
9428     case LEX_INTERPCASEMOD:
9429         /* handle \E or end of string */
9430         return yyl_interpcasemod(aTHX_ s);
9431
9432     case LEX_INTERPPUSH:
9433         return REPORT(sublex_push());
9434
9435     case LEX_INTERPSTART:
9436         if (PL_bufptr == PL_bufend)
9437             return REPORT(sublex_done());
9438         DEBUG_T({
9439             if(*PL_bufptr != '(')
9440                 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9441         });
9442         PL_expect = XTERM;
9443         /* for /@a/, we leave the joining for the regex engine to do
9444          * (unless we're within \Q etc) */
9445         PL_lex_dojoin = (*PL_bufptr == '@'
9446                             && (!PL_lex_inpat || PL_lex_casemods));
9447         PL_lex_state = LEX_INTERPNORMAL;
9448         if (PL_lex_dojoin) {
9449             NEXTVAL_NEXTTOKE.ival = 0;
9450             force_next(PERLY_COMMA);
9451             force_ident("\"", PERLY_DOLLAR);
9452             NEXTVAL_NEXTTOKE.ival = 0;
9453             force_next(PERLY_DOLLAR);
9454             NEXTVAL_NEXTTOKE.ival = 0;
9455             force_next((2<<24)|PERLY_PAREN_OPEN);
9456             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
9457             force_next(FUNC);
9458         }
9459         /* Convert (?{...}) and friends to 'do {...}' */
9460         if (PL_lex_inpat && *PL_bufptr == '(') {
9461             PL_parser->lex_shared->re_eval_start = PL_bufptr;
9462             PL_bufptr += 2;
9463             if (*PL_bufptr != '{')
9464                 PL_bufptr++;
9465             PL_expect = XTERMBLOCK;
9466             force_next(DO);
9467         }
9468
9469         if (PL_lex_starts++) {
9470             s = PL_bufptr;
9471             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9472             if (!PL_lex_casemods && PL_lex_inpat)
9473                 TOKEN(PERLY_COMMA);
9474             else
9475                 AopNOASSIGN(OP_CONCAT);
9476         }
9477         return yylex();
9478
9479     case LEX_INTERPENDMAYBE:
9480         if (intuit_more(PL_bufptr, PL_bufend)) {
9481             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
9482             break;
9483         }
9484         /* FALLTHROUGH */
9485
9486     case LEX_INTERPEND:
9487         if (PL_lex_dojoin) {
9488             const U8 dojoin_was = PL_lex_dojoin;
9489             PL_lex_dojoin = FALSE;
9490             PL_lex_state = LEX_INTERPCONCAT;
9491             PL_lex_allbrackets--;
9492             return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN);
9493         }
9494         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9495             && SvEVALED(PL_lex_repl))
9496         {
9497             if (PL_bufptr != PL_bufend)
9498                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9499             PL_lex_repl = NULL;
9500         }
9501         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
9502            re_eval_str.  If the here-doc body’s length equals the previous
9503            value of re_eval_start, re_eval_start will now be null.  So
9504            check re_eval_str as well. */
9505         if (PL_parser->lex_shared->re_eval_start
9506          || PL_parser->lex_shared->re_eval_str) {
9507             SV *sv;
9508             if (*PL_bufptr != ')')
9509                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9510             PL_bufptr++;
9511             /* having compiled a (?{..}) expression, return the original
9512              * text too, as a const */
9513             if (PL_parser->lex_shared->re_eval_str) {
9514                 sv = PL_parser->lex_shared->re_eval_str;
9515                 PL_parser->lex_shared->re_eval_str = NULL;
9516                 SvCUR_set(sv,
9517                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9518                 SvPV_shrink_to_cur(sv);
9519             }
9520             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9521                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9522             NEXTVAL_NEXTTOKE.opval =
9523                     newSVOP(OP_CONST, 0,
9524                                  sv);
9525             force_next(THING);
9526             PL_parser->lex_shared->re_eval_start = NULL;
9527             PL_expect = XTERM;
9528             return REPORT(PERLY_COMMA);
9529         }
9530
9531         /* FALLTHROUGH */
9532     case LEX_INTERPCONCAT:
9533 #ifdef DEBUGGING
9534         if (PL_lex_brackets)
9535             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9536                        (long) PL_lex_brackets);
9537 #endif
9538         if (PL_bufptr == PL_bufend)
9539             return REPORT(sublex_done());
9540
9541         /* m'foo' still needs to be parsed for possible (?{...}) */
9542         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9543             SV *sv = newSVsv(PL_linestr);
9544             sv = tokeq(sv);
9545             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9546             s = PL_bufend;
9547         }
9548         else {
9549             int save_error_count = PL_error_count;
9550
9551             s = scan_const(PL_bufptr);
9552
9553             /* Set flag if this was a pattern and there were errors.  op.c will
9554              * refuse to compile a pattern with this flag set.  Otherwise, we
9555              * could get segfaults, etc. */
9556             if (PL_lex_inpat && PL_error_count > save_error_count) {
9557                 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9558             }
9559             if (*s == '\\')
9560                 PL_lex_state = LEX_INTERPCASEMOD;
9561             else
9562                 PL_lex_state = LEX_INTERPSTART;
9563         }
9564
9565         if (s != PL_bufptr) {
9566             NEXTVAL_NEXTTOKE = pl_yylval;
9567             PL_expect = XTERM;
9568             force_next(THING);
9569             if (PL_lex_starts++) {
9570                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9571                 if (!PL_lex_casemods && PL_lex_inpat)
9572                     TOKEN(PERLY_COMMA);
9573                 else
9574                     AopNOASSIGN(OP_CONCAT);
9575             }
9576             else {
9577                 PL_bufptr = s;
9578                 return yylex();
9579             }
9580         }
9581
9582         return yylex();
9583     case LEX_FORMLINE:
9584         if (PL_parser->sub_error_count != PL_error_count) {
9585             /* There was an error parsing a formline, which tends to
9586                mess up the parser.
9587                Unlike interpolated sub-parsing, we can't treat any of
9588                these as recoverable, so no need to check sub_no_recover.
9589             */
9590             yyquit();
9591         }
9592         assert(PL_lex_formbrack);
9593         s = scan_formline(PL_bufptr);
9594         if (!PL_lex_formbrack)
9595             return yyl_rightcurly(aTHX_ s, 1);
9596         PL_bufptr = s;
9597         return yylex();
9598     }
9599
9600     /* We really do *not* want PL_linestr ever becoming a COW. */
9601     assert (!SvIsCOW(PL_linestr));
9602     s = PL_bufptr;
9603     PL_oldoldbufptr = PL_oldbufptr;
9604     PL_oldbufptr = s;
9605
9606     if (PL_in_my == KEY_sigvar) {
9607         PL_parser->saw_infix_sigil = 0;
9608         return yyl_sigvar(aTHX_ s);
9609     }
9610
9611     {
9612         /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9613            On its return, we then need to set it to indicate whether the token
9614            we just encountered was an infix operator that (if we hadn't been
9615            expecting an operator) have been a sigil.
9616         */
9617         bool expected_operator = (PL_expect == XOPERATOR);
9618         int ret = yyl_try(aTHX_ s);
9619         switch (pl_yylval.ival) {
9620         case OP_BIT_AND:
9621         case OP_MODULO:
9622         case OP_MULTIPLY:
9623         case OP_NBIT_AND:
9624             if (expected_operator) {
9625                 PL_parser->saw_infix_sigil = 1;
9626                 break;
9627             }
9628             /* FALLTHROUGH */
9629         default:
9630             PL_parser->saw_infix_sigil = 0;
9631         }
9632         return ret;
9633     }
9634 }
9635
9636
9637 /*
9638   S_pending_ident
9639
9640   Looks up an identifier in the pad or in a package
9641
9642   PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9643   rather than a plain pad var.
9644
9645   Returns:
9646     PRIVATEREF if this is a lexical name.
9647     BAREWORD   if this belongs to a package.
9648
9649   Structure:
9650       if we're in a my declaration
9651           croak if they tried to say my($foo::bar)
9652           build the ops for a my() declaration
9653       if it's an access to a my() variable
9654           build ops for access to a my() variable
9655       if in a dq string, and they've said @foo and we can't find @foo
9656           warn
9657       build ops for a bareword
9658 */
9659
9660 static int
9661 S_pending_ident(pTHX)
9662 {
9663     PADOFFSET tmp = 0;
9664     const char pit = (char)pl_yylval.ival;
9665     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9666     /* All routes through this function want to know if there is a colon.  */
9667     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9668
9669     DEBUG_T({ PerlIO_printf(Perl_debug_log,
9670           "### Pending identifier '%s'\n", PL_tokenbuf); });
9671     assert(tokenbuf_len >= 2);
9672
9673     /* if we're in a my(), we can't allow dynamics here.
9674        $foo'bar has already been turned into $foo::bar, so
9675        just check for colons.
9676
9677        if it's a legal name, the OP is a PADANY.
9678     */
9679     if (PL_in_my) {
9680         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
9681             if (has_colon)
9682                 /* diag_listed_as: No package name allowed for variable %s
9683                                    in "our" */
9684                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9685                                   "%s %s in \"our\"",
9686                                   *PL_tokenbuf=='&' ? "subroutine" : "variable",
9687                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9688             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9689         }
9690         else {
9691             OP *o;
9692             if (has_colon) {
9693                 /* "my" variable %s can't be in a package */
9694                 /* PL_no_myglob is constant */
9695                 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9696                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9697                             PL_in_my == KEY_my ? "my" : "state",
9698                             *PL_tokenbuf == '&' ? "subroutine" : "variable",
9699                             PL_tokenbuf),
9700                             UTF ? SVf_UTF8 : 0);
9701                 GCC_DIAG_RESTORE_STMT;
9702             }
9703
9704             if (PL_in_my == KEY_sigvar) {
9705                 /* A signature 'padop' needs in addition, an op_first to
9706                  * point to a child sigdefelem, and an extra field to hold
9707                  * the signature index. We can achieve both by using an
9708                  * UNOP_AUX and (ab)using the op_aux field to hold the
9709                  * index. If we ever need more fields, use a real malloced
9710                  * aux strut instead.
9711                  */
9712                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9713                                     INT2PTR(UNOP_AUX_item *,
9714                                         (PL_parser->sig_elems)));
9715                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9716                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9717                                   :                         OPpARGELEM_HV);
9718             }
9719             else
9720                 o = newOP(OP_PADANY, 0);
9721             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9722                                                         UTF ? SVf_UTF8 : 0);
9723             if (PL_in_my == KEY_sigvar)
9724                 PL_in_my = 0;
9725
9726             pl_yylval.opval = o;
9727             return PRIVATEREF;
9728         }
9729     }
9730
9731     /*
9732        build the ops for accesses to a my() variable.
9733     */
9734
9735     if (!has_colon) {
9736         if (!PL_in_my)
9737             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9738                                  0);
9739         if (tmp != NOT_IN_PAD) {
9740             /* might be an "our" variable" */
9741             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9742                 /* build ops for a bareword */
9743                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9744                 HEK * const stashname = HvNAME_HEK(stash);
9745                 SV *  const sym = newSVhek(stashname);
9746                 sv_catpvs(sym, "::");
9747                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9748                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9749                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9750                 if (pit != '&')
9751                   gv_fetchsv(sym,
9752                     GV_ADDMULTI,
9753                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9754                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9755                      : SVt_PVHV));
9756                 return BAREWORD;
9757             }
9758
9759             pl_yylval.opval = newOP(OP_PADANY, 0);
9760             pl_yylval.opval->op_targ = tmp;
9761             return PRIVATEREF;
9762         }
9763     }
9764
9765     /*
9766        Whine if they've said @foo or @foo{key} in a doublequoted string,
9767        and @foo (or %foo) isn't a variable we can find in the symbol
9768        table.
9769     */
9770     if (ckWARN(WARN_AMBIGUOUS)
9771         && pit == '@'
9772         && PL_lex_state != LEX_NORMAL
9773         && !PL_lex_brackets)
9774     {
9775         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9776                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9777                                          SVt_PVAV);
9778         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9779            )
9780         {
9781             /* Downgraded from fatal to warning 20000522 mjd */
9782             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9783                         "Possible unintended interpolation of %" UTF8f
9784                         " in string",
9785                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9786         }
9787     }
9788
9789     /* build ops for a bareword */
9790     pl_yylval.opval = newSVOP(OP_CONST, 0,
9791                                    newSVpvn_flags(PL_tokenbuf + 1,
9792                                                       tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9793                                                       UTF ? SVf_UTF8 : 0 ));
9794     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9795     if (pit != '&')
9796         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9797                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9798                      | ( UTF ? SVf_UTF8 : 0 ),
9799                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9800                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9801                       : SVt_PVHV));
9802     return BAREWORD;
9803 }
9804
9805 STATIC void
9806 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9807 {
9808     PERL_ARGS_ASSERT_CHECKCOMMA;
9809
9810     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
9811         if (ckWARN(WARN_SYNTAX)) {
9812             int level = 1;
9813             const char *w;
9814             for (w = s+2; *w && level; w++) {
9815                 if (*w == '(')
9816                     ++level;
9817                 else if (*w == ')')
9818                     --level;
9819             }
9820             while (isSPACE(*w))
9821                 ++w;
9822             /* the list of chars below is for end of statements or
9823              * block / parens, boolean operators (&&, ||, //) and branch
9824              * constructs (or, and, if, until, unless, while, err, for).
9825              * Not a very solid hack... */
9826             if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
9827                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9828                             "%s (...) interpreted as function",name);
9829         }
9830     }
9831     while (s < PL_bufend && isSPACE(*s))
9832         s++;
9833     if (*s == '(')
9834         s++;
9835     while (s < PL_bufend && isSPACE(*s))
9836         s++;
9837     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9838         const char * const w = s;
9839         s += UTF ? UTF8SKIP(s) : 1;
9840         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9841             s += UTF ? UTF8SKIP(s) : 1;
9842         while (s < PL_bufend && isSPACE(*s))
9843             s++;
9844         if (*s == ',') {
9845             GV* gv;
9846             if (keyword(w, s - w, 0))
9847                 return;
9848
9849             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9850             if (gv && GvCVu(gv))
9851                 return;
9852             if (s - w <= 254) {
9853                 PADOFFSET off;
9854                 char tmpbuf[256];
9855                 Copy(w, tmpbuf+1, s - w, char);
9856                 *tmpbuf = '&';
9857                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9858                 if (off != NOT_IN_PAD) return;
9859             }
9860             Perl_croak(aTHX_ "No comma allowed after %s", what);
9861         }
9862     }
9863 }
9864
9865 /* S_new_constant(): do any overload::constant lookup.
9866
9867    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9868    Best used as sv=new_constant(..., sv, ...).
9869    If s, pv are NULL, calls subroutine with one argument,
9870    and <type> is used with error messages only.
9871    <type> is assumed to be well formed UTF-8.
9872
9873    If error_msg is not NULL, *error_msg will be set to any error encountered.
9874    Otherwise yyerror() will be used to output it */
9875
9876 STATIC SV *
9877 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9878                SV *sv, SV *pv, const char *type, STRLEN typelen,
9879                const char ** error_msg)
9880 {
9881     dSP;
9882     HV * table = GvHV(PL_hintgv);                /* ^H */
9883     SV *res;
9884     SV *errsv = NULL;
9885     SV **cvp;
9886     SV *cv, *typesv;
9887     const char *why1 = "", *why2 = "", *why3 = "";
9888     const char * optional_colon = ":";  /* Only some messages have a colon */
9889     char *msg;
9890
9891     PERL_ARGS_ASSERT_NEW_CONSTANT;
9892     /* We assume that this is true: */
9893     assert(type || s);
9894
9895     sv_2mortal(sv);                     /* Parent created it permanently */
9896
9897     if (   ! table
9898         || ! (PL_hints & HINT_LOCALIZE_HH))
9899     {
9900         why1 = "unknown";
9901         optional_colon = "";
9902         goto report;
9903     }
9904
9905     cvp = hv_fetch(table, key, keylen, FALSE);
9906     if (!cvp || !SvOK(*cvp)) {
9907         why1 = "$^H{";
9908         why2 = key;
9909         why3 = "} is not defined";
9910         goto report;
9911     }
9912
9913     cv = *cvp;
9914     if (!pv && s)
9915         pv = newSVpvn_flags(s, len, SVs_TEMP);
9916     if (type && pv)
9917         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9918     else
9919         typesv = &PL_sv_undef;
9920
9921     PUSHSTACKi(PERLSI_OVERLOAD);
9922     ENTER ;
9923     SAVETMPS;
9924
9925     PUSHMARK(SP) ;
9926     EXTEND(sp, 3);
9927     if (pv)
9928         PUSHs(pv);
9929     PUSHs(sv);
9930     if (pv)
9931         PUSHs(typesv);
9932     PUTBACK;
9933     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9934
9935     SPAGAIN ;
9936
9937     /* Check the eval first */
9938     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9939         STRLEN errlen;
9940         const char * errstr;
9941         sv_catpvs(errsv, "Propagated");
9942         errstr = SvPV_const(errsv, errlen);
9943         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9944         (void)POPs;
9945         res = SvREFCNT_inc_simple_NN(sv);
9946     }
9947     else {
9948         res = POPs;
9949         SvREFCNT_inc_simple_void_NN(res);
9950     }
9951
9952     PUTBACK ;
9953     FREETMPS ;
9954     LEAVE ;
9955     POPSTACK;
9956
9957     if (SvOK(res)) {
9958         return res;
9959     }
9960
9961     sv = res;
9962     (void)sv_2mortal(sv);
9963
9964     why1 = "Call to &{$^H{";
9965     why2 = key;
9966     why3 = "}} did not return a defined value";
9967
9968   report:
9969
9970     msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
9971                         (int)(type ? typelen : len),
9972                         (type ? type: s),
9973                         optional_colon,
9974                         why1, why2, why3);
9975     if (error_msg) {
9976         *error_msg = msg;
9977     }
9978     else {
9979         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9980     }
9981     return SvREFCNT_inc_simple_NN(sv);
9982 }
9983
9984 PERL_STATIC_INLINE void
9985 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9986                     bool is_utf8, bool check_dollar, bool tick_warn)
9987 {
9988     int saw_tick = 0;
9989     const char *olds = *s;
9990     PERL_ARGS_ASSERT_PARSE_IDENT;
9991
9992     while (*s < PL_bufend) {
9993         if (*d >= e)
9994             Perl_croak(aTHX_ "%s", ident_too_long);
9995         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9996              /* The UTF-8 case must come first, otherwise things
9997              * like c\N{COMBINING TILDE} would start failing, as the
9998              * isWORDCHAR_A case below would gobble the 'c' up.
9999              */
10000
10001             char *t = *s + UTF8SKIP(*s);
10002             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
10003                 t += UTF8SKIP(t);
10004             }
10005             if (*d + (t - *s) > e)
10006                 Perl_croak(aTHX_ "%s", ident_too_long);
10007             Copy(*s, *d, t - *s, char);
10008             *d += t - *s;
10009             *s = t;
10010         }
10011         else if ( isWORDCHAR_A(**s) ) {
10012             do {
10013                 *(*d)++ = *(*s)++;
10014             } while (isWORDCHAR_A(**s) && *d < e);
10015         }
10016         else if (   allow_package
10017                  && **s == '\''
10018                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
10019         {
10020             *(*d)++ = ':';
10021             *(*d)++ = ':';
10022             (*s)++;
10023             saw_tick++;
10024         }
10025         else if (allow_package && **s == ':' && (*s)[1] == ':'
10026            /* Disallow things like Foo::$bar. For the curious, this is
10027             * the code path that triggers the "Bad name after" warning
10028             * when looking for barewords.
10029             */
10030            && !(check_dollar && (*s)[2] == '$')) {
10031             *(*d)++ = *(*s)++;
10032             *(*d)++ = *(*s)++;
10033         }
10034         else
10035             break;
10036     }
10037     if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
10038               && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
10039         char *this_d;
10040         char *d2;
10041         Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
10042         d2 = this_d;
10043         SAVEFREEPV(this_d);
10044         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10045                          "Old package separator used in string");
10046         if (olds[-1] == '#')
10047             *d2++ = olds[-2];
10048         *d2++ = olds[-1];
10049         while (olds < *s) {
10050             if (*olds == '\'') {
10051                 *d2++ = '\\';
10052                 *d2++ = *olds++;
10053             }
10054             else
10055                 *d2++ = *olds++;
10056         }
10057         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10058                          "\t(Did you mean \"%" UTF8f "\" instead?)\n",
10059                           UTF8fARG(is_utf8, d2-this_d, this_d));
10060     }
10061     return;
10062 }
10063
10064 /* Returns a NUL terminated string, with the length of the string written to
10065    *slp
10066    */
10067 char *
10068 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10069 {
10070     char *d = dest;
10071     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
10072     bool is_utf8 = cBOOL(UTF);
10073
10074     PERL_ARGS_ASSERT_SCAN_WORD;
10075
10076     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
10077     *d = '\0';
10078     *slp = d - dest;
10079     return s;
10080 }
10081
10082 /* Is the byte 'd' a legal single character identifier name?  'u' is true
10083  * iff Unicode semantics are to be used.  The legal ones are any of:
10084  *  a) all ASCII characters except:
10085  *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
10086  *          2) '{'
10087  *     The final case currently doesn't get this far in the program, so we
10088  *     don't test for it.  If that were to change, it would be ok to allow it.
10089  *  b) When not under Unicode rules, any upper Latin1 character
10090  *  c) Otherwise, when unicode rules are used, all XIDS characters.
10091  *
10092  *      Because all ASCII characters have the same representation whether
10093  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
10094  *      '{' without knowing if is UTF-8 or not. */
10095 #define VALID_LEN_ONE_IDENT(s, e, is_utf8)                                  \
10096     (isGRAPH_A(*(s)) || ((is_utf8)                                          \
10097                          ? isIDFIRST_utf8_safe(s, e)                        \
10098                          : (isGRAPH_L1(*s)                                  \
10099                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
10100
10101 STATIC char *
10102 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
10103 {
10104     I32 herelines = PL_parser->herelines;
10105     SSize_t bracket = -1;
10106     char funny = *s++;
10107     char *d = dest;
10108     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
10109     bool is_utf8 = cBOOL(UTF);
10110     I32 orig_copline = 0, tmp_copline = 0;
10111
10112     PERL_ARGS_ASSERT_SCAN_IDENT;
10113
10114     if (isSPACE(*s) || !*s)
10115         s = skipspace(s);
10116     if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
10117         bool is_zero= *s == '0' ? TRUE : FALSE;
10118         char *digit_start= d;
10119         *d++ = *s++;
10120         while (s < PL_bufend && isDIGIT(*s)) {
10121             if (d >= e)
10122                 Perl_croak(aTHX_ "%s", ident_too_long);
10123             *d++ = *s++;
10124         }
10125         if (is_zero && d - digit_start > 1)
10126             Perl_croak(aTHX_ ident_var_zero_multi_digit);
10127     }
10128     else {  /* See if it is a "normal" identifier */
10129         parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
10130     }
10131     *d = '\0';
10132     d = dest;
10133     if (*d) {
10134         /* Either a digit variable, or parse_ident() found an identifier
10135            (anything valid as a bareword), so job done and return.  */
10136         if (PL_lex_state != LEX_NORMAL)
10137             PL_lex_state = LEX_INTERPENDMAYBE;
10138         return s;
10139     }
10140
10141     /* Here, it is not a run-of-the-mill identifier name */
10142
10143     if (*s == '$' && s[1]
10144         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
10145             || isDIGIT_A((U8)s[1])
10146             || s[1] == '$'
10147             || s[1] == '{'
10148             || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
10149     {
10150         /* Dereferencing a value in a scalar variable.
10151            The alternatives are different syntaxes for a scalar variable.
10152            Using ' as a leading package separator isn't allowed. :: is.   */
10153         return s;
10154     }
10155     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
10156     if (*s == '{') {
10157         bracket = s - SvPVX(PL_linestr);
10158         s++;
10159         orig_copline = CopLINE(PL_curcop);
10160         if (s < PL_bufend && isSPACE(*s)) {
10161             s = skipspace(s);
10162         }
10163     }
10164     if ((s <= PL_bufend - ((is_utf8)
10165                           ? UTF8SKIP(s)
10166                           : 1))
10167         && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
10168     {
10169         if (is_utf8) {
10170             const STRLEN skip = UTF8SKIP(s);
10171             STRLEN i;
10172             d[skip] = '\0';
10173             for ( i = 0; i < skip; i++ )
10174                 d[i] = *s++;
10175         }
10176         else {
10177             *d = *s++;
10178             /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
10179             if (isDIGIT(*d)) {
10180                 bool is_zero= *d == '0' ? TRUE : FALSE;
10181                 char *digit_start= d;
10182                 while (s < PL_bufend && isDIGIT(*s)) {
10183                     d++;
10184                     if (d >= e)
10185                         Perl_croak(aTHX_ "%s", ident_too_long);
10186                     *d= *s++;
10187                 }
10188                 if (is_zero && d - digit_start > 1)
10189                     Perl_croak(aTHX_ ident_var_zero_multi_digit);
10190             }
10191             d[1] = '\0';
10192         }
10193     }
10194     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10195     if (*d == '^' && *s && isCONTROLVAR(*s)) {
10196         *d = toCTRL(*s);
10197         s++;
10198     }
10199     /* Warn about ambiguous code after unary operators if {...} notation isn't
10200        used.  There's no difference in ambiguity; it's merely a heuristic
10201        about when not to warn.  */
10202     else if (ck_uni && bracket == -1)
10203         check_uni();
10204     if (bracket != -1) {
10205         bool skip;
10206         char *s2;
10207         /* If we were processing {...} notation then...  */
10208         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10209             || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10210                  && isWORDCHAR(*s))
10211         ) {
10212             /* note we have to check for a normal identifier first,
10213              * as it handles utf8 symbols, and only after that has
10214              * been ruled out can we look at the caret words */
10215             if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10216                 /* if it starts as a valid identifier, assume that it is one.
10217                    (the later check for } being at the expected point will trap
10218                    cases where this doesn't pan out.)  */
10219                 d += is_utf8 ? UTF8SKIP(d) : 1;
10220                 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10221                 *d = '\0';
10222             }
10223             else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10224                 d++;
10225                 while (isWORDCHAR(*s) && d < e) {
10226                     *d++ = *s++;
10227                 }
10228                 if (d >= e)
10229                     Perl_croak(aTHX_ "%s", ident_too_long);
10230                 *d = '\0';
10231             }
10232             tmp_copline = CopLINE(PL_curcop);
10233             if (s < PL_bufend && isSPACE(*s)) {
10234                 s = skipspace(s);
10235             }
10236             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10237                 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
10238                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10239                     const char * const brack =
10240                         (const char *)
10241                         ((*s == '[') ? "[...]" : "{...}");
10242                     orig_copline = CopLINE(PL_curcop);
10243                     CopLINE_set(PL_curcop, tmp_copline);
10244    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10245                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10246                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10247                         funny, dest, brack, funny, dest, brack);
10248                     CopLINE_set(PL_curcop, orig_copline);
10249                 }
10250                 bracket++;
10251                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10252                 PL_lex_allbrackets++;
10253                 return s;
10254             }
10255         }
10256
10257         if ( !tmp_copline )
10258             tmp_copline = CopLINE(PL_curcop);
10259         if ((skip = s < PL_bufend && isSPACE(*s))) {
10260             /* Avoid incrementing line numbers or resetting PL_linestart,
10261                in case we have to back up.  */
10262             STRLEN s_off = s - SvPVX(PL_linestr);
10263             s2 = peekspace(s);
10264             s = SvPVX(PL_linestr) + s_off;
10265         }
10266         else
10267             s2 = s;
10268
10269         /* Expect to find a closing } after consuming any trailing whitespace.
10270          */
10271         if (*s2 == '}') {
10272             /* Now increment line numbers if applicable.  */
10273             if (skip)
10274                 s = skipspace(s);
10275             s++;
10276             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10277                 PL_lex_state = LEX_INTERPEND;
10278                 PL_expect = XREF;
10279             }
10280             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10281                 if (ckWARN(WARN_AMBIGUOUS)
10282                     && (keyword(dest, d - dest, 0)
10283                         || get_cvn_flags(dest, d - dest, is_utf8
10284                            ? SVf_UTF8
10285                            : 0)))
10286                 {
10287                     SV *tmp = newSVpvn_flags( dest, d - dest,
10288                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10289                     if (funny == '#')
10290                         funny = '@';
10291                     orig_copline = CopLINE(PL_curcop);
10292                     CopLINE_set(PL_curcop, tmp_copline);
10293                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10294                         "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10295                         funny, SVfARG(tmp), funny, SVfARG(tmp));
10296                     CopLINE_set(PL_curcop, orig_copline);
10297                 }
10298             }
10299         }
10300         else {
10301             /* Didn't find the closing } at the point we expected, so restore
10302                state such that the next thing to process is the opening { and */
10303             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10304             CopLINE_set(PL_curcop, orig_copline);
10305             PL_parser->herelines = herelines;
10306             *dest = '\0';
10307             PL_parser->sub_no_recover = TRUE;
10308         }
10309     }
10310     else if (   PL_lex_state == LEX_INTERPNORMAL
10311              && !PL_lex_brackets
10312              && !intuit_more(s, PL_bufend))
10313         PL_lex_state = LEX_INTERPEND;
10314     return s;
10315 }
10316
10317 static bool
10318 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10319
10320     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10321      * found in the parse starting at 's', based on the subset that are valid
10322      * in this context input to this routine in 'valid_flags'. Advances s.
10323      * Returns TRUE if the input should be treated as a valid flag, so the next
10324      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10325      * upon first call on the current regex.  This routine will set it to any
10326      * charset modifier found.  The caller shouldn't change it.  This way,
10327      * another charset modifier encountered in the parse can be detected as an
10328      * error, as we have decided to allow only one */
10329
10330     const char c = **s;
10331     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10332
10333     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10334         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10335             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10336                        UTF ? SVf_UTF8 : 0);
10337             (*s) += charlen;
10338             /* Pretend that it worked, so will continue processing before
10339              * dieing */
10340             return TRUE;
10341         }
10342         return FALSE;
10343     }
10344
10345     switch (c) {
10346
10347         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10348         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
10349         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
10350         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
10351         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
10352         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10353         case LOCALE_PAT_MOD:
10354             if (*charset) {
10355                 goto multiple_charsets;
10356             }
10357             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10358             *charset = c;
10359             break;
10360         case UNICODE_PAT_MOD:
10361             if (*charset) {
10362                 goto multiple_charsets;
10363             }
10364             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10365             *charset = c;
10366             break;
10367         case ASCII_RESTRICT_PAT_MOD:
10368             if (! *charset) {
10369                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10370             }
10371             else {
10372
10373                 /* Error if previous modifier wasn't an 'a', but if it was, see
10374                  * if, and accept, a second occurrence (only) */
10375                 if (*charset != 'a'
10376                     || get_regex_charset(*pmfl)
10377                         != REGEX_ASCII_RESTRICTED_CHARSET)
10378                 {
10379                         goto multiple_charsets;
10380                 }
10381                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10382             }
10383             *charset = c;
10384             break;
10385         case DEPENDS_PAT_MOD:
10386             if (*charset) {
10387                 goto multiple_charsets;
10388             }
10389             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10390             *charset = c;
10391             break;
10392     }
10393
10394     (*s)++;
10395     return TRUE;
10396
10397     multiple_charsets:
10398         if (*charset != c) {
10399             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10400         }
10401         else if (c == 'a') {
10402   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10403             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10404         }
10405         else {
10406             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10407         }
10408
10409         /* Pretend that it worked, so will continue processing before dieing */
10410         (*s)++;
10411         return TRUE;
10412 }
10413
10414 STATIC char *
10415 S_scan_pat(pTHX_ char *start, I32 type)
10416 {
10417     PMOP *pm;
10418     char *s;
10419     const char * const valid_flags =
10420         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10421     char charset = '\0';    /* character set modifier */
10422     unsigned int x_mod_count = 0;
10423
10424     PERL_ARGS_ASSERT_SCAN_PAT;
10425
10426     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10427     if (!s)
10428         Perl_croak(aTHX_ "Search pattern not terminated");
10429
10430     pm = (PMOP*)newPMOP(type, 0);
10431     if (PL_multi_open == '?') {
10432         /* This is the only point in the code that sets PMf_ONCE:  */
10433         pm->op_pmflags |= PMf_ONCE;
10434
10435         /* Hence it's safe to do this bit of PMOP book-keeping here, which
10436            allows us to restrict the list needed by reset to just the ??
10437            matches.  */
10438         assert(type != OP_TRANS);
10439         if (PL_curstash) {
10440             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10441             U32 elements;
10442             if (!mg) {
10443                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10444                                  0);
10445             }
10446             elements = mg->mg_len / sizeof(PMOP**);
10447             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10448             ((PMOP**)mg->mg_ptr) [elements++] = pm;
10449             mg->mg_len = elements * sizeof(PMOP**);
10450             PmopSTASH_set(pm,PL_curstash);
10451         }
10452     }
10453
10454     /* if qr/...(?{..}).../, then need to parse the pattern within a new
10455      * anon CV. False positives like qr/[(?{]/ are harmless */
10456
10457     if (type == OP_QR) {
10458         STRLEN len;
10459         char *e, *p = SvPV(PL_lex_stuff, len);
10460         e = p + len;
10461         for (; p < e; p++) {
10462             if (p[0] == '(' && p[1] == '?'
10463                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
10464             {
10465                 pm->op_pmflags |= PMf_HAS_CV;
10466                 break;
10467             }
10468         }
10469         pm->op_pmflags |= PMf_IS_QR;
10470     }
10471
10472     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10473                                 &s, &charset, &x_mod_count))
10474     {};
10475     /* issue a warning if /c is specified,but /g is not */
10476     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10477     {
10478         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10479                        "Use of /c modifier is meaningless without /g" );
10480     }
10481
10482     PL_lex_op = (OP*)pm;
10483     pl_yylval.ival = OP_MATCH;
10484     return s;
10485 }
10486
10487 STATIC char *
10488 S_scan_subst(pTHX_ char *start)
10489 {
10490     char *s;
10491     PMOP *pm;
10492     I32 first_start;
10493     line_t first_line;
10494     line_t linediff = 0;
10495     I32 es = 0;
10496     char charset = '\0';    /* character set modifier */
10497     unsigned int x_mod_count = 0;
10498     char *t;
10499
10500     PERL_ARGS_ASSERT_SCAN_SUBST;
10501
10502     pl_yylval.ival = OP_NULL;
10503
10504     s = scan_str(start, TRUE, FALSE, FALSE, &t);
10505
10506     if (!s)
10507         Perl_croak(aTHX_ "Substitution pattern not terminated");
10508
10509     s = t;
10510
10511     first_start = PL_multi_start;
10512     first_line = CopLINE(PL_curcop);
10513     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10514     if (!s) {
10515         SvREFCNT_dec_NN(PL_lex_stuff);
10516         PL_lex_stuff = NULL;
10517         Perl_croak(aTHX_ "Substitution replacement not terminated");
10518     }
10519     PL_multi_start = first_start;       /* so whole substitution is taken together */
10520
10521     pm = (PMOP*)newPMOP(OP_SUBST, 0);
10522
10523
10524     while (*s) {
10525         if (*s == EXEC_PAT_MOD) {
10526             s++;
10527             es++;
10528         }
10529         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10530                                   &s, &charset, &x_mod_count))
10531         {
10532             break;
10533         }
10534     }
10535
10536     if ((pm->op_pmflags & PMf_CONTINUE)) {
10537         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10538     }
10539
10540     if (es) {
10541         SV * const repl = newSVpvs("");
10542
10543         PL_multi_end = 0;
10544         pm->op_pmflags |= PMf_EVAL;
10545         for (; es > 1; es--) {
10546             sv_catpvs(repl, "eval ");
10547         }
10548         sv_catpvs(repl, "do {");
10549         sv_catsv(repl, PL_parser->lex_sub_repl);
10550         sv_catpvs(repl, "}");
10551         SvREFCNT_dec(PL_parser->lex_sub_repl);
10552         PL_parser->lex_sub_repl = repl;
10553     }
10554
10555
10556     linediff = CopLINE(PL_curcop) - first_line;
10557     if (linediff)
10558         CopLINE_set(PL_curcop, first_line);
10559
10560     if (linediff || es) {
10561         /* the IVX field indicates that the replacement string is a s///e;
10562          * the NVX field indicates how many src code lines the replacement
10563          * spreads over */
10564         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10565         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10566         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10567                                                                     cBOOL(es);
10568     }
10569
10570     PL_lex_op = (OP*)pm;
10571     pl_yylval.ival = OP_SUBST;
10572     return s;
10573 }
10574
10575 STATIC char *
10576 S_scan_trans(pTHX_ char *start)
10577 {
10578     char* s;
10579     OP *o;
10580     U8 squash;
10581     U8 del;
10582     U8 complement;
10583     bool nondestruct = 0;
10584     char *t;
10585
10586     PERL_ARGS_ASSERT_SCAN_TRANS;
10587
10588     pl_yylval.ival = OP_NULL;
10589
10590     s = scan_str(start,FALSE,FALSE,FALSE,&t);
10591     if (!s)
10592         Perl_croak(aTHX_ "Transliteration pattern not terminated");
10593
10594     s = t;
10595
10596     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10597     if (!s) {
10598         SvREFCNT_dec_NN(PL_lex_stuff);
10599         PL_lex_stuff = NULL;
10600         Perl_croak(aTHX_ "Transliteration replacement not terminated");
10601     }
10602
10603     complement = del = squash = 0;
10604     while (1) {
10605         switch (*s) {
10606         case 'c':
10607             complement = OPpTRANS_COMPLEMENT;
10608             break;
10609         case 'd':
10610             del = OPpTRANS_DELETE;
10611             break;
10612         case 's':
10613             squash = OPpTRANS_SQUASH;
10614             break;
10615         case 'r':
10616             nondestruct = 1;
10617             break;
10618         default:
10619             goto no_more;
10620         }
10621         s++;
10622     }
10623   no_more:
10624
10625     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10626     o->op_private &= ~OPpTRANS_ALL;
10627     o->op_private |= del|squash|complement;
10628
10629     PL_lex_op = o;
10630     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10631
10632
10633     return s;
10634 }
10635
10636 /* scan_heredoc
10637    Takes a pointer to the first < in <<FOO.
10638    Returns a pointer to the byte following <<FOO.
10639
10640    This function scans a heredoc, which involves different methods
10641    depending on whether we are in a string eval, quoted construct, etc.
10642    This is because PL_linestr could containing a single line of input, or
10643    a whole string being evalled, or the contents of the current quote-
10644    like operator.
10645
10646    The two basic methods are:
10647     - Steal lines from the input stream
10648     - Scan the heredoc in PL_linestr and remove it therefrom
10649
10650    In a file scope or filtered eval, the first method is used; in a
10651    string eval, the second.
10652
10653    In a quote-like operator, we have to choose between the two,
10654    depending on where we can find a newline.  We peek into outer lex-
10655    ing scopes until we find one with a newline in it.  If we reach the
10656    outermost lexing scope and it is a file, we use the stream method.
10657    Otherwise it is treated as an eval.
10658 */
10659
10660 STATIC char *
10661 S_scan_heredoc(pTHX_ char *s)
10662 {
10663     I32 op_type = OP_SCALAR;
10664     I32 len;
10665     SV *tmpstr;
10666     char term;
10667     char *d;
10668     char *e;
10669     char *peek;
10670     char *indent = 0;
10671     I32 indent_len = 0;
10672     bool indented = FALSE;
10673     const bool infile = PL_rsfp || PL_parser->filtered;
10674     const line_t origline = CopLINE(PL_curcop);
10675     LEXSHARED *shared = PL_parser->lex_shared;
10676
10677     PERL_ARGS_ASSERT_SCAN_HEREDOC;
10678
10679     s += 2;
10680     d = PL_tokenbuf + 1;
10681     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10682     *PL_tokenbuf = '\n';
10683     peek = s;
10684
10685     if (*peek == '~') {
10686         indented = TRUE;
10687         peek++; s++;
10688     }
10689
10690     while (SPACE_OR_TAB(*peek))
10691         peek++;
10692
10693     if (*peek == '`' || *peek == '\'' || *peek =='"') {
10694         s = peek;
10695         term = *s++;
10696         s = delimcpy(d, e, s, PL_bufend, term, &len);
10697         if (s == PL_bufend)
10698             Perl_croak(aTHX_ "Unterminated delimiter for here document");
10699         d += len;
10700         s++;
10701     }
10702     else {
10703         if (*s == '\\')
10704             /* <<\FOO is equivalent to <<'FOO' */
10705             s++, term = '\'';
10706         else
10707             term = '"';
10708
10709         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10710             Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10711
10712         peek = s;
10713
10714         while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10715             peek += UTF ? UTF8SKIP(peek) : 1;
10716         }
10717
10718         len = (peek - s >= e - d) ? (e - d) : (peek - s);
10719         Copy(s, d, len, char);
10720         s += len;
10721         d += len;
10722     }
10723
10724     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10725         Perl_croak(aTHX_ "Delimiter for here document is too long");
10726
10727     *d++ = '\n';
10728     *d = '\0';
10729     len = d - PL_tokenbuf;
10730
10731 #ifndef PERL_STRICT_CR
10732     d = (char *) memchr(s, '\r', PL_bufend - s);
10733     if (d) {
10734         char * const olds = s;
10735         s = d;
10736         while (s < PL_bufend) {
10737             if (*s == '\r') {
10738                 *d++ = '\n';
10739                 if (*++s == '\n')
10740                     s++;
10741             }
10742             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
10743                 *d++ = *s++;
10744                 s++;
10745             }
10746             else
10747                 *d++ = *s++;
10748         }
10749         *d = '\0';
10750         PL_bufend = d;
10751         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10752         s = olds;
10753     }
10754 #endif
10755
10756     tmpstr = newSV_type(SVt_PVIV);
10757     SvGROW(tmpstr, 80);
10758     if (term == '\'') {
10759         op_type = OP_CONST;
10760         SvIV_set(tmpstr, -1);
10761     }
10762     else if (term == '`') {
10763         op_type = OP_BACKTICK;
10764         SvIV_set(tmpstr, '\\');
10765     }
10766
10767     PL_multi_start = origline + 1 + PL_parser->herelines;
10768     PL_multi_open = PL_multi_close = '<';
10769
10770     /* inside a string eval or quote-like operator */
10771     if (!infile || PL_lex_inwhat) {
10772         SV *linestr;
10773         char *bufend;
10774         char * const olds = s;
10775         PERL_CONTEXT * const cx = CX_CUR();
10776         /* These two fields are not set until an inner lexing scope is
10777            entered.  But we need them set here. */
10778         shared->ls_bufptr  = s;
10779         shared->ls_linestr = PL_linestr;
10780
10781         if (PL_lex_inwhat) {
10782             /* Look for a newline.  If the current buffer does not have one,
10783              peek into the line buffer of the parent lexing scope, going
10784              up as many levels as necessary to find one with a newline
10785              after bufptr.
10786             */
10787             while (!(s = (char *)memchr(
10788                                 (void *)shared->ls_bufptr, '\n',
10789                                 SvEND(shared->ls_linestr)-shared->ls_bufptr
10790                 )))
10791             {
10792                 shared = shared->ls_prev;
10793                 /* shared is only null if we have gone beyond the outermost
10794                    lexing scope.  In a file, we will have broken out of the
10795                    loop in the previous iteration.  In an eval, the string buf-
10796                    fer ends with "\n;", so the while condition above will have
10797                    evaluated to false.  So shared can never be null.  Or so you
10798                    might think.  Odd syntax errors like s;@{<<; can gobble up
10799                    the implicit semicolon at the end of a flie, causing the
10800                    file handle to be closed even when we are not in a string
10801                    eval.  So shared may be null in that case.
10802                    (Closing '>>}' here to balance the earlier open brace for
10803                    editors that look for matched pairs.) */
10804                 if (UNLIKELY(!shared))
10805                     goto interminable;
10806                 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10807                    most lexing scope.  In a file, shared->ls_linestr at that
10808                    level is just one line, so there is no body to steal. */
10809                 if (infile && !shared->ls_prev) {
10810                     s = olds;
10811                     goto streaming;
10812                 }
10813             }
10814         }
10815         else {  /* eval or we've already hit EOF */
10816             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10817             if (!s)
10818                 goto interminable;
10819         }
10820
10821         linestr = shared->ls_linestr;
10822         bufend = SvEND(linestr);
10823         d = s;
10824         if (indented) {
10825             char *myolds = s;
10826
10827             while (s < bufend - len + 1) {
10828                 if (*s++ == '\n')
10829                     ++PL_parser->herelines;
10830
10831                 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10832                     char *backup = s;
10833                     indent_len = 0;
10834
10835                     /* Only valid if it's preceded by whitespace only */
10836                     while (backup != myolds && --backup >= myolds) {
10837                         if (! SPACE_OR_TAB(*backup)) {
10838                             break;
10839                         }
10840                         indent_len++;
10841                     }
10842
10843                     /* No whitespace or all! */
10844                     if (backup == s || *backup == '\n') {
10845                         Newx(indent, indent_len + 1, char);
10846                         memcpy(indent, backup + 1, indent_len);
10847                         indent[indent_len] = 0;
10848                         s--; /* before our delimiter */
10849                         PL_parser->herelines--; /* this line doesn't count */
10850                         break;
10851                     }
10852                 }
10853             }
10854         }
10855         else {
10856             while (s < bufend - len + 1
10857                    && memNE(s,PL_tokenbuf,len) )
10858             {
10859                 if (*s++ == '\n')
10860                     ++PL_parser->herelines;
10861             }
10862         }
10863
10864         if (s >= bufend - len + 1) {
10865             goto interminable;
10866         }
10867
10868         sv_setpvn(tmpstr,d+1,s-d);
10869         s += len - 1;
10870         /* the preceding stmt passes a newline */
10871         PL_parser->herelines++;
10872
10873         /* s now points to the newline after the heredoc terminator.
10874            d points to the newline before the body of the heredoc.
10875          */
10876
10877         /* We are going to modify linestr in place here, so set
10878            aside copies of the string if necessary for re-evals or
10879            (caller $n)[6]. */
10880         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10881            check shared->re_eval_str. */
10882         if (shared->re_eval_start || shared->re_eval_str) {
10883             /* Set aside the rest of the regexp */
10884             if (!shared->re_eval_str)
10885                 shared->re_eval_str =
10886                        newSVpvn(shared->re_eval_start,
10887                                 bufend - shared->re_eval_start);
10888             shared->re_eval_start -= s-d;
10889         }
10890
10891         if (cxstack_ix >= 0
10892             && CxTYPE(cx) == CXt_EVAL
10893             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10894             && cx->blk_eval.cur_text == linestr)
10895         {
10896             cx->blk_eval.cur_text = newSVsv(linestr);
10897             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10898         }
10899
10900         /* Copy everything from s onwards back to d. */
10901         Move(s,d,bufend-s + 1,char);
10902         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10903         /* Setting PL_bufend only applies when we have not dug deeper
10904            into other scopes, because sublex_done sets PL_bufend to
10905            SvEND(PL_linestr). */
10906         if (shared == PL_parser->lex_shared)
10907             PL_bufend = SvEND(linestr);
10908         s = olds;
10909     }
10910     else {
10911         SV *linestr_save;
10912         char *oldbufptr_save;
10913         char *oldoldbufptr_save;
10914       streaming:
10915         SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
10916         term = PL_tokenbuf[1];
10917         len--;
10918         linestr_save = PL_linestr; /* must restore this afterwards */
10919         d = s;                   /* and this */
10920         oldbufptr_save = PL_oldbufptr;
10921         oldoldbufptr_save = PL_oldoldbufptr;
10922         PL_linestr = newSVpvs("");
10923         PL_bufend = SvPVX(PL_linestr);
10924
10925         while (1) {
10926             PL_bufptr = PL_bufend;
10927             CopLINE_set(PL_curcop,
10928                         origline + 1 + PL_parser->herelines);
10929
10930             if (   !lex_next_chunk(LEX_NO_TERM)
10931                 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10932             {
10933                 /* Simply freeing linestr_save might seem simpler here, as it
10934                    does not matter what PL_linestr points to, since we are
10935                    about to croak; but in a quote-like op, linestr_save
10936                    will have been prospectively freed already, via
10937                    SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10938                    restore PL_linestr. */
10939                 SvREFCNT_dec_NN(PL_linestr);
10940                 PL_linestr = linestr_save;
10941                 PL_oldbufptr = oldbufptr_save;
10942                 PL_oldoldbufptr = oldoldbufptr_save;
10943                 goto interminable;
10944             }
10945
10946             CopLINE_set(PL_curcop, origline);
10947
10948             if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10949                 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10950                 /* ^That should be enough to avoid this needing to grow:  */
10951                 sv_catpvs(PL_linestr, "\n\0");
10952                 assert(s == SvPVX(PL_linestr));
10953                 PL_bufend = SvEND(PL_linestr);
10954             }
10955
10956             s = PL_bufptr;
10957             PL_parser->herelines++;
10958             PL_last_lop = PL_last_uni = NULL;
10959
10960 #ifndef PERL_STRICT_CR
10961             if (PL_bufend - PL_linestart >= 2) {
10962                 if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10963                     || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10964                 {
10965                     PL_bufend[-2] = '\n';
10966                     PL_bufend--;
10967                     SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10968                 }
10969                 else if (PL_bufend[-1] == '\r')
10970                     PL_bufend[-1] = '\n';
10971             }
10972             else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10973                 PL_bufend[-1] = '\n';
10974 #endif
10975
10976             if (indented && (PL_bufend-s) >= len) {
10977                 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10978
10979                 if (found) {
10980                     char *backup = found;
10981                     indent_len = 0;
10982
10983                     /* Only valid if it's preceded by whitespace only */
10984                     while (backup != s && --backup >= s) {
10985                         if (! SPACE_OR_TAB(*backup)) {
10986                             break;
10987                         }
10988                         indent_len++;
10989                     }
10990
10991                     /* All whitespace or none! */
10992                     if (backup == found || SPACE_OR_TAB(*backup)) {
10993                         Newx(indent, indent_len + 1, char);
10994                         memcpy(indent, backup, indent_len);
10995                         indent[indent_len] = 0;
10996                         SvREFCNT_dec(PL_linestr);
10997                         PL_linestr = linestr_save;
10998                         PL_linestart = SvPVX(linestr_save);
10999                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11000                         PL_oldbufptr = oldbufptr_save;
11001                         PL_oldoldbufptr = oldoldbufptr_save;
11002                         s = d;
11003                         break;
11004                     }
11005                 }
11006
11007                 /* Didn't find it */
11008                 sv_catsv(tmpstr,PL_linestr);
11009             }
11010             else {
11011                 if (*s == term && PL_bufend-s >= len
11012                     && memEQ(s,PL_tokenbuf + 1,len))
11013                 {
11014                     SvREFCNT_dec(PL_linestr);
11015                     PL_linestr = linestr_save;
11016                     PL_linestart = SvPVX(linestr_save);
11017                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11018                     PL_oldbufptr = oldbufptr_save;
11019                     PL_oldoldbufptr = oldoldbufptr_save;
11020                     s = d;
11021                     break;
11022                 }
11023                 else {
11024                     sv_catsv(tmpstr,PL_linestr);
11025                 }
11026             }
11027         } /* while (1) */
11028     }
11029
11030     PL_multi_end = origline + PL_parser->herelines;
11031
11032     if (indented && indent) {
11033         STRLEN linecount = 1;
11034         STRLEN herelen = SvCUR(tmpstr);
11035         char *ss = SvPVX(tmpstr);
11036         char *se = ss + herelen;
11037         SV *newstr = newSV(herelen+1);
11038         SvPOK_on(newstr);
11039
11040         /* Trim leading whitespace */
11041         while (ss < se) {
11042             /* newline only? Copy and move on */
11043             if (*ss == '\n') {
11044                 sv_catpvs(newstr,"\n");
11045                 ss++;
11046                 linecount++;
11047
11048             /* Found our indentation? Strip it */
11049             }
11050             else if (se - ss >= indent_len
11051                        && memEQ(ss, indent, indent_len))
11052             {
11053                 STRLEN le = 0;
11054                 ss += indent_len;
11055
11056                 while ((ss + le) < se && *(ss + le) != '\n')
11057                     le++;
11058
11059                 sv_catpvn(newstr, ss, le);
11060                 ss += le;
11061
11062             /* Line doesn't begin with our indentation? Croak */
11063             }
11064             else {
11065                 Safefree(indent);
11066                 Perl_croak(aTHX_
11067                     "Indentation on line %d of here-doc doesn't match delimiter",
11068                     (int)linecount
11069                 );
11070             }
11071         } /* while */
11072
11073         /* avoid sv_setsv() as we dont wan't to COW here */
11074         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
11075         Safefree(indent);
11076         SvREFCNT_dec_NN(newstr);
11077     }
11078
11079     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11080         SvPV_shrink_to_cur(tmpstr);
11081     }
11082
11083     if (!IN_BYTES) {
11084         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11085             SvUTF8_on(tmpstr);
11086     }
11087
11088     PL_lex_stuff = tmpstr;
11089     pl_yylval.ival = op_type;
11090     return s;
11091
11092   interminable:
11093     if (indent)
11094         Safefree(indent);
11095     SvREFCNT_dec(tmpstr);
11096     CopLINE_set(PL_curcop, origline);
11097     missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
11098 }
11099
11100
11101 /* scan_inputsymbol
11102    takes: position of first '<' in input buffer
11103    returns: position of first char following the matching '>' in
11104             input buffer
11105    side-effects: pl_yylval and lex_op are set.
11106
11107    This code handles:
11108
11109    <>           read from ARGV
11110    <<>>         read from ARGV without magic open
11111    <FH>         read from filehandle
11112    <pkg::FH>    read from package qualified filehandle
11113    <pkg'FH>     read from package qualified filehandle
11114    <$fh>        read from filehandle in $fh
11115    <*.h>        filename glob
11116
11117 */
11118
11119 STATIC char *
11120 S_scan_inputsymbol(pTHX_ char *start)
11121 {
11122     char *s = start;            /* current position in buffer */
11123     char *end;
11124     I32 len;
11125     bool nomagicopen = FALSE;
11126     char *d = PL_tokenbuf;                                      /* start of temp holding space */
11127     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
11128
11129     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11130
11131     end = (char *) memchr(s, '\n', PL_bufend - s);
11132     if (!end)
11133         end = PL_bufend;
11134     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
11135         nomagicopen = TRUE;
11136         *d = '\0';
11137         len = 0;
11138         s += 3;
11139     }
11140     else
11141         s = delimcpy(d, e, s + 1, end, '>', &len);      /* extract until > */
11142
11143     /* die if we didn't have space for the contents of the <>,
11144        or if it didn't end, or if we see a newline
11145     */
11146
11147     if (len >= (I32)sizeof PL_tokenbuf)
11148         Perl_croak(aTHX_ "Excessively long <> operator");
11149     if (s >= end)
11150         Perl_croak(aTHX_ "Unterminated <> operator");
11151
11152     s++;
11153
11154     /* check for <$fh>
11155        Remember, only scalar variables are interpreted as filehandles by
11156        this code.  Anything more complex (e.g., <$fh{$num}>) will be
11157        treated as a glob() call.
11158        This code makes use of the fact that except for the $ at the front,
11159        a scalar variable and a filehandle look the same.
11160     */
11161     if (*d == '$' && d[1]) d++;
11162
11163     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11164     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
11165         d += UTF ? UTF8SKIP(d) : 1;
11166     }
11167
11168     /* If we've tried to read what we allow filehandles to look like, and
11169        there's still text left, then it must be a glob() and not a getline.
11170        Use scan_str to pull out the stuff between the <> and treat it
11171        as nothing more than a string.
11172     */
11173
11174     if (d - PL_tokenbuf != len) {
11175         pl_yylval.ival = OP_GLOB;
11176         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
11177         if (!s)
11178            Perl_croak(aTHX_ "Glob not terminated");
11179         return s;
11180     }
11181     else {
11182         bool readline_overriden = FALSE;
11183         GV *gv_readline;
11184         /* we're in a filehandle read situation */
11185         d = PL_tokenbuf;
11186
11187         /* turn <> into <ARGV> */
11188         if (!len)
11189             Copy("ARGV",d,5,char);
11190
11191         /* Check whether readline() is overriden */
11192         if ((gv_readline = gv_override("readline",8)))
11193             readline_overriden = TRUE;
11194
11195         /* if <$fh>, create the ops to turn the variable into a
11196            filehandle
11197         */
11198         if (*d == '$') {
11199             /* try to find it in the pad for this block, otherwise find
11200                add symbol table ops
11201             */
11202             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11203             if (tmp != NOT_IN_PAD) {
11204                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11205                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11206                     HEK * const stashname = HvNAME_HEK(stash);
11207                     SV * const sym = sv_2mortal(newSVhek(stashname));
11208                     sv_catpvs(sym, "::");
11209                     sv_catpv(sym, d+1);
11210                     d = SvPVX(sym);
11211                     goto intro_sym;
11212                 }
11213                 else {
11214                     OP * const o = newOP(OP_PADSV, 0);
11215                     o->op_targ = tmp;
11216                     PL_lex_op = readline_overriden
11217                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11218                                 op_append_elem(OP_LIST, o,
11219                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11220                         : newUNOP(OP_READLINE, 0, o);
11221                 }
11222             }
11223             else {
11224                 GV *gv;
11225                 ++d;
11226               intro_sym:
11227                 gv = gv_fetchpv(d,
11228                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11229                                 SVt_PV);
11230                 PL_lex_op = readline_overriden
11231                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11232                             op_append_elem(OP_LIST,
11233                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11234                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11235                     : newUNOP(OP_READLINE, 0,
11236                             newUNOP(OP_RV2SV, 0,
11237                                 newGVOP(OP_GV, 0, gv)));
11238             }
11239             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11240             pl_yylval.ival = OP_NULL;
11241         }
11242
11243         /* If it's none of the above, it must be a literal filehandle
11244            (<Foo::BAR> or <FOO>) so build a simple readline OP */
11245         else {
11246             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11247             PL_lex_op = readline_overriden
11248                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11249                         op_append_elem(OP_LIST,
11250                             newGVOP(OP_GV, 0, gv),
11251                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11252                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11253             pl_yylval.ival = OP_NULL;
11254
11255             /* leave the token generation above to avoid confusing the parser */
11256             if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
11257                 no_bareword_filehandle(d);
11258             }
11259         }
11260     }
11261
11262     return s;
11263 }
11264
11265
11266 /* scan_str
11267    takes:
11268         start                   position in buffer
11269         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
11270                                 only if they are of the open/close form
11271         keep_delims             preserve the delimiters around the string
11272         re_reparse              compiling a run-time /(?{})/:
11273                                    collapse // to /,  and skip encoding src
11274         delimp                  if non-null, this is set to the position of
11275                                 the closing delimiter, or just after it if
11276                                 the closing and opening delimiters differ
11277                                 (i.e., the opening delimiter of a substitu-
11278                                 tion replacement)
11279    returns: position to continue reading from buffer
11280    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11281         updates the read buffer.
11282
11283    This subroutine pulls a string out of the input.  It is called for:
11284         q               single quotes           q(literal text)
11285         '               single quotes           'literal text'
11286         qq              double quotes           qq(interpolate $here please)
11287         "               double quotes           "interpolate $here please"
11288         qx              backticks               qx(/bin/ls -l)
11289         `               backticks               `/bin/ls -l`
11290         qw              quote words             @EXPORT_OK = qw( func() $spam )
11291         m//             regexp match            m/this/
11292         s///            regexp substitute       s/this/that/
11293         tr///           string transliterate    tr/this/that/
11294         y///            string transliterate    y/this/that/
11295         ($*@)           sub prototypes          sub foo ($)
11296         (stuff)         sub attr parameters     sub foo : attr(stuff)
11297         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
11298
11299    In most of these cases (all but <>, patterns and transliterate)
11300    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
11301    calls scan_str().  s/// makes yylex() call scan_subst() which calls
11302    scan_str().  tr/// and y/// make yylex() call scan_trans() which
11303    calls scan_str().
11304
11305    It skips whitespace before the string starts, and treats the first
11306    character as the delimiter.  If the delimiter is one of ([{< then
11307    the corresponding "close" character )]}> is used as the closing
11308    delimiter.  It allows quoting of delimiters, and if the string has
11309    balanced delimiters ([{<>}]) it allows nesting.
11310
11311    On success, the SV with the resulting string is put into lex_stuff or,
11312    if that is already non-NULL, into lex_repl. The second case occurs only
11313    when parsing the RHS of the special constructs s/// and tr/// (y///).
11314    For convenience, the terminating delimiter character is stuffed into
11315    SvIVX of the SV.
11316 */
11317
11318 char *
11319 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11320                  char **delimp
11321     )
11322 {
11323     SV *sv;                     /* scalar value: string */
11324     const char *tmps;           /* temp string, used for delimiter matching */
11325     char *s = start;            /* current position in the buffer */
11326     char term;                  /* terminating character */
11327     char *to;                   /* current position in the sv's data */
11328     int brackets = 1;           /* bracket nesting level */
11329     bool d_is_utf8 = FALSE;     /* is there any utf8 content? */
11330     IV termcode;                /* terminating char. code */
11331     U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
11332     STRLEN termlen;             /* length of terminating string */
11333     line_t herelines;
11334
11335     /* The delimiters that have a mirror-image closing one */
11336     const char * opening_delims = "([{<";
11337     const char * closing_delims = ")]}>";
11338
11339     /* The only non-UTF character that isn't a stand alone grapheme is
11340      * white-space, hence can't be a delimiter. */
11341     const char * non_grapheme_msg = "Use of unassigned code point or"
11342                                     " non-standalone grapheme for a delimiter"
11343                                     " is not allowed";
11344     PERL_ARGS_ASSERT_SCAN_STR;
11345
11346     /* skip space before the delimiter */
11347     if (isSPACE(*s)) {
11348         s = skipspace(s);
11349     }
11350
11351     /* mark where we are, in case we need to report errors */
11352     CLINE;
11353
11354     /* after skipping whitespace, the next character is the terminator */
11355     term = *s;
11356     if (!UTF || UTF8_IS_INVARIANT(term)) {
11357         termcode = termstr[0] = term;
11358         termlen = 1;
11359     }
11360     else {
11361         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
11362         if (UTF && UNLIKELY(! is_grapheme((U8 *) start,
11363                                            (U8 *) s,
11364                                            (U8 *) PL_bufend,
11365                                                   termcode)))
11366         {
11367             yyerror(non_grapheme_msg);
11368         }
11369
11370         Copy(s, termstr, termlen, U8);
11371     }
11372
11373     /* mark where we are */
11374     PL_multi_start = CopLINE(PL_curcop);
11375     PL_multi_open = termcode;
11376     herelines = PL_parser->herelines;
11377
11378     /* If the delimiter has a mirror-image closing one, get it */
11379     if (term && (tmps = strchr(opening_delims, term))) {
11380         termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
11381     }
11382
11383     PL_multi_close = termcode;
11384
11385     if (PL_multi_open == PL_multi_close) {
11386         keep_bracketed_quoted = FALSE;
11387     }
11388
11389     /* create a new SV to hold the contents.  79 is the SV's initial length.
11390        What a random number. */
11391     sv = newSV_type(SVt_PVIV);
11392     SvGROW(sv, 80);
11393     SvIV_set(sv, termcode);
11394     (void)SvPOK_only(sv);               /* validate pointer */
11395
11396     /* move past delimiter and try to read a complete string */
11397     if (keep_delims)
11398         sv_catpvn(sv, s, termlen);
11399     s += termlen;
11400     for (;;) {
11401         /* extend sv if need be */
11402         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11403         /* set 'to' to the next character in the sv's string */
11404         to = SvPVX(sv)+SvCUR(sv);
11405
11406         /* if open delimiter is the close delimiter read unbridle */
11407         if (PL_multi_open == PL_multi_close) {
11408             for (; s < PL_bufend; s++,to++) {
11409                 /* embedded newlines increment the current line number */
11410                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11411                     COPLINE_INC_WITH_HERELINES;
11412                 /* handle quoted delimiters */
11413                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11414                     if (!keep_bracketed_quoted
11415                         && (s[1] == term
11416                             || (re_reparse && s[1] == '\\'))
11417                     )
11418                         s++;
11419                     else /* any other quotes are simply copied straight through */
11420                         *to++ = *s++;
11421                 }
11422                 /* terminate when run out of buffer (the for() condition), or
11423                    have found the terminator */
11424                 else if (*s == term) {  /* First byte of terminator matches */
11425                     if (termlen == 1)   /* If is the only byte, are done */
11426                         break;
11427
11428                     /* If the remainder of the terminator matches, also are
11429                      * done, after checking that is a separate grapheme */
11430                     if (   s + termlen <= PL_bufend
11431                         && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
11432                     {
11433                         if (   UTF
11434                             && UNLIKELY(! is_grapheme((U8 *) start,
11435                                                        (U8 *) s,
11436                                                        (U8 *) PL_bufend,
11437                                                               termcode)))
11438                         {
11439                             yyerror(non_grapheme_msg);
11440                         }
11441                         break;
11442                     }
11443                 }
11444                 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
11445                     d_is_utf8 = TRUE;
11446                 }
11447
11448                 *to = *s;
11449             }
11450         }
11451
11452         /* if the terminator isn't the same as the start character (e.g.,
11453            matched brackets), we have to allow more in the quoting, and
11454            be prepared for nested brackets.
11455         */
11456         else {
11457             /* read until we run out of string, or we find the terminator */
11458             for (; s < PL_bufend; s++,to++) {
11459                 /* embedded newlines increment the line count */
11460                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11461                     COPLINE_INC_WITH_HERELINES;
11462                 /* backslashes can escape the open or closing characters */
11463                 if (*s == '\\' && s+1 < PL_bufend) {
11464                     if (!keep_bracketed_quoted
11465                        && ( ((UV)s[1] == PL_multi_open)
11466                          || ((UV)s[1] == PL_multi_close) ))
11467                     {
11468                         s++;
11469                     }
11470                     else
11471                         *to++ = *s++;
11472                 }
11473                 /* allow nested opens and closes */
11474                 else if (*(U8 *) s == PL_multi_close && --brackets <= 0)
11475                     break;
11476                 else if (*(U8 *) s == PL_multi_open)
11477                     brackets++;
11478                 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11479                     d_is_utf8 = TRUE;
11480                 *to = *s;
11481             }
11482         }
11483         /* terminate the copied string and update the sv's end-of-string */
11484         *to = '\0';
11485         SvCUR_set(sv, to - SvPVX_const(sv));
11486
11487         /*
11488          * this next chunk reads more into the buffer if we're not done yet
11489          */
11490
11491         if (s < PL_bufend)
11492             break;              /* handle case where we are done yet :-) */
11493
11494 #ifndef PERL_STRICT_CR
11495         if (to - SvPVX_const(sv) >= 2) {
11496             if (   (to[-2] == '\r' && to[-1] == '\n')
11497                 || (to[-2] == '\n' && to[-1] == '\r'))
11498             {
11499                 to[-2] = '\n';
11500                 to--;
11501                 SvCUR_set(sv, to - SvPVX_const(sv));
11502             }
11503             else if (to[-1] == '\r')
11504                 to[-1] = '\n';
11505         }
11506         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11507             to[-1] = '\n';
11508 #endif
11509
11510         /* if we're out of file, or a read fails, bail and reset the current
11511            line marker so we can report where the unterminated string began
11512         */
11513         COPLINE_INC_WITH_HERELINES;
11514         PL_bufptr = PL_bufend;
11515         if (!lex_next_chunk(0)) {
11516             sv_free(sv);
11517             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11518             return NULL;
11519         }
11520         s = start = PL_bufptr;
11521     }
11522
11523     /* at this point, we have successfully read the delimited string */
11524
11525     if (keep_delims)
11526             sv_catpvn(sv, s, termlen);
11527     s += termlen;
11528
11529     if (d_is_utf8)
11530         SvUTF8_on(sv);
11531
11532     PL_multi_end = CopLINE(PL_curcop);
11533     CopLINE_set(PL_curcop, PL_multi_start);
11534     PL_parser->herelines = herelines;
11535
11536     /* if we allocated too much space, give some back */
11537     if (SvCUR(sv) + 5 < SvLEN(sv)) {
11538         SvLEN_set(sv, SvCUR(sv) + 1);
11539         SvPV_shrink_to_cur(sv);
11540     }
11541
11542     /* decide whether this is the first or second quoted string we've read
11543        for this op
11544     */
11545
11546     if (PL_lex_stuff)
11547         PL_parser->lex_sub_repl = sv;
11548     else
11549         PL_lex_stuff = sv;
11550     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
11551     return s;
11552 }
11553
11554 /*
11555   scan_num
11556   takes: pointer to position in buffer
11557   returns: pointer to new position in buffer
11558   side-effects: builds ops for the constant in pl_yylval.op
11559
11560   Read a number in any of the formats that Perl accepts:
11561
11562   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
11563   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
11564   0b[01](_?[01])*                                       binary integers
11565   0o?[0-7](_?[0-7])*                                    octal integers
11566   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
11567   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
11568
11569   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11570   thing it reads.
11571
11572   If it reads a number without a decimal point or an exponent, it will
11573   try converting the number to an integer and see if it can do so
11574   without loss of precision.
11575 */
11576
11577 char *
11578 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11579 {
11580     const char *s = start;      /* current position in buffer */
11581     char *d;                    /* destination in temp buffer */
11582     char *e;                    /* end of temp buffer */
11583     NV nv;                              /* number read, as a double */
11584     SV *sv = NULL;                      /* place to put the converted number */
11585     bool floatit;                       /* boolean: int or float? */
11586     const char *lastub = NULL;          /* position of last underbar */
11587     static const char* const number_too_long = "Number too long";
11588     bool warned_about_underscore = 0;
11589     I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11590 #define WARN_ABOUT_UNDERSCORE() \
11591         do { \
11592             if (!warned_about_underscore) { \
11593                 warned_about_underscore = 1; \
11594                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11595                                "Misplaced _ in number"); \
11596             } \
11597         } while(0)
11598     /* Hexadecimal floating point.
11599      *
11600      * In many places (where we have quads and NV is IEEE 754 double)
11601      * we can fit the mantissa bits of a NV into an unsigned quad.
11602      * (Note that UVs might not be quads even when we have quads.)
11603      * This will not work everywhere, though (either no quads, or
11604      * using long doubles), in which case we have to resort to NV,
11605      * which will probably mean horrible loss of precision due to
11606      * multiple fp operations. */
11607     bool hexfp = FALSE;
11608     int total_bits = 0;
11609     int significant_bits = 0;
11610 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11611 #  define HEXFP_UQUAD
11612     Uquad_t hexfp_uquad = 0;
11613     int hexfp_frac_bits = 0;
11614 #else
11615 #  define HEXFP_NV
11616     NV hexfp_nv = 0.0;
11617 #endif
11618     NV hexfp_mult = 1.0;
11619     UV high_non_zero = 0; /* highest digit */
11620     int non_zero_integer_digits = 0;
11621     bool new_octal = FALSE;     /* octal with "0o" prefix */
11622
11623     PERL_ARGS_ASSERT_SCAN_NUM;
11624
11625     /* We use the first character to decide what type of number this is */
11626
11627     switch (*s) {
11628     default:
11629         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11630
11631     /* if it starts with a 0, it could be an octal number, a decimal in
11632        0.13 disguise, or a hexadecimal number, or a binary number. */
11633     case '0':
11634         {
11635           /* variables:
11636              u          holds the "number so far"
11637              overflowed was the number more than we can hold?
11638
11639              Shift is used when we add a digit.  It also serves as an "are
11640              we in octal/hex/binary?" indicator to disallow hex characters
11641              when in octal mode.
11642            */
11643             NV n = 0.0;
11644             UV u = 0;
11645             bool overflowed = FALSE;
11646             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
11647             bool has_digs = FALSE;
11648             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11649             static const char* const bases[5] =
11650               { "", "binary", "", "octal", "hexadecimal" };
11651             static const char* const Bases[5] =
11652               { "", "Binary", "", "Octal", "Hexadecimal" };
11653             static const char* const maxima[5] =
11654               { "",
11655                 "0b11111111111111111111111111111111",
11656                 "",
11657                 "037777777777",
11658                 "0xffffffff" };
11659
11660             /* check for hex */
11661             if (isALPHA_FOLD_EQ(s[1], 'x')) {
11662                 shift = 4;
11663                 s += 2;
11664                 just_zero = FALSE;
11665             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11666                 shift = 1;
11667                 s += 2;
11668                 just_zero = FALSE;
11669             }
11670             /* check for a decimal in disguise */
11671             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11672                 goto decimal;
11673             /* so it must be octal */
11674             else {
11675                 shift = 3;
11676                 s++;
11677                 if (isALPHA_FOLD_EQ(*s, 'o')) {
11678                     s++;
11679                     just_zero = FALSE;
11680                     new_octal = TRUE;
11681                 }
11682             }
11683
11684             if (*s == '_') {
11685                 WARN_ABOUT_UNDERSCORE();
11686                lastub = s++;
11687             }
11688
11689             /* read the rest of the number */
11690             for (;;) {
11691                 /* x is used in the overflow test,
11692                    b is the digit we're adding on. */
11693                 UV x, b;
11694
11695                 switch (*s) {
11696
11697                 /* if we don't mention it, we're done */
11698                 default:
11699                     goto out;
11700
11701                 /* _ are ignored -- but warned about if consecutive */
11702                 case '_':
11703                     if (lastub && s == lastub + 1)
11704                         WARN_ABOUT_UNDERSCORE();
11705                     lastub = s++;
11706                     break;
11707
11708                 /* 8 and 9 are not octal */
11709                 case '8': case '9':
11710                     if (shift == 3)
11711                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11712                     /* FALLTHROUGH */
11713
11714                 /* octal digits */
11715                 case '2': case '3': case '4':
11716                 case '5': case '6': case '7':
11717                     if (shift == 1)
11718                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11719                     /* FALLTHROUGH */
11720
11721                 case '0': case '1':
11722                     b = *s++ & 15;              /* ASCII digit -> value of digit */
11723                     goto digit;
11724
11725                 /* hex digits */
11726                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11727                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11728                     /* make sure they said 0x */
11729                     if (shift != 4)
11730                         goto out;
11731                     b = (*s++ & 7) + 9;
11732
11733                     /* Prepare to put the digit we have onto the end
11734                        of the number so far.  We check for overflows.
11735                     */
11736
11737                   digit:
11738                     just_zero = FALSE;
11739                     has_digs = TRUE;
11740                     if (!overflowed) {
11741                         assert(shift >= 0);
11742                         x = u << shift; /* make room for the digit */
11743
11744                         total_bits += shift;
11745
11746                         if ((x >> shift) != u
11747                             && !(PL_hints & HINT_NEW_BINARY)) {
11748                             overflowed = TRUE;
11749                             n = (NV) u;
11750                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11751                                              "Integer overflow in %s number",
11752                                              bases[shift]);
11753                         } else
11754                             u = x | b;          /* add the digit to the end */
11755                     }
11756                     if (overflowed) {
11757                         n *= nvshift[shift];
11758                         /* If an NV has not enough bits in its
11759                          * mantissa to represent an UV this summing of
11760                          * small low-order numbers is a waste of time
11761                          * (because the NV cannot preserve the
11762                          * low-order bits anyway): we could just
11763                          * remember when did we overflow and in the
11764                          * end just multiply n by the right
11765                          * amount. */
11766                         n += (NV) b;
11767                     }
11768
11769                     if (high_non_zero == 0 && b > 0)
11770                         high_non_zero = b;
11771
11772                     if (high_non_zero)
11773                         non_zero_integer_digits++;
11774
11775                     /* this could be hexfp, but peek ahead
11776                      * to avoid matching ".." */
11777                     if (UNLIKELY(HEXFP_PEEK(s))) {
11778                         goto out;
11779                     }
11780
11781                     break;
11782                 }
11783             }
11784
11785           /* if we get here, we had success: make a scalar value from
11786              the number.
11787           */
11788           out:
11789
11790             /* final misplaced underbar check */
11791             if (s[-1] == '_')
11792                 WARN_ABOUT_UNDERSCORE();
11793
11794             if (UNLIKELY(HEXFP_PEEK(s))) {
11795                 /* Do sloppy (on the underbars) but quick detection
11796                  * (and value construction) for hexfp, the decimal
11797                  * detection will shortly be more thorough with the
11798                  * underbar checks. */
11799                 const char* h = s;
11800                 significant_bits = non_zero_integer_digits * shift;
11801 #ifdef HEXFP_UQUAD
11802                 hexfp_uquad = u;
11803 #else /* HEXFP_NV */
11804                 hexfp_nv = u;
11805 #endif
11806                 /* Ignore the leading zero bits of
11807                  * the high (first) non-zero digit. */
11808                 if (high_non_zero) {
11809                     if (high_non_zero < 0x8)
11810                         significant_bits--;
11811                     if (high_non_zero < 0x4)
11812                         significant_bits--;
11813                     if (high_non_zero < 0x2)
11814                         significant_bits--;
11815                 }
11816
11817                 if (*h == '.') {
11818 #ifdef HEXFP_NV
11819                     NV nv_mult = 1.0;
11820 #endif
11821                     bool accumulate = TRUE;
11822                     U8 b;
11823                     int lim = 1 << shift;
11824                     for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11825                                *h == '_'); h++) {
11826                         if (isXDIGIT(*h)) {
11827                             significant_bits += shift;
11828 #ifdef HEXFP_UQUAD
11829                             if (accumulate) {
11830                                 if (significant_bits < NV_MANT_DIG) {
11831                                     /* We are in the long "run" of xdigits,
11832                                      * accumulate the full four bits. */
11833                                     assert(shift >= 0);
11834                                     hexfp_uquad <<= shift;
11835                                     hexfp_uquad |= b;
11836                                     hexfp_frac_bits += shift;
11837                                 } else if (significant_bits - shift < NV_MANT_DIG) {
11838                                     /* We are at a hexdigit either at,
11839                                      * or straddling, the edge of mantissa.
11840                                      * We will try grabbing as many as
11841                                      * possible bits. */
11842                                     int tail =
11843                                       significant_bits - NV_MANT_DIG;
11844                                     if (tail <= 0)
11845                                        tail += shift;
11846                                     assert(tail >= 0);
11847                                     hexfp_uquad <<= tail;
11848                                     assert((shift - tail) >= 0);
11849                                     hexfp_uquad |= b >> (shift - tail);
11850                                     hexfp_frac_bits += tail;
11851
11852                                     /* Ignore the trailing zero bits
11853                                      * of the last non-zero xdigit.
11854                                      *
11855                                      * The assumption here is that if
11856                                      * one has input of e.g. the xdigit
11857                                      * eight (0x8), there is only one
11858                                      * bit being input, not the full
11859                                      * four bits.  Conversely, if one
11860                                      * specifies a zero xdigit, the
11861                                      * assumption is that one really
11862                                      * wants all those bits to be zero. */
11863                                     if (b) {
11864                                         if ((b & 0x1) == 0x0) {
11865                                             significant_bits--;
11866                                             if ((b & 0x2) == 0x0) {
11867                                                 significant_bits--;
11868                                                 if ((b & 0x4) == 0x0) {
11869                                                     significant_bits--;
11870                                                 }
11871                                             }
11872                                         }
11873                                     }
11874
11875                                     accumulate = FALSE;
11876                                 }
11877                             } else {
11878                                 /* Keep skipping the xdigits, and
11879                                  * accumulating the significant bits,
11880                                  * but do not shift the uquad
11881                                  * (which would catastrophically drop
11882                                  * high-order bits) or accumulate the
11883                                  * xdigits anymore. */
11884                             }
11885 #else /* HEXFP_NV */
11886                             if (accumulate) {
11887                                 nv_mult /= nvshift[shift];
11888                                 if (nv_mult > 0.0)
11889                                     hexfp_nv += b * nv_mult;
11890                                 else
11891                                     accumulate = FALSE;
11892                             }
11893 #endif
11894                         }
11895                         if (significant_bits >= NV_MANT_DIG)
11896                             accumulate = FALSE;
11897                     }
11898                 }
11899
11900                 if ((total_bits > 0 || significant_bits > 0) &&
11901                     isALPHA_FOLD_EQ(*h, 'p')) {
11902                     bool negexp = FALSE;
11903                     h++;
11904                     if (*h == '+')
11905                         h++;
11906                     else if (*h == '-') {
11907                         negexp = TRUE;
11908                         h++;
11909                     }
11910                     if (isDIGIT(*h)) {
11911                         I32 hexfp_exp = 0;
11912                         while (isDIGIT(*h) || *h == '_') {
11913                             if (isDIGIT(*h)) {
11914                                 hexfp_exp *= 10;
11915                                 hexfp_exp += *h - '0';
11916 #ifdef NV_MIN_EXP
11917                                 if (negexp
11918                                     && -hexfp_exp < NV_MIN_EXP - 1) {
11919                                     /* NOTE: this means that the exponent
11920                                      * underflow warning happens for
11921                                      * the IEEE 754 subnormals (denormals),
11922                                      * because DBL_MIN_EXP etc are the lowest
11923                                      * possible binary (or, rather, DBL_RADIX-base)
11924                                      * exponent for normals, not subnormals.
11925                                      *
11926                                      * This may or may not be a good thing. */
11927                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11928                                                    "Hexadecimal float: exponent underflow");
11929                                     break;
11930                                 }
11931 #endif
11932 #ifdef NV_MAX_EXP
11933                                 if (!negexp
11934                                     && hexfp_exp > NV_MAX_EXP - 1) {
11935                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11936                                                    "Hexadecimal float: exponent overflow");
11937                                     break;
11938                                 }
11939 #endif
11940                             }
11941                             h++;
11942                         }
11943                         if (negexp)
11944                             hexfp_exp = -hexfp_exp;
11945 #ifdef HEXFP_UQUAD
11946                         hexfp_exp -= hexfp_frac_bits;
11947 #endif
11948                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
11949                         hexfp = TRUE;
11950                         goto decimal;
11951                     }
11952                 }
11953             }
11954
11955             if (!just_zero && !has_digs) {
11956                 /* 0x, 0o or 0b with no digits, treat it as an error.
11957                    Originally this backed up the parse before the b or
11958                    x, but that has the potential for silent changes in
11959                    behaviour, like for: "0x.3" and "0x+$foo".
11960                 */
11961                 const char *d = s;
11962                 char *oldbp = PL_bufptr;
11963                 if (*d) ++d; /* so the user sees the bad non-digit */
11964                 PL_bufptr = (char *)d; /* so yyerror reports the context */
11965                 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
11966                                   bases[shift]));
11967                 PL_bufptr = oldbp;
11968             }
11969
11970             if (overflowed) {
11971                 if (n > 4294967295.0)
11972                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11973                                    "%s number > %s non-portable",
11974                                    Bases[shift],
11975                                    new_octal ? "0o37777777777" : maxima[shift]);
11976                 sv = newSVnv(n);
11977             }
11978             else {
11979 #if UVSIZE > 4
11980                 if (u > 0xffffffff)
11981                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11982                                    "%s number > %s non-portable",
11983                                    Bases[shift],
11984                                    new_octal ? "0o37777777777" : maxima[shift]);
11985 #endif
11986                 sv = newSVuv(u);
11987             }
11988             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11989                 sv = new_constant(start, s - start, "integer",
11990                                   sv, NULL, NULL, 0, NULL);
11991             else if (PL_hints & HINT_NEW_BINARY)
11992                 sv = new_constant(start, s - start, "binary",
11993                                   sv, NULL, NULL, 0, NULL);
11994         }
11995         break;
11996
11997     /*
11998       handle decimal numbers.
11999       we're also sent here when we read a 0 as the first digit
12000     */
12001     case '1': case '2': case '3': case '4': case '5':
12002     case '6': case '7': case '8': case '9': case '.':
12003       decimal:
12004         d = PL_tokenbuf;
12005         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12006         floatit = FALSE;
12007         if (hexfp) {
12008             floatit = TRUE;
12009             *d++ = '0';
12010             switch (shift) {
12011             case 4:
12012                 *d++ = 'x';
12013                 s = start + 2;
12014                 break;
12015             case 3:
12016                 if (new_octal) {
12017                     *d++ = 'o';
12018                     s = start + 2;
12019                     break;
12020                 }
12021                 s = start + 1;
12022                 break;
12023             case 1:
12024                 *d++ = 'b';
12025                 s = start + 2;
12026                 break;
12027             default:
12028                 NOT_REACHED; /* NOTREACHED */
12029             }
12030         }
12031
12032         /* read next group of digits and _ and copy into d */
12033         while (isDIGIT(*s)
12034                || *s == '_'
12035                || UNLIKELY(hexfp && isXDIGIT(*s)))
12036         {
12037             /* skip underscores, checking for misplaced ones
12038                if -w is on
12039             */
12040             if (*s == '_') {
12041                 if (lastub && s == lastub + 1)
12042                     WARN_ABOUT_UNDERSCORE();
12043                 lastub = s++;
12044             }
12045             else {
12046                 /* check for end of fixed-length buffer */
12047                 if (d >= e)
12048                     Perl_croak(aTHX_ "%s", number_too_long);
12049                 /* if we're ok, copy the character */
12050                 *d++ = *s++;
12051             }
12052         }
12053
12054         /* final misplaced underbar check */
12055         if (lastub && s == lastub + 1)
12056             WARN_ABOUT_UNDERSCORE();
12057
12058         /* read a decimal portion if there is one.  avoid
12059            3..5 being interpreted as the number 3. followed
12060            by .5
12061         */
12062         if (*s == '.' && s[1] != '.') {
12063             floatit = TRUE;
12064             *d++ = *s++;
12065
12066             if (*s == '_') {
12067                 WARN_ABOUT_UNDERSCORE();
12068                 lastub = s;
12069             }
12070
12071             /* copy, ignoring underbars, until we run out of digits.
12072             */
12073             for (; isDIGIT(*s)
12074                    || *s == '_'
12075                    || UNLIKELY(hexfp && isXDIGIT(*s));
12076                  s++)
12077             {
12078                 /* fixed length buffer check */
12079                 if (d >= e)
12080                     Perl_croak(aTHX_ "%s", number_too_long);
12081                 if (*s == '_') {
12082                    if (lastub && s == lastub + 1)
12083                         WARN_ABOUT_UNDERSCORE();
12084                    lastub = s;
12085                 }
12086                 else
12087                     *d++ = *s;
12088             }
12089             /* fractional part ending in underbar? */
12090             if (s[-1] == '_')
12091                 WARN_ABOUT_UNDERSCORE();
12092             if (*s == '.' && isDIGIT(s[1])) {
12093                 /* oops, it's really a v-string, but without the "v" */
12094                 s = start;
12095                 goto vstring;
12096             }
12097         }
12098
12099         /* read exponent part, if present */
12100         if ((isALPHA_FOLD_EQ(*s, 'e')
12101               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
12102             && memCHRs("+-0123456789_", s[1]))
12103         {
12104             int exp_digits = 0;
12105             const char *save_s = s;
12106             char * save_d = d;
12107
12108             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
12109                ditto for p (hexfloats) */
12110             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
12111                 /* At least some Mach atof()s don't grok 'E' */
12112                 *d++ = 'e';
12113             }
12114             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
12115                 *d++ = 'p';
12116             }
12117
12118             s++;
12119
12120
12121             /* stray preinitial _ */
12122             if (*s == '_') {
12123                 WARN_ABOUT_UNDERSCORE();
12124                 lastub = s++;
12125             }
12126
12127             /* allow positive or negative exponent */
12128             if (*s == '+' || *s == '-')
12129                 *d++ = *s++;
12130
12131             /* stray initial _ */
12132             if (*s == '_') {
12133                 WARN_ABOUT_UNDERSCORE();
12134                 lastub = s++;
12135             }
12136
12137             /* read digits of exponent */
12138             while (isDIGIT(*s) || *s == '_') {
12139                 if (isDIGIT(*s)) {
12140                     ++exp_digits;
12141                     if (d >= e)
12142                         Perl_croak(aTHX_ "%s", number_too_long);
12143                     *d++ = *s++;
12144                 }
12145                 else {
12146                    if (((lastub && s == lastub + 1)
12147                         || (!isDIGIT(s[1]) && s[1] != '_')))
12148                         WARN_ABOUT_UNDERSCORE();
12149                    lastub = s++;
12150                 }
12151             }
12152
12153             if (!exp_digits) {
12154                 /* no exponent digits, the [eEpP] could be for something else,
12155                  * though in practice we don't get here for p since that's preparsed
12156                  * earlier, and results in only the 0xX being consumed, so behave similarly
12157                  * for decimal floats and consume only the D.DD, leaving the [eE] to the
12158                  * next token.
12159                  */
12160                 s = save_s;
12161                 d = save_d;
12162             }
12163             else {
12164                 floatit = TRUE;
12165             }
12166         }
12167
12168
12169         /*
12170            We try to do an integer conversion first if no characters
12171            indicating "float" have been found.
12172          */
12173
12174         if (!floatit) {
12175             UV uv;
12176             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12177
12178             if (flags == IS_NUMBER_IN_UV) {
12179               if (uv <= IV_MAX)
12180                 sv = newSViv(uv); /* Prefer IVs over UVs. */
12181               else
12182                 sv = newSVuv(uv);
12183             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12184               if (uv <= (UV) IV_MIN)
12185                 sv = newSViv(-(IV)uv);
12186               else
12187                 floatit = TRUE;
12188             } else
12189               floatit = TRUE;
12190         }
12191         if (floatit) {
12192             /* terminate the string */
12193             *d = '\0';
12194             if (UNLIKELY(hexfp)) {
12195 #  ifdef NV_MANT_DIG
12196                 if (significant_bits > NV_MANT_DIG)
12197                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12198                                    "Hexadecimal float: mantissa overflow");
12199 #  endif
12200 #ifdef HEXFP_UQUAD
12201                 nv = hexfp_uquad * hexfp_mult;
12202 #else /* HEXFP_NV */
12203                 nv = hexfp_nv * hexfp_mult;
12204 #endif
12205             } else {
12206                 nv = Atof(PL_tokenbuf);
12207             }
12208             sv = newSVnv(nv);
12209         }
12210
12211         if ( floatit
12212              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12213             const char *const key = floatit ? "float" : "integer";
12214             const STRLEN keylen = floatit ? 5 : 7;
12215             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12216                                 key, keylen, sv, NULL, NULL, 0, NULL);
12217         }
12218         break;
12219
12220     /* if it starts with a v, it could be a v-string */
12221     case 'v':
12222     vstring:
12223                 sv = newSV(5); /* preallocate storage space */
12224                 ENTER_with_name("scan_vstring");
12225                 SAVEFREESV(sv);
12226                 s = scan_vstring(s, PL_bufend, sv);
12227                 SvREFCNT_inc_simple_void_NN(sv);
12228                 LEAVE_with_name("scan_vstring");
12229         break;
12230     }
12231
12232     /* make the op for the constant and return */
12233
12234     if (sv)
12235         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12236     else
12237         lvalp->opval = NULL;
12238
12239     return (char *)s;
12240 }
12241
12242 STATIC char *
12243 S_scan_formline(pTHX_ char *s)
12244 {
12245     SV * const stuff = newSVpvs("");
12246     bool needargs = FALSE;
12247     bool eofmt = FALSE;
12248
12249     PERL_ARGS_ASSERT_SCAN_FORMLINE;
12250
12251     while (!needargs) {
12252         char *eol;
12253         if (*s == '.') {
12254             char *t = s+1;
12255 #ifdef PERL_STRICT_CR
12256             while (SPACE_OR_TAB(*t))
12257                 t++;
12258 #else
12259             while (SPACE_OR_TAB(*t) || *t == '\r')
12260                 t++;
12261 #endif
12262             if (*t == '\n' || t == PL_bufend) {
12263                 eofmt = TRUE;
12264                 break;
12265             }
12266         }
12267         eol = (char *) memchr(s,'\n',PL_bufend-s);
12268         if (!eol++)
12269                 eol = PL_bufend;
12270         if (*s != '#') {
12271             char *t;
12272             for (t = s; t < eol; t++) {
12273                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12274                     needargs = FALSE;
12275                     goto enough;        /* ~~ must be first line in formline */
12276                 }
12277                 if (*t == '@' || *t == '^')
12278                     needargs = TRUE;
12279             }
12280             if (eol > s) {
12281                 sv_catpvn(stuff, s, eol-s);
12282 #ifndef PERL_STRICT_CR
12283                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12284                     char *end = SvPVX(stuff) + SvCUR(stuff);
12285                     end[-2] = '\n';
12286                     end[-1] = '\0';
12287                     SvCUR_set(stuff, SvCUR(stuff) - 1);
12288                 }
12289 #endif
12290             }
12291             else
12292               break;
12293         }
12294         s = (char*)eol;
12295         if ((PL_rsfp || PL_parser->filtered)
12296          && PL_parser->form_lex_state == LEX_NORMAL) {
12297             bool got_some;
12298             PL_bufptr = PL_bufend;
12299             COPLINE_INC_WITH_HERELINES;
12300             got_some = lex_next_chunk(0);
12301             CopLINE_dec(PL_curcop);
12302             s = PL_bufptr;
12303             if (!got_some)
12304                 break;
12305         }
12306         incline(s, PL_bufend);
12307     }
12308   enough:
12309     if (!SvCUR(stuff) || needargs)
12310         PL_lex_state = PL_parser->form_lex_state;
12311     if (SvCUR(stuff)) {
12312         PL_expect = XSTATE;
12313         if (needargs) {
12314             const char *s2 = s;
12315             while (isSPACE(*s2) && *s2 != '\n')
12316                 s2++;
12317             if (*s2 == '{') {
12318                 PL_expect = XTERMBLOCK;
12319                 NEXTVAL_NEXTTOKE.ival = 0;
12320                 force_next(DO);
12321             }
12322             NEXTVAL_NEXTTOKE.ival = 0;
12323             force_next(FORMLBRACK);
12324         }
12325         if (!IN_BYTES) {
12326             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12327                 SvUTF8_on(stuff);
12328         }
12329         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12330         force_next(THING);
12331     }
12332     else {
12333         SvREFCNT_dec(stuff);
12334         if (eofmt)
12335             PL_lex_formbrack = 0;
12336     }
12337     return s;
12338 }
12339
12340 I32
12341 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12342 {
12343     const I32 oldsavestack_ix = PL_savestack_ix;
12344     CV* const outsidecv = PL_compcv;
12345
12346     SAVEI32(PL_subline);
12347     save_item(PL_subname);
12348     SAVESPTR(PL_compcv);
12349
12350     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12351     CvFLAGS(PL_compcv) |= flags;
12352
12353     PL_subline = CopLINE(PL_curcop);
12354     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12355     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12356     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12357     if (outsidecv && CvPADLIST(outsidecv))
12358         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12359
12360     return oldsavestack_ix;
12361 }
12362
12363
12364 /* Do extra initialisation of a CV (typically one just created by
12365  * start_subparse()) if that CV is for a named sub
12366  */
12367
12368 void
12369 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12370 {
12371     PERL_ARGS_ASSERT_INIT_NAMED_CV;
12372
12373     if (nameop->op_type == OP_CONST) {
12374         const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12375         if (   strEQ(name, "BEGIN")
12376             || strEQ(name, "END")
12377             || strEQ(name, "INIT")
12378             || strEQ(name, "CHECK")
12379             || strEQ(name, "UNITCHECK")
12380         )
12381           CvSPECIAL_on(cv);
12382     }
12383     else
12384     /* State subs inside anonymous subs need to be
12385      clonable themselves. */
12386     if (   CvANON(CvOUTSIDE(cv))
12387         || CvCLONE(CvOUTSIDE(cv))
12388         || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12389                         CvOUTSIDE(cv)
12390                      ))[nameop->op_targ])
12391     )
12392       CvCLONE_on(cv);
12393 }
12394
12395
12396 static int
12397 S_yywarn(pTHX_ const char *const s, U32 flags)
12398 {
12399     PERL_ARGS_ASSERT_YYWARN;
12400
12401     PL_in_eval |= EVAL_WARNONLY;
12402     yyerror_pv(s, flags);
12403     return 0;
12404 }
12405
12406 void
12407 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
12408 {
12409     PERL_ARGS_ASSERT_ABORT_EXECUTION;
12410
12411     if (PL_minus_c)
12412         Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
12413     else {
12414         Perl_croak(aTHX_
12415                 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
12416     }
12417     NOT_REACHED; /* NOTREACHED */
12418 }
12419
12420 void
12421 Perl_yyquit(pTHX)
12422 {
12423     /* Called, after at least one error has been found, to abort the parse now,
12424      * instead of trying to forge ahead */
12425
12426     yyerror_pvn(NULL, 0, 0);
12427 }
12428
12429 int
12430 Perl_yyerror(pTHX_ const char *const s)
12431 {
12432     PERL_ARGS_ASSERT_YYERROR;
12433     return yyerror_pvn(s, strlen(s), 0);
12434 }
12435
12436 int
12437 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12438 {
12439     PERL_ARGS_ASSERT_YYERROR_PV;
12440     return yyerror_pvn(s, strlen(s), flags);
12441 }
12442
12443 int
12444 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12445 {
12446     const char *context = NULL;
12447     int contlen = -1;
12448     SV *msg;
12449     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12450     int yychar  = PL_parser->yychar;
12451
12452     /* Output error message 's' with length 'len'.  'flags' are SV flags that
12453      * apply.  If the number of errors found is large enough, it abandons
12454      * parsing.  If 's' is NULL, there is no message, and it abandons
12455      * processing unconditionally */
12456
12457     if (s != NULL) {
12458         if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp))
12459             sv_catpvs(where_sv, "at EOF");
12460         else if (   PL_oldoldbufptr
12461                  && PL_bufptr > PL_oldoldbufptr
12462                  && PL_bufptr - PL_oldoldbufptr < 200
12463                  && PL_oldoldbufptr != PL_oldbufptr
12464                  && PL_oldbufptr != PL_bufptr)
12465         {
12466             while (isSPACE(*PL_oldoldbufptr))
12467                 PL_oldoldbufptr++;
12468             context = PL_oldoldbufptr;
12469             contlen = PL_bufptr - PL_oldoldbufptr;
12470         }
12471         else if (  PL_oldbufptr
12472                 && PL_bufptr > PL_oldbufptr
12473                 && PL_bufptr - PL_oldbufptr < 200
12474                 && PL_oldbufptr != PL_bufptr)
12475         {
12476             while (isSPACE(*PL_oldbufptr))
12477                 PL_oldbufptr++;
12478             context = PL_oldbufptr;
12479             contlen = PL_bufptr - PL_oldbufptr;
12480         }
12481         else if (yychar > 255)
12482             sv_catpvs(where_sv, "next token ???");
12483         else if (yychar == YYEMPTY) {
12484             if (PL_lex_state == LEX_NORMAL)
12485                 sv_catpvs(where_sv, "at end of line");
12486             else if (PL_lex_inpat)
12487                 sv_catpvs(where_sv, "within pattern");
12488             else
12489                 sv_catpvs(where_sv, "within string");
12490         }
12491         else {
12492             sv_catpvs(where_sv, "next char ");
12493             if (yychar < 32)
12494                 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12495             else if (isPRINT_LC(yychar)) {
12496                 const char string = yychar;
12497                 sv_catpvn(where_sv, &string, 1);
12498             }
12499             else
12500                 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12501         }
12502         msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12503         Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
12504             OutCopFILE(PL_curcop),
12505             (IV)(PL_parser->preambling == NOLINE
12506                    ? CopLINE(PL_curcop)
12507                    : PL_parser->preambling));
12508         if (context)
12509             Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12510                                  UTF8fARG(UTF, contlen, context));
12511         else
12512             Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12513         if (   PL_multi_start < PL_multi_end
12514             && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12515         {
12516             Perl_sv_catpvf(aTHX_ msg,
12517             "  (Might be a runaway multi-line %c%c string starting on"
12518             " line %" IVdf ")\n",
12519                     (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12520             PL_multi_end = 0;
12521         }
12522         if (PL_in_eval & EVAL_WARNONLY) {
12523             PL_in_eval &= ~EVAL_WARNONLY;
12524             Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12525         }
12526         else {
12527             qerror(msg);
12528         }
12529     }
12530     if (s == NULL || PL_error_count >= 10) {
12531         const char * msg = "";
12532         const char * const name = OutCopFILE(PL_curcop);
12533
12534         if (PL_in_eval) {
12535             SV * errsv = ERRSV;
12536             if (SvCUR(errsv)) {
12537                 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
12538             }
12539         }
12540
12541         if (s == NULL) {
12542             abort_execution(msg, name);
12543         }
12544         else {
12545             Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
12546         }
12547     }
12548     PL_in_my = 0;
12549     PL_in_my_stash = NULL;
12550     return 0;
12551 }
12552
12553 STATIC char*
12554 S_swallow_bom(pTHX_ U8 *s)
12555 {
12556     const STRLEN slen = SvCUR(PL_linestr);
12557
12558     PERL_ARGS_ASSERT_SWALLOW_BOM;
12559
12560     switch (s[0]) {
12561     case 0xFF:
12562         if (s[1] == 0xFE) {
12563             /* UTF-16 little-endian? (or UTF-32LE?) */
12564             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
12565                 /* diag_listed_as: Unsupported script encoding %s */
12566                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12567 #ifndef PERL_NO_UTF16_FILTER
12568 #ifdef DEBUGGING
12569             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12570 #endif
12571             s += 2;
12572             if (PL_bufend > (char*)s) {
12573                 s = add_utf16_textfilter(s, TRUE);
12574             }
12575 #else
12576             /* diag_listed_as: Unsupported script encoding %s */
12577             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12578 #endif
12579         }
12580         break;
12581     case 0xFE:
12582         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
12583 #ifndef PERL_NO_UTF16_FILTER
12584 #ifdef DEBUGGING
12585             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12586 #endif
12587             s += 2;
12588             if (PL_bufend > (char *)s) {
12589                 s = add_utf16_textfilter(s, FALSE);
12590             }
12591 #else
12592             /* diag_listed_as: Unsupported script encoding %s */
12593             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12594 #endif
12595         }
12596         break;
12597     case BOM_UTF8_FIRST_BYTE: {
12598         if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
12599 #ifdef DEBUGGING
12600             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12601 #endif
12602             s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
12603         }
12604         break;
12605     }
12606     case 0:
12607         if (slen > 3) {
12608              if (s[1] == 0) {
12609                   if (s[2] == 0xFE && s[3] == 0xFF) {
12610                        /* UTF-32 big-endian */
12611                        /* diag_listed_as: Unsupported script encoding %s */
12612                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
12613                   }
12614              }
12615              else if (s[2] == 0 && s[3] != 0) {
12616                   /* Leading bytes
12617                    * 00 xx 00 xx
12618                    * are a good indicator of UTF-16BE. */
12619 #ifndef PERL_NO_UTF16_FILTER
12620 #ifdef DEBUGGING
12621                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12622 #endif
12623                   s = add_utf16_textfilter(s, FALSE);
12624 #else
12625                   /* diag_listed_as: Unsupported script encoding %s */
12626                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12627 #endif
12628              }
12629         }
12630         break;
12631
12632     default:
12633          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12634                   /* Leading bytes
12635                    * xx 00 xx 00
12636                    * are a good indicator of UTF-16LE. */
12637 #ifndef PERL_NO_UTF16_FILTER
12638 #ifdef DEBUGGING
12639               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12640 #endif
12641               s = add_utf16_textfilter(s, TRUE);
12642 #else
12643               /* diag_listed_as: Unsupported script encoding %s */
12644               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12645 #endif
12646          }
12647     }
12648     return (char*)s;
12649 }
12650
12651
12652 #ifndef PERL_NO_UTF16_FILTER
12653 static I32
12654 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12655 {
12656     SV *const filter = FILTER_DATA(idx);
12657     /* We re-use this each time round, throwing the contents away before we
12658        return.  */
12659     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12660     SV *const utf8_buffer = filter;
12661     IV status = IoPAGE(filter);
12662     const bool reverse = cBOOL(IoLINES(filter));
12663     I32 retval;
12664
12665     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
12666
12667     /* As we're automatically added, at the lowest level, and hence only called
12668        from this file, we can be sure that we're not called in block mode. Hence
12669        don't bother writing code to deal with block mode.  */
12670     if (maxlen) {
12671         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12672     }
12673     if (status < 0) {
12674         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12675     }
12676     DEBUG_P(PerlIO_printf(Perl_debug_log,
12677                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12678                           FPTR2DPTR(void *, S_utf16_textfilter),
12679                           reverse ? 'l' : 'b', idx, maxlen, status,
12680                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12681
12682     while (1) {
12683         STRLEN chars;
12684         STRLEN have;
12685         Size_t newlen;
12686         U8 *end;
12687         /* First, look in our buffer of existing UTF-8 data:  */
12688         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12689
12690         if (nl) {
12691             ++nl;
12692         } else if (status == 0) {
12693             /* EOF */
12694             IoPAGE(filter) = 0;
12695             nl = SvEND(utf8_buffer);
12696         }
12697         if (nl) {
12698             STRLEN got = nl - SvPVX(utf8_buffer);
12699             /* Did we have anything to append?  */
12700             retval = got != 0;
12701             sv_catpvn(sv, SvPVX(utf8_buffer), got);
12702             /* Everything else in this code works just fine if SVp_POK isn't
12703                set.  This, however, needs it, and we need it to work, else
12704                we loop infinitely because the buffer is never consumed.  */
12705             sv_chop(utf8_buffer, nl);
12706             break;
12707         }
12708
12709         /* OK, not a complete line there, so need to read some more UTF-16.
12710            Read an extra octect if the buffer currently has an odd number. */
12711         while (1) {
12712             if (status <= 0)
12713                 break;
12714             if (SvCUR(utf16_buffer) >= 2) {
12715                 /* Location of the high octet of the last complete code point.
12716                    Gosh, UTF-16 is a pain. All the benefits of variable length,
12717                    *coupled* with all the benefits of partial reads and
12718                    endianness.  */
12719                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12720                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12721
12722                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12723                     break;
12724                 }
12725
12726                 /* We have the first half of a surrogate. Read more.  */
12727                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12728             }
12729
12730             status = FILTER_READ(idx + 1, utf16_buffer,
12731                                  160 + (SvCUR(utf16_buffer) & 1));
12732             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12733             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12734             if (status < 0) {
12735                 /* Error */
12736                 IoPAGE(filter) = status;
12737                 return status;
12738             }
12739         }
12740
12741         /* 'chars' isn't quite the right name, as code points above 0xFFFF
12742          * require 4 bytes per char */
12743         chars = SvCUR(utf16_buffer) >> 1;
12744         have = SvCUR(utf8_buffer);
12745
12746         /* Assume the worst case size as noted by the functions: twice the
12747          * number of input bytes */
12748         SvGROW(utf8_buffer, have + chars * 4 + 1);
12749
12750         if (reverse) {
12751             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12752                                          (U8*)SvPVX_const(utf8_buffer) + have,
12753                                          chars * 2, &newlen);
12754         } else {
12755             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12756                                 (U8*)SvPVX_const(utf8_buffer) + have,
12757                                 chars * 2, &newlen);
12758         }
12759         SvCUR_set(utf8_buffer, have + newlen);
12760         *end = '\0';
12761
12762         /* No need to keep this SV "well-formed" with a '\0' after the end, as
12763            it's private to us, and utf16_to_utf8{,reversed} take a
12764            (pointer,length) pair, rather than a NUL-terminated string.  */
12765         if(SvCUR(utf16_buffer) & 1) {
12766             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12767             SvCUR_set(utf16_buffer, 1);
12768         } else {
12769             SvCUR_set(utf16_buffer, 0);
12770         }
12771     }
12772     DEBUG_P(PerlIO_printf(Perl_debug_log,
12773                           "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12774                           status,
12775                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12776     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12777     return retval;
12778 }
12779
12780 static U8 *
12781 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12782 {
12783     SV *filter = filter_add(S_utf16_textfilter, NULL);
12784
12785     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12786
12787     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12788     SvPVCLEAR(filter);
12789     IoLINES(filter) = reversed;
12790     IoPAGE(filter) = 1; /* Not EOF */
12791
12792     /* Sadly, we have to return a valid pointer, come what may, so we have to
12793        ignore any error return from this.  */
12794     SvCUR_set(PL_linestr, 0);
12795     if (FILTER_READ(0, PL_linestr, 0)) {
12796         SvUTF8_on(PL_linestr);
12797     } else {
12798         SvUTF8_on(PL_linestr);
12799     }
12800     PL_bufend = SvEND(PL_linestr);
12801     return (U8*)SvPVX(PL_linestr);
12802 }
12803 #endif
12804
12805 /*
12806 Returns a pointer to the next character after the parsed
12807 vstring, as well as updating the passed in sv.
12808
12809 Function must be called like
12810
12811         sv = sv_2mortal(newSV(5));
12812         s = scan_vstring(s,e,sv);
12813
12814 where s and e are the start and end of the string.
12815 The sv should already be large enough to store the vstring
12816 passed in, for performance reasons.
12817
12818 This function may croak if fatal warnings are enabled in the
12819 calling scope, hence the sv_2mortal in the example (to prevent
12820 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
12821 sv_2mortal.
12822
12823 */
12824
12825 char *
12826 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12827 {
12828     const char *pos = s;
12829     const char *start = s;
12830
12831     PERL_ARGS_ASSERT_SCAN_VSTRING;
12832
12833     if (*pos == 'v') pos++;  /* get past 'v' */
12834     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12835         pos++;
12836     if ( *pos != '.') {
12837         /* this may not be a v-string if followed by => */
12838         const char *next = pos;
12839         while (next < e && isSPACE(*next))
12840             ++next;
12841         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12842             /* return string not v-string */
12843             sv_setpvn(sv,(char *)s,pos-s);
12844             return (char *)pos;
12845         }
12846     }
12847
12848     if (!isALPHA(*pos)) {
12849         U8 tmpbuf[UTF8_MAXBYTES+1];
12850
12851         if (*s == 'v')
12852             s++;  /* get past 'v' */
12853
12854         SvPVCLEAR(sv);
12855
12856         for (;;) {
12857             /* this is atoi() that tolerates underscores */
12858             U8 *tmpend;
12859             UV rev = 0;
12860             const char *end = pos;
12861             UV mult = 1;
12862             while (--end >= s) {
12863                 if (*end != '_') {
12864                     const UV orev = rev;
12865                     rev += (*end - '0') * mult;
12866                     mult *= 10;
12867                     if (orev > rev)
12868                         /* diag_listed_as: Integer overflow in %s number */
12869                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12870                                          "Integer overflow in decimal number");
12871                 }
12872             }
12873
12874             /* Append native character for the rev point */
12875             tmpend = uvchr_to_utf8(tmpbuf, rev);
12876             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12877             if (!UVCHR_IS_INVARIANT(rev))
12878                  SvUTF8_on(sv);
12879             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12880                  s = ++pos;
12881             else {
12882                  s = pos;
12883                  break;
12884             }
12885             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12886                  pos++;
12887         }
12888         SvPOK_on(sv);
12889         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12890         SvRMAGICAL_on(sv);
12891     }
12892     return (char *)s;
12893 }
12894
12895 int
12896 Perl_keyword_plugin_standard(pTHX_
12897         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12898 {
12899     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12900     PERL_UNUSED_CONTEXT;
12901     PERL_UNUSED_ARG(keyword_ptr);
12902     PERL_UNUSED_ARG(keyword_len);
12903     PERL_UNUSED_ARG(op_ptr);
12904     return KEYWORD_PLUGIN_DECLINE;
12905 }
12906
12907 /*
12908 =for apidoc wrap_keyword_plugin
12909
12910 Puts a C function into the chain of keyword plugins.  This is the
12911 preferred way to manipulate the L</PL_keyword_plugin> variable.
12912 C<new_plugin> is a pointer to the C function that is to be added to the
12913 keyword plugin chain, and C<old_plugin_p> points to the storage location
12914 where a pointer to the next function in the chain will be stored.  The
12915 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12916 while the value previously stored there is written to C<*old_plugin_p>.
12917
12918 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12919 to hook keyword parsing may find itself invoked more than once per
12920 process, typically in different threads.  To handle that situation, this
12921 function is idempotent.  The location C<*old_plugin_p> must initially
12922 (once per process) contain a null pointer.  A C variable of static
12923 duration (declared at file scope, typically also marked C<static> to give
12924 it internal linkage) will be implicitly initialised appropriately, if it
12925 does not have an explicit initialiser.  This function will only actually
12926 modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
12927 function is also thread safe on the small scale.  It uses appropriate
12928 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12929
12930 When this function is called, the function referenced by C<new_plugin>
12931 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12932 In a threading situation, C<new_plugin> may be called immediately, even
12933 before this function has returned.  C<*old_plugin_p> will always be
12934 appropriately set before C<new_plugin> is called.  If C<new_plugin>
12935 decides not to do anything special with the identifier that it is given
12936 (which is the usual case for most calls to a keyword plugin), it must
12937 chain the plugin function referenced by C<*old_plugin_p>.
12938
12939 Taken all together, XS code to install a keyword plugin should typically
12940 look something like this:
12941
12942     static Perl_keyword_plugin_t next_keyword_plugin;
12943     static OP *my_keyword_plugin(pTHX_
12944         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12945     {
12946         if (memEQs(keyword_ptr, keyword_len,
12947                    "my_new_keyword")) {
12948             ...
12949         } else {
12950             return next_keyword_plugin(aTHX_
12951                 keyword_ptr, keyword_len, op_ptr);
12952         }
12953     }
12954     BOOT:
12955         wrap_keyword_plugin(my_keyword_plugin,
12956                             &next_keyword_plugin);
12957
12958 Direct access to L</PL_keyword_plugin> should be avoided.
12959
12960 =cut
12961 */
12962
12963 void
12964 Perl_wrap_keyword_plugin(pTHX_
12965     Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12966 {
12967
12968     PERL_UNUSED_CONTEXT;
12969     PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12970     if (*old_plugin_p) return;
12971     KEYWORD_PLUGIN_MUTEX_LOCK;
12972     if (!*old_plugin_p) {
12973         *old_plugin_p = PL_keyword_plugin;
12974         PL_keyword_plugin = new_plugin;
12975     }
12976     KEYWORD_PLUGIN_MUTEX_UNLOCK;
12977 }
12978
12979 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12980 static void
12981 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12982 {
12983     SAVEI32(PL_lex_brackets);
12984     if (PL_lex_brackets > 100)
12985         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12986     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12987     SAVEI32(PL_lex_allbrackets);
12988     PL_lex_allbrackets = 0;
12989     SAVEI8(PL_lex_fakeeof);
12990     PL_lex_fakeeof = (U8)fakeeof;
12991     if(yyparse(gramtype) && !PL_parser->error_count)
12992         qerror(Perl_mess(aTHX_ "Parse error"));
12993 }
12994
12995 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12996 static OP *
12997 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12998 {
12999     OP *o;
13000     ENTER;
13001     SAVEVPTR(PL_eval_root);
13002     PL_eval_root = NULL;
13003     parse_recdescent(gramtype, fakeeof);
13004     o = PL_eval_root;
13005     LEAVE;
13006     return o;
13007 }
13008
13009 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
13010 static OP *
13011 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
13012 {
13013     OP *exprop;
13014     if (flags & ~PARSE_OPTIONAL)
13015         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
13016     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
13017     if (!exprop && !(flags & PARSE_OPTIONAL)) {
13018         if (!PL_parser->error_count)
13019             qerror(Perl_mess(aTHX_ "Parse error"));
13020         exprop = newOP(OP_NULL, 0);
13021     }
13022     return exprop;
13023 }
13024
13025 /*
13026 =for apidoc parse_arithexpr
13027
13028 Parse a Perl arithmetic expression.  This may contain operators of precedence
13029 down to the bit shift operators.  The expression must be followed (and thus
13030 terminated) either by a comparison or lower-precedence operator or by
13031 something that would normally terminate an expression such as semicolon.
13032 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13033 otherwise it is mandatory.  It is up to the caller to ensure that the
13034 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13035 the source of the code to be parsed and the lexical context for the
13036 expression.
13037
13038 The op tree representing the expression is returned.  If an optional
13039 expression is absent, a null pointer is returned, otherwise the pointer
13040 will be non-null.
13041
13042 If an error occurs in parsing or compilation, in most cases a valid op
13043 tree is returned anyway.  The error is reflected in the parser state,
13044 normally resulting in a single exception at the top level of parsing
13045 which covers all the compilation errors that occurred.  Some compilation
13046 errors, however, will throw an exception immediately.
13047
13048 =for apidoc Amnh||PARSE_OPTIONAL
13049
13050 =cut
13051
13052 */
13053
13054 OP *
13055 Perl_parse_arithexpr(pTHX_ U32 flags)
13056 {
13057     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
13058 }
13059
13060 /*
13061 =for apidoc parse_termexpr
13062
13063 Parse a Perl term expression.  This may contain operators of precedence
13064 down to the assignment operators.  The expression must be followed (and thus
13065 terminated) either by a comma or lower-precedence operator or by
13066 something that would normally terminate an expression such as semicolon.
13067 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13068 otherwise it is mandatory.  It is up to the caller to ensure that the
13069 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13070 the source of the code to be parsed and the lexical context for the
13071 expression.
13072
13073 The op tree representing the expression is returned.  If an optional
13074 expression is absent, a null pointer is returned, otherwise the pointer
13075 will be non-null.
13076
13077 If an error occurs in parsing or compilation, in most cases a valid op
13078 tree is returned anyway.  The error is reflected in the parser state,
13079 normally resulting in a single exception at the top level of parsing
13080 which covers all the compilation errors that occurred.  Some compilation
13081 errors, however, will throw an exception immediately.
13082
13083 =cut
13084 */
13085
13086 OP *
13087 Perl_parse_termexpr(pTHX_ U32 flags)
13088 {
13089     return parse_expr(LEX_FAKEEOF_COMMA, flags);
13090 }
13091
13092 /*
13093 =for apidoc parse_listexpr
13094
13095 Parse a Perl list expression.  This may contain operators of precedence
13096 down to the comma operator.  The expression must be followed (and thus
13097 terminated) either by a low-precedence logic operator such as C<or> or by
13098 something that would normally terminate an expression such as semicolon.
13099 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13100 otherwise it is mandatory.  It is up to the caller to ensure that the
13101 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13102 the source of the code to be parsed and the lexical context for the
13103 expression.
13104
13105 The op tree representing the expression is returned.  If an optional
13106 expression is absent, a null pointer is returned, otherwise the pointer
13107 will be non-null.
13108
13109 If an error occurs in parsing or compilation, in most cases a valid op
13110 tree is returned anyway.  The error is reflected in the parser state,
13111 normally resulting in a single exception at the top level of parsing
13112 which covers all the compilation errors that occurred.  Some compilation
13113 errors, however, will throw an exception immediately.
13114
13115 =cut
13116 */
13117
13118 OP *
13119 Perl_parse_listexpr(pTHX_ U32 flags)
13120 {
13121     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
13122 }
13123
13124 /*
13125 =for apidoc parse_fullexpr
13126
13127 Parse a single complete Perl expression.  This allows the full
13128 expression grammar, including the lowest-precedence operators such
13129 as C<or>.  The expression must be followed (and thus terminated) by a
13130 token that an expression would normally be terminated by: end-of-file,
13131 closing bracketing punctuation, semicolon, or one of the keywords that
13132 signals a postfix expression-statement modifier.  If C<flags> has the
13133 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
13134 mandatory.  It is up to the caller to ensure that the dynamic parser
13135 state (L</PL_parser> et al) is correctly set to reflect the source of
13136 the code to be parsed and the lexical context for the expression.
13137
13138 The op tree representing the expression is returned.  If an optional
13139 expression is absent, a null pointer is returned, otherwise the pointer
13140 will be non-null.
13141
13142 If an error occurs in parsing or compilation, in most cases a valid op
13143 tree is returned anyway.  The error is reflected in the parser state,
13144 normally resulting in a single exception at the top level of parsing
13145 which covers all the compilation errors that occurred.  Some compilation
13146 errors, however, will throw an exception immediately.
13147
13148 =cut
13149 */
13150
13151 OP *
13152 Perl_parse_fullexpr(pTHX_ U32 flags)
13153 {
13154     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
13155 }
13156
13157 /*
13158 =for apidoc parse_block
13159
13160 Parse a single complete Perl code block.  This consists of an opening
13161 brace, a sequence of statements, and a closing brace.  The block
13162 constitutes a lexical scope, so C<my> variables and various compile-time
13163 effects can be contained within it.  It is up to the caller to ensure
13164 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13165 reflect the source of the code to be parsed and the lexical context for
13166 the statement.
13167
13168 The op tree representing the code block is returned.  This is always a
13169 real op, never a null pointer.  It will normally be a C<lineseq> list,
13170 including C<nextstate> or equivalent ops.  No ops to construct any kind
13171 of runtime scope are included by virtue of it being a block.
13172
13173 If an error occurs in parsing or compilation, in most cases a valid op
13174 tree (most likely null) is returned anyway.  The error is reflected in
13175 the parser state, normally resulting in a single exception at the top
13176 level of parsing which covers all the compilation errors that occurred.
13177 Some compilation errors, however, will throw an exception immediately.
13178
13179 The C<flags> parameter is reserved for future use, and must always
13180 be zero.
13181
13182 =cut
13183 */
13184
13185 OP *
13186 Perl_parse_block(pTHX_ U32 flags)
13187 {
13188     if (flags)
13189         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13190     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13191 }
13192
13193 /*
13194 =for apidoc parse_barestmt
13195
13196 Parse a single unadorned Perl statement.  This may be a normal imperative
13197 statement or a declaration that has compile-time effect.  It does not
13198 include any label or other affixture.  It is up to the caller to ensure
13199 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13200 reflect the source of the code to be parsed and the lexical context for
13201 the statement.
13202
13203 The op tree representing the statement is returned.  This may be a
13204 null pointer if the statement is null, for example if it was actually
13205 a subroutine definition (which has compile-time side effects).  If not
13206 null, it will be ops directly implementing the statement, suitable to
13207 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
13208 equivalent op (except for those embedded in a scope contained entirely
13209 within the statement).
13210
13211 If an error occurs in parsing or compilation, in most cases a valid op
13212 tree (most likely null) is returned anyway.  The error is reflected in
13213 the parser state, normally resulting in a single exception at the top
13214 level of parsing which covers all the compilation errors that occurred.
13215 Some compilation errors, however, will throw an exception immediately.
13216
13217 The C<flags> parameter is reserved for future use, and must always
13218 be zero.
13219
13220 =cut
13221 */
13222
13223 OP *
13224 Perl_parse_barestmt(pTHX_ U32 flags)
13225 {
13226     if (flags)
13227         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13228     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13229 }
13230
13231 /*
13232 =for apidoc parse_label
13233
13234 Parse a single label, possibly optional, of the type that may prefix a
13235 Perl statement.  It is up to the caller to ensure that the dynamic parser
13236 state (L</PL_parser> et al) is correctly set to reflect the source of
13237 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13238 label is optional, otherwise it is mandatory.
13239
13240 The name of the label is returned in the form of a fresh scalar.  If an
13241 optional label is absent, a null pointer is returned.
13242
13243 If an error occurs in parsing, which can only occur if the label is
13244 mandatory, a valid label is returned anyway.  The error is reflected in
13245 the parser state, normally resulting in a single exception at the top
13246 level of parsing which covers all the compilation errors that occurred.
13247
13248 =cut
13249 */
13250
13251 SV *
13252 Perl_parse_label(pTHX_ U32 flags)
13253 {
13254     if (flags & ~PARSE_OPTIONAL)
13255         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13256     if (PL_nexttoke) {
13257         PL_parser->yychar = yylex();
13258         if (PL_parser->yychar == LABEL) {
13259             SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13260             PL_parser->yychar = YYEMPTY;
13261             cSVOPx(pl_yylval.opval)->op_sv = NULL;
13262             op_free(pl_yylval.opval);
13263             return labelsv;
13264         } else {
13265             yyunlex();
13266             goto no_label;
13267         }
13268     } else {
13269         char *s, *t;
13270         STRLEN wlen, bufptr_pos;
13271         lex_read_space(0);
13272         t = s = PL_bufptr;
13273         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13274             goto no_label;
13275         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
13276         if (word_takes_any_delimiter(s, wlen))
13277             goto no_label;
13278         bufptr_pos = s - SvPVX(PL_linestr);
13279         PL_bufptr = t;
13280         lex_read_space(LEX_KEEP_PREVIOUS);
13281         t = PL_bufptr;
13282         s = SvPVX(PL_linestr) + bufptr_pos;
13283         if (t[0] == ':' && t[1] != ':') {
13284             PL_oldoldbufptr = PL_oldbufptr;
13285             PL_oldbufptr = s;
13286             PL_bufptr = t+1;
13287             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13288         } else {
13289             PL_bufptr = s;
13290             no_label:
13291             if (flags & PARSE_OPTIONAL) {
13292                 return NULL;
13293             } else {
13294                 qerror(Perl_mess(aTHX_ "Parse error"));
13295                 return newSVpvs("x");
13296             }
13297         }
13298     }
13299 }
13300
13301 /*
13302 =for apidoc parse_fullstmt
13303
13304 Parse a single complete Perl statement.  This may be a normal imperative
13305 statement or a declaration that has compile-time effect, and may include
13306 optional labels.  It is up to the caller to ensure that the dynamic
13307 parser state (L</PL_parser> et al) is correctly set to reflect the source
13308 of the code to be parsed and the lexical context for the statement.
13309
13310 The op tree representing the statement is returned.  This may be a
13311 null pointer if the statement is null, for example if it was actually
13312 a subroutine definition (which has compile-time side effects).  If not
13313 null, it will be the result of a L</newSTATEOP> call, normally including
13314 a C<nextstate> or equivalent op.
13315
13316 If an error occurs in parsing or compilation, in most cases a valid op
13317 tree (most likely null) is returned anyway.  The error is reflected in
13318 the parser state, normally resulting in a single exception at the top
13319 level of parsing which covers all the compilation errors that occurred.
13320 Some compilation errors, however, will throw an exception immediately.
13321
13322 The C<flags> parameter is reserved for future use, and must always
13323 be zero.
13324
13325 =cut
13326 */
13327
13328 OP *
13329 Perl_parse_fullstmt(pTHX_ U32 flags)
13330 {
13331     if (flags)
13332         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13333     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13334 }
13335
13336 /*
13337 =for apidoc parse_stmtseq
13338
13339 Parse a sequence of zero or more Perl statements.  These may be normal
13340 imperative statements, including optional labels, or declarations
13341 that have compile-time effect, or any mixture thereof.  The statement
13342 sequence ends when a closing brace or end-of-file is encountered in a
13343 place where a new statement could have validly started.  It is up to
13344 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13345 is correctly set to reflect the source of the code to be parsed and the
13346 lexical context for the statements.
13347
13348 The op tree representing the statement sequence is returned.  This may
13349 be a null pointer if the statements were all null, for example if there
13350 were no statements or if there were only subroutine definitions (which
13351 have compile-time side effects).  If not null, it will be a C<lineseq>
13352 list, normally including C<nextstate> or equivalent ops.
13353
13354 If an error occurs in parsing or compilation, in most cases a valid op
13355 tree is returned anyway.  The error is reflected in the parser state,
13356 normally resulting in a single exception at the top level of parsing
13357 which covers all the compilation errors that occurred.  Some compilation
13358 errors, however, will throw an exception immediately.
13359
13360 The C<flags> parameter is reserved for future use, and must always
13361 be zero.
13362
13363 =cut
13364 */
13365
13366 OP *
13367 Perl_parse_stmtseq(pTHX_ U32 flags)
13368 {
13369     OP *stmtseqop;
13370     I32 c;
13371     if (flags)
13372         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13373     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13374     c = lex_peek_unichar(0);
13375     if (c != -1 && c != /*{*/'}')
13376         qerror(Perl_mess(aTHX_ "Parse error"));
13377     return stmtseqop;
13378 }
13379
13380 /*
13381 =for apidoc parse_subsignature
13382
13383 Parse a subroutine signature declaration. This is the contents of the
13384 parentheses following a named or anonymous subroutine declaration when the
13385 C<signatures> feature is enabled. Note that this function neither expects
13386 nor consumes the opening and closing parentheses around the signature; it
13387 is the caller's job to handle these.
13388
13389 This function must only be called during parsing of a subroutine; after
13390 L</start_subparse> has been called. It might allocate lexical variables on
13391 the pad for the current subroutine.
13392
13393 The op tree to unpack the arguments from the stack at runtime is returned.
13394 This op tree should appear at the beginning of the compiled function. The
13395 caller may wish to use L</op_append_list> to build their function body
13396 after it, or splice it together with the body before calling L</newATTRSUB>.
13397
13398 The C<flags> parameter is reserved for future use, and must always
13399 be zero.
13400
13401 =cut
13402 */
13403
13404 OP *
13405 Perl_parse_subsignature(pTHX_ U32 flags)
13406 {
13407     if (flags)
13408         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13409     return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13410 }
13411
13412 /*
13413  * ex: set ts=8 sts=4 sw=4 et:
13414  */