toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "dquote_inline.h"
  42 #include "invlist_inline.h"
  43
  44 #define new_constant(a,b,c,d,e,f,g, h)  \
  45         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
  46
  47 #define pl_yylval       (PL_parser->yylval)
  48
  49 /* XXX temporary backwards compatibility */
  50 #define PL_lex_brackets         (PL_parser->lex_brackets)
  51 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  52 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  57 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  58 #define PL_lex_inpat            (PL_parser->lex_inpat)
  59 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  60 #define PL_lex_op               (PL_parser->lex_op)
  61 #define PL_lex_repl             (PL_parser->lex_repl)
  62 #define PL_lex_starts           (PL_parser->lex_starts)
  63 #define PL_lex_stuff            (PL_parser->lex_stuff)
  64 #define PL_multi_start          (PL_parser->multi_start)
  65 #define PL_multi_open           (PL_parser->multi_open)
  66 #define PL_multi_close          (PL_parser->multi_close)
  67 #define PL_preambled            (PL_parser->preambled)
  68 #define PL_linestr              (PL_parser->linestr)
  69 #define PL_expect               (PL_parser->expect)
  70 #define PL_copline              (PL_parser->copline)
  71 #define PL_bufptr               (PL_parser->bufptr)
  72 #define PL_oldbufptr            (PL_parser->oldbufptr)
  73 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  74 #define PL_linestart            (PL_parser->linestart)
  75 #define PL_bufend               (PL_parser->bufend)
  76 #define PL_last_uni             (PL_parser->last_uni)
  77 #define PL_last_lop             (PL_parser->last_lop)
  78 #define PL_last_lop_op          (PL_parser->last_lop_op)
  79 #define PL_lex_state            (PL_parser->lex_state)
  80 #define PL_rsfp                 (PL_parser->rsfp)
  81 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  82 #define PL_in_my                (PL_parser->in_my)
  83 #define PL_in_my_stash          (PL_parser->in_my_stash)
  84 #define PL_tokenbuf             (PL_parser->tokenbuf)
  85 #define PL_multi_end            (PL_parser->multi_end)
  86 #define PL_error_count          (PL_parser->error_count)
  87
  88 #  define PL_nexttoke           (PL_parser->nexttoke)
  89 #  define PL_nexttype           (PL_parser->nexttype)
  90 #  define PL_nextval            (PL_parser->nextval)
  91
  92
  93 #define SvEVALED(sv) \
  94     (SvTYPE(sv) >= SVt_PVNV \
  95     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
  96
  97 static const char* const ident_too_long = "Identifier too long";
  98
  99 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 100
 101 #define XENUMMASK  0x3f
 102 #define XFAKEEOF   0x40
 103 #define XFAKEBRACK 0x80
 104
 105 #ifdef USE_UTF8_SCRIPTS
 106 #   define UTF cBOOL(!IN_BYTES)
 107 #else
 108 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 109 #endif
 110
 111 /* The maximum number of characters preceding the unrecognized one to display */
 112 #define UNRECOGNIZED_PRECEDE_COUNT 10
 113
 114 /* In variables named $^X, these are the legal values for X.
 115  * 1999-02-27 mjd-perl-patch@plover.com */
 116 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 117
 118 #define SPACE_OR_TAB(c) isBLANK_A(c)
 119
 120 #define HEXFP_PEEK(s)     \
 121     (((s[0] == '.') && \
 122       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
 123      isALPHA_FOLD_EQ(s[0], 'p'))
 124
 125 /* LEX_* are values for PL_lex_state, the state of the lexer.
 126  * They are arranged oddly so that the guard on the switch statement
 127  * can get by with a single comparison (if the compiler is smart enough).
 128  *
 129  * These values refer to the various states within a sublex parse,
 130  * i.e. within a double quotish string
 131  */
 132
 133 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 134
 135 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 136 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 137 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 138 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 139 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 140
 141                                    /* at end of code, eg "$x" followed by:  */
 142 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 143 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 144
 145 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 146                                         string or after \E, $foo, etc       */
 147 #define LEX_INTERPCONST          2 /* NOT USED */
 148 #define LEX_FORMLINE             1 /* expecting a format line               */
 149
 150
 151 #ifdef DEBUGGING
 152 static const char* const lex_state_names[] = {
 153     "KNOWNEXT",
 154     "FORMLINE",
 155     "INTERPCONST",
 156     "INTERPCONCAT",
 157     "INTERPENDMAYBE",
 158     "INTERPEND",
 159     "INTERPSTART",
 160     "INTERPPUSH",
 161     "INTERPCASEMOD",
 162     "INTERPNORMAL",
 163     "NORMAL"
 164 };
 165 #endif
 166
 167 #include "keywords.h"
 168
 169 /* CLINE is a macro that ensures PL_copline has a sane value */
 170
 171 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 172
 173 /*
 174  * Convenience functions to return different tokens and prime the
 175  * lexer for the next token.  They all take an argument.
 176  *
 177  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 178  * OPERATOR     : generic operator
 179  * AOPERATOR    : assignment operator
 180  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 181  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 182  * PREREF       : *EXPR where EXPR is not a simple identifier
 183  * TERM         : expression term
 184  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 185  * LOOPX        : loop exiting command (goto, last, dump, etc)
 186  * FTST         : file test operator
 187  * FUN0         : zero-argument function
 188  * FUN0OP       : zero-argument function, with its op created in this file
 189  * FUN1         : not used, except for not, which isn't a UNIOP
 190  * BOop         : bitwise or or xor
 191  * BAop         : bitwise and
 192  * BCop         : bitwise complement
 193  * SHop         : shift operator
 194  * PWop         : power operator
 195  * PMop         : pattern-matching operator
 196  * Aop          : addition-level operator
 197  * AopNOASSIGN  : addition-level operator that is never part of .=
 198  * Mop          : multiplication-level operator
 199  * Eop          : equality-testing operator
 200  * Rop          : relational operator <= != gt
 201  *
 202  * Also see LOP and lop() below.
 203  */
 204
 205 #ifdef DEBUGGING /* Serve -DT. */
 206 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 207 #else
 208 #   define REPORT(retval) (retval)
 209 #endif
 210
 211 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 212 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 213 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 214 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 215 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 216 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 217 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 218 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 219 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
 220                          pl_yylval.ival=f, \
 221                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
 222                          REPORT((int)LOOPEX))
 223 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 224 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 225 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 226 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 227 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
 228 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
 229 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
 230                        REPORT('~')
 231 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
 232 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 233 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 234 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 235 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
 236 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 237 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 238 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 239
 240 /* This bit of chicanery makes a unary function followed by
 241  * a parenthesis into a function with one argument, highest precedence.
 242  * The UNIDOR macro is for unary functions that can be followed by the //
 243  * operator (such as C<shift // 0>).
 244  */
 245 #define UNI3(f,x,have_x) { \
 246         pl_yylval.ival = f; \
 247         if (have_x) PL_expect = x; \
 248         PL_bufptr = s; \
 249         PL_last_uni = PL_oldbufptr; \
 250         PL_last_lop_op = (f) < 0 ? -(f) : (f); \
 251         if (*s == '(') \
 252             return REPORT( (int)FUNC1 ); \
 253         s = skipspace(s); \
 254         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 255         }
 256 #define UNI(f)    UNI3(f,XTERM,1)
 257 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 258 #define UNIPROTO(f,optional) { \
 259         if (optional) PL_last_uni = PL_oldbufptr; \
 260         OPERATOR(f); \
 261         }
 262
 263 #define UNIBRACK(f) UNI3(f,0,0)
 264
 265 /* grandfather return to old style */
 266 #define OLDLOP(f) \
 267         do { \
 268             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 269                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 270             pl_yylval.ival = (f); \
 271             PL_expect = XTERM; \
 272             PL_bufptr = s; \
 273             return (int)LSTOP; \
 274         } while(0)
 275
 276 #define COPLINE_INC_WITH_HERELINES                  \
 277     STMT_START {                                     \
 278         CopLINE_inc(PL_curcop);                       \
 279         if (PL_parser->herelines)                      \
 280             CopLINE(PL_curcop) += PL_parser->herelines, \
 281             PL_parser->herelines = 0;                    \
 282     } STMT_END
 283 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 284  * is no sublex_push to follow. */
 285 #define COPLINE_SET_FROM_MULTI_END            \
 286     STMT_START {                               \
 287         CopLINE_set(PL_curcop, PL_multi_end);   \
 288         if (PL_multi_end != PL_multi_start)      \
 289             PL_parser->herelines = 0;             \
 290     } STMT_END
 291
 292
 293 #ifdef DEBUGGING
 294
 295 /* how to interpret the pl_yylval associated with the token */
 296 enum token_type {
 297     TOKENTYPE_NONE,
 298     TOKENTYPE_IVAL,
 299     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 300     TOKENTYPE_PVAL,
 301     TOKENTYPE_OPVAL
 302 };
 303
 304 static struct debug_tokens {
 305     const int token;
 306     enum token_type type;
 307     const char *name;
 308 } const debug_tokens[] =
 309 {
 310     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 311     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 312     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 313     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 314     { ANON_SIGSUB,      TOKENTYPE_IVAL,         "ANON_SIGSUB" },
 315     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 316     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 317     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 318     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 319     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 320     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 321     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 322     { DO,               TOKENTYPE_NONE,         "DO" },
 323     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 324     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 325     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 326     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 327     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 328     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 329     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 330     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 331     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 332     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 333     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 334     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 335     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 336     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 337     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 338     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 339     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 340     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 341     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 342     { IF,               TOKENTYPE_IVAL,         "IF" },
 343     { LABEL,            TOKENTYPE_OPVAL,        "LABEL" },
 344     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 345     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 346     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 347     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 348     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 349     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 350     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 351     { MY,               TOKENTYPE_IVAL,         "MY" },
 352     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 353     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 354     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 355     { OROR,             TOKENTYPE_NONE,         "OROR" },
 356     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 357     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 358     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 359     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 360     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 361     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 362     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 363     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 364     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 365     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 366     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 367     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 368     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 369     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 370     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 371     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 372     { SIGSUB,           TOKENTYPE_NONE,         "SIGSUB" },
 373     { SUB,              TOKENTYPE_NONE,         "SUB" },
 374     { THING,            TOKENTYPE_OPVAL,        "THING" },
 375     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 376     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 377     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 378     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 379     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 380     { USE,              TOKENTYPE_IVAL,         "USE" },
 381     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 382     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 383     { BAREWORD,         TOKENTYPE_OPVAL,        "BAREWORD" },
 384     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 385     { 0,                TOKENTYPE_NONE,         NULL }
 386 };
 387
 388 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 389
 390 STATIC int
 391 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 392 {
 393     PERL_ARGS_ASSERT_TOKEREPORT;
 394
 395     if (DEBUG_T_TEST) {
 396         const char *name = NULL;
 397         enum token_type type = TOKENTYPE_NONE;
 398         const struct debug_tokens *p;
 399         SV* const report = newSVpvs("<== ");
 400
 401         for (p = debug_tokens; p->token; p++) {
 402             if (p->token == (int)rv) {
 403                 name = p->name;
 404                 type = p->type;
 405                 break;
 406             }
 407         }
 408         if (name)
 409             Perl_sv_catpv(aTHX_ report, name);
 410         else if (isGRAPH(rv))
 411         {
 412             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 413             if ((char)rv == 'p')
 414                 sv_catpvs(report, " (pending identifier)");
 415         }
 416         else if (!rv)
 417             sv_catpvs(report, "EOF");
 418         else
 419             Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
 420         switch (type) {
 421         case TOKENTYPE_NONE:
 422             break;
 423         case TOKENTYPE_IVAL:
 424             Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
 425             break;
 426         case TOKENTYPE_OPNUM:
 427             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 428                                     PL_op_name[lvalp->ival]);
 429             break;
 430         case TOKENTYPE_PVAL:
 431             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 432             break;
 433         case TOKENTYPE_OPVAL:
 434             if (lvalp->opval) {
 435                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 436                                     PL_op_name[lvalp->opval->op_type]);
 437                 if (lvalp->opval->op_type == OP_CONST) {
 438                     Perl_sv_catpvf(aTHX_ report, " %s",
 439                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 440                 }
 441
 442             }
 443             else
 444                 sv_catpvs(report, "(opval=null)");
 445             break;
 446         }
 447         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 448     };
 449     return (int)rv;
 450 }
 451
 452
 453 /* print the buffer with suitable escapes */
 454
 455 STATIC void
 456 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 457 {
 458     SV* const tmp = newSVpvs("");
 459
 460     PERL_ARGS_ASSERT_PRINTBUF;
 461
 462     GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
 463     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 464     GCC_DIAG_RESTORE_STMT;
 465     SvREFCNT_dec(tmp);
 466 }
 467
 468 #endif
 469
 470 /*
 471  * S_ao
 472  *
 473  * This subroutine looks for an '=' next to the operator that has just been
 474  * parsed and turns it into an ASSIGNOP if it finds one.
 475  */
 476
 477 STATIC int
 478 S_ao(pTHX_ int toketype)
 479 {
 480     if (*PL_bufptr == '=') {
 481         PL_bufptr++;
 482         if (toketype == ANDAND)
 483             pl_yylval.ival = OP_ANDASSIGN;
 484         else if (toketype == OROR)
 485             pl_yylval.ival = OP_ORASSIGN;
 486         else if (toketype == DORDOR)
 487             pl_yylval.ival = OP_DORASSIGN;
 488         toketype = ASSIGNOP;
 489     }
 490     return REPORT(toketype);
 491 }
 492
 493 /*
 494  * S_no_op
 495  * When Perl expects an operator and finds something else, no_op
 496  * prints the warning.  It always prints "<something> found where
 497  * operator expected.  It prints "Missing semicolon on previous line?"
 498  * if the surprise occurs at the start of the line.  "do you need to
 499  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 500  * where the compiler doesn't know if foo is a method call or a function.
 501  * It prints "Missing operator before end of line" if there's nothing
 502  * after the missing operator, or "... before <...>" if there is something
 503  * after the missing operator.
 504  *
 505  * PL_bufptr is expected to point to the start of the thing that was found,
 506  * and s after the next token or partial token.
 507  */
 508
 509 STATIC void
 510 S_no_op(pTHX_ const char *const what, char *s)
 511 {
 512     char * const oldbp = PL_bufptr;
 513     const bool is_first = (PL_oldbufptr == PL_linestart);
 514
 515     PERL_ARGS_ASSERT_NO_OP;
 516
 517     if (!s)
 518         s = oldbp;
 519     else
 520         PL_bufptr = s;
 521     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 522     if (ckWARN_d(WARN_SYNTAX)) {
 523         if (is_first)
 524             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 525                     "\t(Missing semicolon on previous line?)\n");
 526         else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
 527                                                            PL_bufend,
 528                                                            UTF))
 529         {
 530             const char *t;
 531             for (t = PL_oldoldbufptr;
 532                  (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
 533                  t += UTF ? UTF8SKIP(t) : 1)
 534             {
 535                 NOOP;
 536             }
 537             if (t < PL_bufptr && isSPACE(*t))
 538                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 539                         "\t(Do you need to predeclare %" UTF8f "?)\n",
 540                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 541         }
 542         else {
 543             assert(s >= oldbp);
 544             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 545                     "\t(Missing operator before %" UTF8f "?)\n",
 546                      UTF8fARG(UTF, s - oldbp, oldbp));
 547         }
 548     }
 549     PL_bufptr = oldbp;
 550 }
 551
 552 /*
 553  * S_missingterm
 554  * Complain about missing quote/regexp/heredoc terminator.
 555  * If it's called with NULL then it cauterizes the line buffer.
 556  * If we're in a delimited string and the delimiter is a control
 557  * character, it's reformatted into a two-char sequence like ^C.
 558  * This is fatal.
 559  */
 560
 561 STATIC void
 562 S_missingterm(pTHX_ char *s, STRLEN len)
 563 {
 564     char tmpbuf[UTF8_MAXBYTES + 1];
 565     char q;
 566     bool uni = FALSE;
 567     SV *sv;
 568     if (s) {
 569         char * const nl = (char *) my_memrchr(s, '\n', len);
 570         if (nl) {
 571             *nl = '\0';
 572             len = nl - s;
 573         }
 574         uni = UTF;
 575     }
 576     else if (PL_multi_close < 32) {
 577         *tmpbuf = '^';
 578         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 579         tmpbuf[2] = '\0';
 580         s = tmpbuf;
 581         len = 2;
 582     }
 583     else {
 584         if (LIKELY(PL_multi_close < 256)) {
 585             *tmpbuf = (char)PL_multi_close;
 586             tmpbuf[1] = '\0';
 587             len = 1;
 588         }
 589         else {
 590             char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
 591             *end = '\0';
 592             len = end - tmpbuf;
 593             uni = TRUE;
 594         }
 595         s = tmpbuf;
 596     }
 597     q = memchr(s, '"', len) ? '\'' : '"';
 598     sv = sv_2mortal(newSVpvn(s, len));
 599     if (uni)
 600         SvUTF8_on(sv);
 601     Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
 602                      " anywhere before EOF", q, SVfARG(sv), q);
 603 }
 604
 605 #include "feature.h"
 606
 607 /*
 608  * Check whether the named feature is enabled.
 609  */
 610 bool
 611 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 612 {
 613     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 614
 615     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 616
 617     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 618
 619     if (namelen > MAX_FEATURE_LEN)
 620         return FALSE;
 621     memcpy(&he_name[8], name, namelen);
 622
 623     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 624                                      REFCOUNTED_HE_EXISTS));
 625 }
 626
 627 /*
 628  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 629  * utf16-to-utf8-reversed.
 630  */
 631
 632 #ifdef PERL_CR_FILTER
 633 static void
 634 strip_return(SV *sv)
 635 {
 636     const char *s = SvPVX_const(sv);
 637     const char * const e = s + SvCUR(sv);
 638
 639     PERL_ARGS_ASSERT_STRIP_RETURN;
 640
 641     /* outer loop optimized to do nothing if there are no CR-LFs */
 642     while (s < e) {
 643         if (*s++ == '\r' && *s == '\n') {
 644             /* hit a CR-LF, need to copy the rest */
 645             char *d = s - 1;
 646             *d++ = *s++;
 647             while (s < e) {
 648                 if (*s == '\r' && s[1] == '\n')
 649                     s++;
 650                 *d++ = *s++;
 651             }
 652             SvCUR(sv) -= s - d;
 653             return;
 654         }
 655     }
 656 }
 657
 658 STATIC I32
 659 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 660 {
 661     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 662     if (count > 0 && !maxlen)
 663         strip_return(sv);
 664     return count;
 665 }
 666 #endif
 667
 668 /*
 669 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 670
 671 Creates and initialises a new lexer/parser state object, supplying
 672 a context in which to lex and parse from a new source of Perl code.
 673 A pointer to the new state object is placed in L</PL_parser>.  An entry
 674 is made on the save stack so that upon unwinding, the new state object
 675 will be destroyed and the former value of L</PL_parser> will be restored.
 676 Nothing else need be done to clean up the parsing context.
 677
 678 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
 679 non-null, provides a string (in SV form) containing code to be parsed.
 680 A copy of the string is made, so subsequent modification of C<line>
 681 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
 682 from which code will be read to be parsed.  If both are non-null, the
 683 code in C<line> comes first and must consist of complete lines of input,
 684 and C<rsfp> supplies the remainder of the source.
 685
 686 The C<flags> parameter is reserved for future use.  Currently it is only
 687 used by perl internally, so extensions should always pass zero.
 688
 689 =cut
 690 */
 691
 692 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 693    can share filters with the current parser.
 694    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 695    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 696    destruction. This is used to handle the case of defaulting to reading the
 697    script from the standard input because no filename was given on the command
 698    line (without getting confused by situation where STDIN has been closed, so
 699    the script handle is opened on fd 0)  */
 700
 701 void
 702 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 703 {
 704     const char *s = NULL;
 705     yy_parser *parser, *oparser;
 706
 707     if (flags && flags & ~LEX_START_FLAGS)
 708         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 709
 710     /* create and initialise a parser */
 711
 712     Newxz(parser, 1, yy_parser);
 713     parser->old_parser = oparser = PL_parser;
 714     PL_parser = parser;
 715
 716     parser->stack = NULL;
 717     parser->stack_max1 = NULL;
 718     parser->ps = NULL;
 719
 720     /* on scope exit, free this parser and restore any outer one */
 721     SAVEPARSER(parser);
 722     parser->saved_curcop = PL_curcop;
 723
 724     /* initialise lexer state */
 725
 726     parser->nexttoke = 0;
 727     parser->error_count = oparser ? oparser->error_count : 0;
 728     parser->copline = parser->preambling = NOLINE;
 729     parser->lex_state = LEX_NORMAL;
 730     parser->expect = XSTATE;
 731     parser->rsfp = rsfp;
 732     parser->recheck_utf8_validity = FALSE;
 733     parser->rsfp_filters =
 734       !(flags & LEX_START_SAME_FILTER) || !oparser
 735         ? NULL
 736         : MUTABLE_AV(SvREFCNT_inc(
 737             oparser->rsfp_filters
 738              ? oparser->rsfp_filters
 739              : (oparser->rsfp_filters = newAV())
 740           ));
 741
 742     Newx(parser->lex_brackstack, 120, char);
 743     Newx(parser->lex_casestack, 12, char);
 744     *parser->lex_casestack = '\0';
 745     Newxz(parser->lex_shared, 1, LEXSHARED);
 746
 747     if (line) {
 748         STRLEN len;
 749         const U8* first_bad_char_loc;
 750
 751         s = SvPV_const(line, len);
 752
 753         if (   SvUTF8(line)
 754             && UNLIKELY(! is_utf8_string_loc((U8 *) s,
 755                                              SvCUR(line),
 756                                              &first_bad_char_loc)))
 757         {
 758             _force_out_malformed_utf8_message(first_bad_char_loc,
 759                                               (U8 *) s + SvCUR(line),
 760                                               0,
 761                                               1 /* 1 means die */ );
 762             NOT_REACHED; /* NOTREACHED */
 763         }
 764
 765         parser->linestr = flags & LEX_START_COPIED
 766                             ? SvREFCNT_inc_simple_NN(line)
 767                             : newSVpvn_flags(s, len, SvUTF8(line));
 768         if (!rsfp)
 769             sv_catpvs(parser->linestr, "\n;");
 770     } else {
 771         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 772     }
 773
 774     parser->oldoldbufptr =
 775         parser->oldbufptr =
 776         parser->bufptr =
 777         parser->linestart = SvPVX(parser->linestr);
 778     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 779     parser->last_lop = parser->last_uni = NULL;
 780
 781     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 782                                                         |LEX_DONT_CLOSE_RSFP));
 783     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 784                                                         |LEX_DONT_CLOSE_RSFP));
 785
 786     parser->in_pod = parser->filtered = 0;
 787 }
 788
 789
 790 /* delete a parser object */
 791
 792 void
 793 Perl_parser_free(pTHX_  const yy_parser *parser)
 794 {
 795     PERL_ARGS_ASSERT_PARSER_FREE;
 796
 797     PL_curcop = parser->saved_curcop;
 798     SvREFCNT_dec(parser->linestr);
 799
 800     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 801         PerlIO_clearerr(parser->rsfp);
 802     else if (parser->rsfp && (!parser->old_parser
 803           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 804         PerlIO_close(parser->rsfp);
 805     SvREFCNT_dec(parser->rsfp_filters);
 806     SvREFCNT_dec(parser->lex_stuff);
 807     SvREFCNT_dec(parser->lex_sub_repl);
 808
 809     Safefree(parser->lex_brackstack);
 810     Safefree(parser->lex_casestack);
 811     Safefree(parser->lex_shared);
 812     PL_parser = parser->old_parser;
 813     Safefree(parser);
 814 }
 815
 816 void
 817 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 818 {
 819     I32 nexttoke = parser->nexttoke;
 820     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 821     while (nexttoke--) {
 822         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 823          && parser->nextval[nexttoke].opval
 824          && parser->nextval[nexttoke].opval->op_slabbed
 825          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 826             op_free(parser->nextval[nexttoke].opval);
 827             parser->nextval[nexttoke].opval = NULL;
 828         }
 829     }
 830 }
 831
 832
 833 /*
 834 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 835
 836 Buffer scalar containing the chunk currently under consideration of the
 837 text currently being lexed.  This is always a plain string scalar (for
 838 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 839 normal scalar means; instead refer to the buffer directly by the pointer
 840 variables described below.
 841
 842 The lexer maintains various C<char*> pointers to things in the
 843 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 844 reallocated, all of these pointers must be updated.  Don't attempt to
 845 do this manually, but rather use L</lex_grow_linestr> if you need to
 846 reallocate the buffer.
 847
 848 The content of the text chunk in the buffer is commonly exactly one
 849 complete line of input, up to and including a newline terminator,
 850 but there are situations where it is otherwise.  The octets of the
 851 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 852 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 853 flag on this scalar, which may disagree with it.
 854
 855 For direct examination of the buffer, the variable
 856 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 857 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 858 of these pointers is usually preferable to examination of the scalar
 859 through normal scalar means.
 860
 861 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 862
 863 Direct pointer to the end of the chunk of text currently being lexed, the
 864 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 865 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 866 always located at the end of the buffer, and does not count as part of
 867 the buffer's contents.
 868
 869 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 870
 871 Points to the current position of lexing inside the lexer buffer.
 872 Characters around this point may be freely examined, within
 873 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 874 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 875 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 876
 877 Lexing code (whether in the Perl core or not) moves this pointer past
 878 the characters that it consumes.  It is also expected to perform some
 879 bookkeeping whenever a newline character is consumed.  This movement
 880 can be more conveniently performed by the function L</lex_read_to>,
 881 which handles newlines appropriately.
 882
 883 Interpretation of the buffer's octets can be abstracted out by
 884 using the slightly higher-level functions L</lex_peek_unichar> and
 885 L</lex_read_unichar>.
 886
 887 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 888
 889 Points to the start of the current line inside the lexer buffer.
 890 This is useful for indicating at which column an error occurred, and
 891 not much else.  This must be updated by any lexing code that consumes
 892 a newline; the function L</lex_read_to> handles this detail.
 893
 894 =cut
 895 */
 896
 897 /*
 898 =for apidoc Amx|bool|lex_bufutf8
 899
 900 Indicates whether the octets in the lexer buffer
 901 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 902 of Unicode characters.  If not, they should be interpreted as Latin-1
 903 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 904
 905 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 906 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 907 encoding.
 908
 909 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 910 is significant, but not the whole story regarding the input character
 911 encoding.  Normally, when a file is being read, the scalar contains octets
 912 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 913 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 914 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 915 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 916 is in effect.  This logic may change in the future; use this function
 917 instead of implementing the logic yourself.
 918
 919 =cut
 920 */
 921
 922 bool
 923 Perl_lex_bufutf8(pTHX)
 924 {
 925     return UTF;
 926 }
 927
 928 /*
 929 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 930
 931 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 932 at least C<len> octets (including terminating C<NUL>).  Returns a
 933 pointer to the reallocated buffer.  This is necessary before making
 934 any direct modification of the buffer that would increase its length.
 935 L</lex_stuff_pvn> provides a more convenient way to insert text into
 936 the buffer.
 937
 938 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 939 this function updates all of the lexer's variables that point directly
 940 into the buffer.
 941
 942 =cut
 943 */
 944
 945 char *
 946 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 947 {
 948     SV *linestr;
 949     char *buf;
 950     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 951     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 952     bool current;
 953
 954     linestr = PL_parser->linestr;
 955     buf = SvPVX(linestr);
 956     if (len <= SvLEN(linestr))
 957         return buf;
 958
 959     /* Is the lex_shared linestr SV the same as the current linestr SV?
 960      * Only in this case does re_eval_start need adjusting, since it
 961      * points within lex_shared->ls_linestr's buffer */
 962     current = (   !PL_parser->lex_shared->ls_linestr
 963                || linestr == PL_parser->lex_shared->ls_linestr);
 964
 965     bufend_pos = PL_parser->bufend - buf;
 966     bufptr_pos = PL_parser->bufptr - buf;
 967     oldbufptr_pos = PL_parser->oldbufptr - buf;
 968     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 969     linestart_pos = PL_parser->linestart - buf;
 970     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 971     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 972     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
 973                             PL_parser->lex_shared->re_eval_start - buf : 0;
 974
 975     buf = sv_grow(linestr, len);
 976
 977     PL_parser->bufend = buf + bufend_pos;
 978     PL_parser->bufptr = buf + bufptr_pos;
 979     PL_parser->oldbufptr = buf + oldbufptr_pos;
 980     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 981     PL_parser->linestart = buf + linestart_pos;
 982     if (PL_parser->last_uni)
 983         PL_parser->last_uni = buf + last_uni_pos;
 984     if (PL_parser->last_lop)
 985         PL_parser->last_lop = buf + last_lop_pos;
 986     if (current && PL_parser->lex_shared->re_eval_start)
 987         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 988     return buf;
 989 }
 990
 991 /*
 992 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 993
 994 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 995 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 996 reallocating the buffer if necessary.  This means that lexing code that
 997 runs later will see the characters as if they had appeared in the input.
 998 It is not recommended to do this as part of normal parsing, and most
 999 uses of this facility run the risk of the inserted characters being
1000 interpreted in an unintended manner.
1001
1002 The string to be inserted is represented by C<len> octets starting
1003 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1004 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1005 The characters are recoded for the lexer buffer, according to how the
1006 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1007 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1008 function is more convenient.
1009
1010 =cut
1011 */
1012
1013 void
1014 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1015 {
1016     dVAR;
1017     char *bufptr;
1018     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1019     if (flags & ~(LEX_STUFF_UTF8))
1020         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1021     if (UTF) {
1022         if (flags & LEX_STUFF_UTF8) {
1023             goto plain_copy;
1024         } else {
1025             STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1026                                                        (U8 *) pv + len);
1027             const char *p, *e = pv+len;;
1028             if (!highhalf)
1029                 goto plain_copy;
1030             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1031             bufptr = PL_parser->bufptr;
1032             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1033             SvCUR_set(PL_parser->linestr,
1034                 SvCUR(PL_parser->linestr) + len+highhalf);
1035             PL_parser->bufend += len+highhalf;
1036             for (p = pv; p != e; p++) {
1037                 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1038             }
1039         }
1040     } else {
1041         if (flags & LEX_STUFF_UTF8) {
1042             STRLEN highhalf = 0;
1043             const char *p, *e = pv+len;
1044             for (p = pv; p != e; p++) {
1045                 U8 c = (U8)*p;
1046                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1047                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1048                                 "non-Latin-1 character into Latin-1 input");
1049                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1050                     p++;
1051                     highhalf++;
1052                 } else assert(UTF8_IS_INVARIANT(c));
1053             }
1054             if (!highhalf)
1055                 goto plain_copy;
1056             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1057             bufptr = PL_parser->bufptr;
1058             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1059             SvCUR_set(PL_parser->linestr,
1060                 SvCUR(PL_parser->linestr) + len-highhalf);
1061             PL_parser->bufend += len-highhalf;
1062             p = pv;
1063             while (p < e) {
1064                 if (UTF8_IS_INVARIANT(*p)) {
1065                     *bufptr++ = *p;
1066                     p++;
1067                 }
1068                 else {
1069                     assert(p < e -1 );
1070                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1071                     p += 2;
1072                 }
1073             }
1074         } else {
1075           plain_copy:
1076             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1077             bufptr = PL_parser->bufptr;
1078             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1079             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1080             PL_parser->bufend += len;
1081             Copy(pv, bufptr, len, char);
1082         }
1083     }
1084 }
1085
1086 /*
1087 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1088
1089 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1090 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1091 reallocating the buffer if necessary.  This means that lexing code that
1092 runs later will see the characters as if they had appeared in the input.
1093 It is not recommended to do this as part of normal parsing, and most
1094 uses of this facility run the risk of the inserted characters being
1095 interpreted in an unintended manner.
1096
1097 The string to be inserted is represented by octets starting at C<pv>
1098 and continuing to the first nul.  These octets are interpreted as either
1099 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1100 in C<flags>.  The characters are recoded for the lexer buffer, according
1101 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1102 If it is not convenient to nul-terminate a string to be inserted, the
1103 L</lex_stuff_pvn> function is more appropriate.
1104
1105 =cut
1106 */
1107
1108 void
1109 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1110 {
1111     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1112     lex_stuff_pvn(pv, strlen(pv), flags);
1113 }
1114
1115 /*
1116 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1117
1118 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1119 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1120 reallocating the buffer if necessary.  This means that lexing code that
1121 runs later will see the characters as if they had appeared in the input.
1122 It is not recommended to do this as part of normal parsing, and most
1123 uses of this facility run the risk of the inserted characters being
1124 interpreted in an unintended manner.
1125
1126 The string to be inserted is the string value of C<sv>.  The characters
1127 are recoded for the lexer buffer, according to how the buffer is currently
1128 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1129 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1130 need to construct a scalar.
1131
1132 =cut
1133 */
1134
1135 void
1136 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1137 {
1138     char *pv;
1139     STRLEN len;
1140     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1141     if (flags)
1142         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1143     pv = SvPV(sv, len);
1144     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1145 }
1146
1147 /*
1148 =for apidoc Amx|void|lex_unstuff|char *ptr
1149
1150 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1151 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1152 This hides the discarded text from any lexing code that runs later,
1153 as if the text had never appeared.
1154
1155 This is not the normal way to consume lexed text.  For that, use
1156 L</lex_read_to>.
1157
1158 =cut
1159 */
1160
1161 void
1162 Perl_lex_unstuff(pTHX_ char *ptr)
1163 {
1164     char *buf, *bufend;
1165     STRLEN unstuff_len;
1166     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1167     buf = PL_parser->bufptr;
1168     if (ptr < buf)
1169         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1170     if (ptr == buf)
1171         return;
1172     bufend = PL_parser->bufend;
1173     if (ptr > bufend)
1174         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1175     unstuff_len = ptr - buf;
1176     Move(ptr, buf, bufend+1-ptr, char);
1177     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1178     PL_parser->bufend = bufend - unstuff_len;
1179 }
1180
1181 /*
1182 =for apidoc Amx|void|lex_read_to|char *ptr
1183
1184 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1185 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1186 performing the correct bookkeeping whenever a newline character is passed.
1187 This is the normal way to consume lexed text.
1188
1189 Interpretation of the buffer's octets can be abstracted out by
1190 using the slightly higher-level functions L</lex_peek_unichar> and
1191 L</lex_read_unichar>.
1192
1193 =cut
1194 */
1195
1196 void
1197 Perl_lex_read_to(pTHX_ char *ptr)
1198 {
1199     char *s;
1200     PERL_ARGS_ASSERT_LEX_READ_TO;
1201     s = PL_parser->bufptr;
1202     if (ptr < s || ptr > PL_parser->bufend)
1203         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1204     for (; s != ptr; s++)
1205         if (*s == '\n') {
1206             COPLINE_INC_WITH_HERELINES;
1207             PL_parser->linestart = s+1;
1208         }
1209     PL_parser->bufptr = ptr;
1210 }
1211
1212 /*
1213 =for apidoc Amx|void|lex_discard_to|char *ptr
1214
1215 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1216 up to C<ptr>.  The remaining content of the buffer will be moved, and
1217 all pointers into the buffer updated appropriately.  C<ptr> must not
1218 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1219 it is not permitted to discard text that has yet to be lexed.
1220
1221 Normally it is not necessarily to do this directly, because it suffices to
1222 use the implicit discarding behaviour of L</lex_next_chunk> and things
1223 based on it.  However, if a token stretches across multiple lines,
1224 and the lexing code has kept multiple lines of text in the buffer for
1225 that purpose, then after completion of the token it would be wise to
1226 explicitly discard the now-unneeded earlier lines, to avoid future
1227 multi-line tokens growing the buffer without bound.
1228
1229 =cut
1230 */
1231
1232 void
1233 Perl_lex_discard_to(pTHX_ char *ptr)
1234 {
1235     char *buf;
1236     STRLEN discard_len;
1237     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1238     buf = SvPVX(PL_parser->linestr);
1239     if (ptr < buf)
1240         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1241     if (ptr == buf)
1242         return;
1243     if (ptr > PL_parser->bufptr)
1244         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1245     discard_len = ptr - buf;
1246     if (PL_parser->oldbufptr < ptr)
1247         PL_parser->oldbufptr = ptr;
1248     if (PL_parser->oldoldbufptr < ptr)
1249         PL_parser->oldoldbufptr = ptr;
1250     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1251         PL_parser->last_uni = NULL;
1252     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1253         PL_parser->last_lop = NULL;
1254     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1255     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1256     PL_parser->bufend -= discard_len;
1257     PL_parser->bufptr -= discard_len;
1258     PL_parser->oldbufptr -= discard_len;
1259     PL_parser->oldoldbufptr -= discard_len;
1260     if (PL_parser->last_uni)
1261         PL_parser->last_uni -= discard_len;
1262     if (PL_parser->last_lop)
1263         PL_parser->last_lop -= discard_len;
1264 }
1265
1266 void
1267 Perl_notify_parser_that_changed_to_utf8(pTHX)
1268 {
1269     /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1270      * off to on.  At compile time, this has the effect of entering a 'use
1271      * utf8' section.  This means that any input was not previously checked for
1272      * UTF-8 (because it was off), but now we do need to check it, or our
1273      * assumptions about the input being sane could be wrong, and we could
1274      * segfault.  This routine just sets a flag so that the next time we look
1275      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1276      * proper phase, there may not be a parser object, but if there is, setting
1277      * the flag is harmless */
1278
1279     if (PL_parser) {
1280         PL_parser->recheck_utf8_validity = TRUE;
1281     }
1282 }
1283
1284 /*
1285 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1286
1287 Reads in the next chunk of text to be lexed, appending it to
1288 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1289 looked to the end of the current chunk and wants to know more.  It is
1290 usual, but not necessary, for lexing to have consumed the entirety of
1291 the current chunk at this time.
1292
1293 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1294 chunk (i.e., the current chunk has been entirely consumed), normally the
1295 current chunk will be discarded at the same time that the new chunk is
1296 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1297 will not be discarded.  If the current chunk has not been entirely
1298 consumed, then it will not be discarded regardless of the flag.
1299
1300 Returns true if some new text was added to the buffer, or false if the
1301 buffer has reached the end of the input text.
1302
1303 =cut
1304 */
1305
1306 #define LEX_FAKE_EOF 0x80000000
1307 #define LEX_NO_TERM  0x40000000 /* here-doc */
1308
1309 bool
1310 Perl_lex_next_chunk(pTHX_ U32 flags)
1311 {
1312     SV *linestr;
1313     char *buf;
1314     STRLEN old_bufend_pos, new_bufend_pos;
1315     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1316     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1317     bool got_some_for_debugger = 0;
1318     bool got_some;
1319
1320     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1321         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1322     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1323         return FALSE;
1324     linestr = PL_parser->linestr;
1325     buf = SvPVX(linestr);
1326     if (!(flags & LEX_KEEP_PREVIOUS)
1327           && PL_parser->bufptr == PL_parser->bufend)
1328     {
1329         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1330         linestart_pos = 0;
1331         if (PL_parser->last_uni != PL_parser->bufend)
1332             PL_parser->last_uni = NULL;
1333         if (PL_parser->last_lop != PL_parser->bufend)
1334             PL_parser->last_lop = NULL;
1335         last_uni_pos = last_lop_pos = 0;
1336         *buf = 0;
1337         SvCUR(linestr) = 0;
1338     } else {
1339         old_bufend_pos = PL_parser->bufend - buf;
1340         bufptr_pos = PL_parser->bufptr - buf;
1341         oldbufptr_pos = PL_parser->oldbufptr - buf;
1342         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1343         linestart_pos = PL_parser->linestart - buf;
1344         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1345         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1346     }
1347     if (flags & LEX_FAKE_EOF) {
1348         goto eof;
1349     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1350         got_some = 0;
1351     } else if (filter_gets(linestr, old_bufend_pos)) {
1352         got_some = 1;
1353         got_some_for_debugger = 1;
1354     } else if (flags & LEX_NO_TERM) {
1355         got_some = 0;
1356     } else {
1357         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1358             SvPVCLEAR(linestr);
1359         eof:
1360         /* End of real input.  Close filehandle (unless it was STDIN),
1361          * then add implicit termination.
1362          */
1363         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1364             PerlIO_clearerr(PL_parser->rsfp);
1365         else if (PL_parser->rsfp)
1366             (void)PerlIO_close(PL_parser->rsfp);
1367         PL_parser->rsfp = NULL;
1368         PL_parser->in_pod = PL_parser->filtered = 0;
1369         if (!PL_in_eval && PL_minus_p) {
1370             sv_catpvs(linestr,
1371                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1372             PL_minus_n = PL_minus_p = 0;
1373         } else if (!PL_in_eval && PL_minus_n) {
1374             sv_catpvs(linestr, /*{*/";}");
1375             PL_minus_n = 0;
1376         } else
1377             sv_catpvs(linestr, ";");
1378         got_some = 1;
1379     }
1380     buf = SvPVX(linestr);
1381     new_bufend_pos = SvCUR(linestr);
1382     PL_parser->bufend = buf + new_bufend_pos;
1383     PL_parser->bufptr = buf + bufptr_pos;
1384
1385     if (UTF) {
1386         const U8* first_bad_char_loc;
1387         if (UNLIKELY(! is_utf8_string_loc(
1388                             (U8 *) PL_parser->bufptr,
1389                                    PL_parser->bufend - PL_parser->bufptr,
1390                                    &first_bad_char_loc)))
1391         {
1392             _force_out_malformed_utf8_message(first_bad_char_loc,
1393                                               (U8 *) PL_parser->bufend,
1394                                               0,
1395                                               1 /* 1 means die */ );
1396             NOT_REACHED; /* NOTREACHED */
1397         }
1398     }
1399
1400     PL_parser->oldbufptr = buf + oldbufptr_pos;
1401     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1402     PL_parser->linestart = buf + linestart_pos;
1403     if (PL_parser->last_uni)
1404         PL_parser->last_uni = buf + last_uni_pos;
1405     if (PL_parser->last_lop)
1406         PL_parser->last_lop = buf + last_lop_pos;
1407     if (PL_parser->preambling != NOLINE) {
1408         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1409         PL_parser->preambling = NOLINE;
1410     }
1411     if (   got_some_for_debugger
1412         && PERLDB_LINE_OR_SAVESRC
1413         && PL_curstash != PL_debstash)
1414     {
1415         /* debugger active and we're not compiling the debugger code,
1416          * so store the line into the debugger's array of lines
1417          */
1418         update_debugger_info(NULL, buf+old_bufend_pos,
1419             new_bufend_pos-old_bufend_pos);
1420     }
1421     return got_some;
1422 }
1423
1424 /*
1425 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1426
1427 Looks ahead one (Unicode) character in the text currently being lexed.
1428 Returns the codepoint (unsigned integer value) of the next character,
1429 or -1 if lexing has reached the end of the input text.  To consume the
1430 peeked character, use L</lex_read_unichar>.
1431
1432 If the next character is in (or extends into) the next chunk of input
1433 text, the next chunk will be read in.  Normally the current chunk will be
1434 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1435 bit set, then the current chunk will not be discarded.
1436
1437 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1438 is encountered, an exception is generated.
1439
1440 =cut
1441 */
1442
1443 I32
1444 Perl_lex_peek_unichar(pTHX_ U32 flags)
1445 {
1446     dVAR;
1447     char *s, *bufend;
1448     if (flags & ~(LEX_KEEP_PREVIOUS))
1449         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1450     s = PL_parser->bufptr;
1451     bufend = PL_parser->bufend;
1452     if (UTF) {
1453         U8 head;
1454         I32 unichar;
1455         STRLEN len, retlen;
1456         if (s == bufend) {
1457             if (!lex_next_chunk(flags))
1458                 return -1;
1459             s = PL_parser->bufptr;
1460             bufend = PL_parser->bufend;
1461         }
1462         head = (U8)*s;
1463         if (UTF8_IS_INVARIANT(head))
1464             return head;
1465         if (UTF8_IS_START(head)) {
1466             len = UTF8SKIP(&head);
1467             while ((STRLEN)(bufend-s) < len) {
1468                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1469                     break;
1470                 s = PL_parser->bufptr;
1471                 bufend = PL_parser->bufend;
1472             }
1473         }
1474         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1475         if (retlen == (STRLEN)-1) {
1476             _force_out_malformed_utf8_message((U8 *) s,
1477                                               (U8 *) bufend,
1478                                               0,
1479                                               1 /* 1 means die */ );
1480             NOT_REACHED; /* NOTREACHED */
1481         }
1482         return unichar;
1483     } else {
1484         if (s == bufend) {
1485             if (!lex_next_chunk(flags))
1486                 return -1;
1487             s = PL_parser->bufptr;
1488         }
1489         return (U8)*s;
1490     }
1491 }
1492
1493 /*
1494 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1495
1496 Reads the next (Unicode) character in the text currently being lexed.
1497 Returns the codepoint (unsigned integer value) of the character read,
1498 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1499 if lexing has reached the end of the input text.  To non-destructively
1500 examine the next character, use L</lex_peek_unichar> instead.
1501
1502 If the next character is in (or extends into) the next chunk of input
1503 text, the next chunk will be read in.  Normally the current chunk will be
1504 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1505 bit set, then the current chunk will not be discarded.
1506
1507 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1508 is encountered, an exception is generated.
1509
1510 =cut
1511 */
1512
1513 I32
1514 Perl_lex_read_unichar(pTHX_ U32 flags)
1515 {
1516     I32 c;
1517     if (flags & ~(LEX_KEEP_PREVIOUS))
1518         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1519     c = lex_peek_unichar(flags);
1520     if (c != -1) {
1521         if (c == '\n')
1522             COPLINE_INC_WITH_HERELINES;
1523         if (UTF)
1524             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1525         else
1526             ++(PL_parser->bufptr);
1527     }
1528     return c;
1529 }
1530
1531 /*
1532 =for apidoc Amx|void|lex_read_space|U32 flags
1533
1534 Reads optional spaces, in Perl style, in the text currently being
1535 lexed.  The spaces may include ordinary whitespace characters and
1536 Perl-style comments.  C<#line> directives are processed if encountered.
1537 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1538 at a non-space character (or the end of the input text).
1539
1540 If spaces extend into the next chunk of input text, the next chunk will
1541 be read in.  Normally the current chunk will be discarded at the same
1542 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1543 chunk will not be discarded.
1544
1545 =cut
1546 */
1547
1548 #define LEX_NO_INCLINE    0x40000000
1549 #define LEX_NO_NEXT_CHUNK 0x80000000
1550
1551 void
1552 Perl_lex_read_space(pTHX_ U32 flags)
1553 {
1554     char *s, *bufend;
1555     const bool can_incline = !(flags & LEX_NO_INCLINE);
1556     bool need_incline = 0;
1557     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1558         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1559     s = PL_parser->bufptr;
1560     bufend = PL_parser->bufend;
1561     while (1) {
1562         char c = *s;
1563         if (c == '#') {
1564             do {
1565                 c = *++s;
1566             } while (!(c == '\n' || (c == 0 && s == bufend)));
1567         } else if (c == '\n') {
1568             s++;
1569             if (can_incline) {
1570                 PL_parser->linestart = s;
1571                 if (s == bufend)
1572                     need_incline = 1;
1573                 else
1574                     incline(s, bufend);
1575             }
1576         } else if (isSPACE(c)) {
1577             s++;
1578         } else if (c == 0 && s == bufend) {
1579             bool got_more;
1580             line_t l;
1581             if (flags & LEX_NO_NEXT_CHUNK)
1582                 break;
1583             PL_parser->bufptr = s;
1584             l = CopLINE(PL_curcop);
1585             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1586             got_more = lex_next_chunk(flags);
1587             CopLINE_set(PL_curcop, l);
1588             s = PL_parser->bufptr;
1589             bufend = PL_parser->bufend;
1590             if (!got_more)
1591                 break;
1592             if (can_incline && need_incline && PL_parser->rsfp) {
1593                 incline(s, bufend);
1594                 need_incline = 0;
1595             }
1596         } else if (!c) {
1597             s++;
1598         } else {
1599             break;
1600         }
1601     }
1602     PL_parser->bufptr = s;
1603 }
1604
1605 /*
1606
1607 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1608
1609 This function performs syntax checking on a prototype, C<proto>.
1610 If C<warn> is true, any illegal characters or mismatched brackets
1611 will trigger illegalproto warnings, declaring that they were
1612 detected in the prototype for C<name>.
1613
1614 The return value is C<true> if this is a valid prototype, and
1615 C<false> if it is not, regardless of whether C<warn> was C<true> or
1616 C<false>.
1617
1618 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1619
1620 =cut
1621
1622  */
1623
1624 bool
1625 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1626 {
1627     STRLEN len, origlen;
1628     char *p;
1629     bool bad_proto = FALSE;
1630     bool in_brackets = FALSE;
1631     bool after_slash = FALSE;
1632     char greedy_proto = ' ';
1633     bool proto_after_greedy_proto = FALSE;
1634     bool must_be_last = FALSE;
1635     bool underscore = FALSE;
1636     bool bad_proto_after_underscore = FALSE;
1637
1638     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1639
1640     if (!proto)
1641         return TRUE;
1642
1643     p = SvPV(proto, len);
1644     origlen = len;
1645     for (; len--; p++) {
1646         if (!isSPACE(*p)) {
1647             if (must_be_last)
1648                 proto_after_greedy_proto = TRUE;
1649             if (underscore) {
1650                 if (!strchr(";@%", *p))
1651                     bad_proto_after_underscore = TRUE;
1652                 underscore = FALSE;
1653             }
1654             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1655                 bad_proto = TRUE;
1656             }
1657             else {
1658                 if (*p == '[')
1659                     in_brackets = TRUE;
1660                 else if (*p == ']')
1661                     in_brackets = FALSE;
1662                 else if ((*p == '@' || *p == '%')
1663                          && !after_slash
1664                          && !in_brackets )
1665                 {
1666                     must_be_last = TRUE;
1667                     greedy_proto = *p;
1668                 }
1669                 else if (*p == '_')
1670                     underscore = TRUE;
1671             }
1672             if (*p == '\\')
1673                 after_slash = TRUE;
1674             else
1675                 after_slash = FALSE;
1676         }
1677     }
1678
1679     if (warn) {
1680         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1681         p -= origlen;
1682         p = SvUTF8(proto)
1683             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1684                              origlen, UNI_DISPLAY_ISPRINT)
1685             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1686
1687         if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1688             SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1689             sv_catpvs(name2, "::");
1690             sv_catsv(name2, (SV *)name);
1691             name = name2;
1692         }
1693
1694         if (proto_after_greedy_proto)
1695             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1696                         "Prototype after '%c' for %" SVf " : %s",
1697                         greedy_proto, SVfARG(name), p);
1698         if (in_brackets)
1699             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1700                         "Missing ']' in prototype for %" SVf " : %s",
1701                         SVfARG(name), p);
1702         if (bad_proto)
1703             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1704                         "Illegal character in prototype for %" SVf " : %s",
1705                         SVfARG(name), p);
1706         if (bad_proto_after_underscore)
1707             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1708                         "Illegal character after '_' in prototype for %" SVf " : %s",
1709                         SVfARG(name), p);
1710     }
1711
1712     return (! (proto_after_greedy_proto || bad_proto) );
1713 }
1714
1715 /*
1716  * S_incline
1717  * This subroutine has nothing to do with tilting, whether at windmills
1718  * or pinball tables.  Its name is short for "increment line".  It
1719  * increments the current line number in CopLINE(PL_curcop) and checks
1720  * to see whether the line starts with a comment of the form
1721  *    # line 500 "foo.pm"
1722  * If so, it sets the current line number and file to the values in the comment.
1723  */
1724
1725 STATIC void
1726 S_incline(pTHX_ const char *s, const char *end)
1727 {
1728     const char *t;
1729     const char *n;
1730     const char *e;
1731     line_t line_num;
1732     UV uv;
1733
1734     PERL_ARGS_ASSERT_INCLINE;
1735
1736     assert(end >= s);
1737
1738     COPLINE_INC_WITH_HERELINES;
1739     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1740      && s+1 == PL_bufend && *s == ';') {
1741         /* fake newline in string eval */
1742         CopLINE_dec(PL_curcop);
1743         return;
1744     }
1745     if (*s++ != '#')
1746         return;
1747     while (SPACE_OR_TAB(*s))
1748         s++;
1749     if (memBEGINs(s, (STRLEN) (end - s), "line"))
1750         s += sizeof("line") - 1;
1751     else
1752         return;
1753     if (SPACE_OR_TAB(*s))
1754         s++;
1755     else
1756         return;
1757     while (SPACE_OR_TAB(*s))
1758         s++;
1759     if (!isDIGIT(*s))
1760         return;
1761
1762     n = s;
1763     while (isDIGIT(*s))
1764         s++;
1765     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1766         return;
1767     while (SPACE_OR_TAB(*s))
1768         s++;
1769     if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1770         s++;
1771         e = t + 1;
1772     }
1773     else {
1774         t = s;
1775         while (*t && !isSPACE(*t))
1776             t++;
1777         e = t;
1778     }
1779     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1780         e++;
1781     if (*e != '\n' && *e != '\0')
1782         return;         /* false alarm */
1783
1784     if (!grok_atoUV(n, &uv, &e))
1785         return;
1786     line_num = ((line_t)uv) - 1;
1787
1788     if (t - s > 0) {
1789         const STRLEN len = t - s;
1790
1791         if (!PL_rsfp && !PL_parser->filtered) {
1792             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1793              * to *{"::_<newfilename"} */
1794             /* However, the long form of evals is only turned on by the
1795                debugger - usually they're "(eval %lu)" */
1796             GV * const cfgv = CopFILEGV(PL_curcop);
1797             if (cfgv) {
1798                 char smallbuf[128];
1799                 STRLEN tmplen2 = len;
1800                 char *tmpbuf2;
1801                 GV *gv2;
1802
1803                 if (tmplen2 + 2 <= sizeof smallbuf)
1804                     tmpbuf2 = smallbuf;
1805                 else
1806                     Newx(tmpbuf2, tmplen2 + 2, char);
1807
1808                 tmpbuf2[0] = '_';
1809                 tmpbuf2[1] = '<';
1810
1811                 memcpy(tmpbuf2 + 2, s, tmplen2);
1812                 tmplen2 += 2;
1813
1814                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1815                 if (!isGV(gv2)) {
1816                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1817                     /* adjust ${"::_<newfilename"} to store the new file name */
1818                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1819                     /* The line number may differ. If that is the case,
1820                        alias the saved lines that are in the array.
1821                        Otherwise alias the whole array. */
1822                     if (CopLINE(PL_curcop) == line_num) {
1823                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1824                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1825                     }
1826                     else if (GvAV(cfgv)) {
1827                         AV * const av = GvAV(cfgv);
1828                         const line_t start = CopLINE(PL_curcop)+1;
1829                         SSize_t items = AvFILLp(av) - start;
1830                         if (items > 0) {
1831                             AV * const av2 = GvAVn(gv2);
1832                             SV **svp = AvARRAY(av) + start;
1833                             Size_t l = line_num+1;
1834                             while (items-- && l < SSize_t_MAX && l == (line_t)l)
1835                                 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1836                         }
1837                     }
1838                 }
1839
1840                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1841             }
1842         }
1843         CopFILE_free(PL_curcop);
1844         CopFILE_setn(PL_curcop, s, len);
1845     }
1846     CopLINE_set(PL_curcop, line_num);
1847 }
1848
1849 STATIC void
1850 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1851 {
1852     AV *av = CopFILEAVx(PL_curcop);
1853     if (av) {
1854         SV * sv;
1855         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1856         else {
1857             sv = *av_fetch(av, 0, 1);
1858             SvUPGRADE(sv, SVt_PVMG);
1859         }
1860         if (!SvPOK(sv)) SvPVCLEAR(sv);
1861         if (orig_sv)
1862             sv_catsv(sv, orig_sv);
1863         else
1864             sv_catpvn(sv, buf, len);
1865         if (!SvIOK(sv)) {
1866             (void)SvIOK_on(sv);
1867             SvIV_set(sv, 0);
1868         }
1869         if (PL_parser->preambling == NOLINE)
1870             av_store(av, CopLINE(PL_curcop), sv);
1871     }
1872 }
1873
1874 /*
1875  * skipspace
1876  * Called to gobble the appropriate amount and type of whitespace.
1877  * Skips comments as well.
1878  * Returns the next character after the whitespace that is skipped.
1879  *
1880  * peekspace
1881  * Same thing, but look ahead without incrementing line numbers or
1882  * adjusting PL_linestart.
1883  */
1884
1885 #define skipspace(s) skipspace_flags(s, 0)
1886 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1887
1888 STATIC char *
1889 S_skipspace_flags(pTHX_ char *s, U32 flags)
1890 {
1891     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1892     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1893         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1894             s++;
1895     } else {
1896         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1897         PL_bufptr = s;
1898         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1899                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1900                     LEX_NO_NEXT_CHUNK : 0));
1901         s = PL_bufptr;
1902         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1903         if (PL_linestart > PL_bufptr)
1904             PL_bufptr = PL_linestart;
1905         return s;
1906     }
1907     return s;
1908 }
1909
1910 /*
1911  * S_check_uni
1912  * Check the unary operators to ensure there's no ambiguity in how they're
1913  * used.  An ambiguous piece of code would be:
1914  *     rand + 5
1915  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1916  * the +5 is its argument.
1917  */
1918
1919 STATIC void
1920 S_check_uni(pTHX)
1921 {
1922     const char *s;
1923
1924     if (PL_oldoldbufptr != PL_last_uni)
1925         return;
1926     while (isSPACE(*PL_last_uni))
1927         PL_last_uni++;
1928     s = PL_last_uni;
1929     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1930         s += UTF ? UTF8SKIP(s) : 1;
1931     if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
1932         return;
1933
1934     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1935                      "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1936                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1937 }
1938
1939 /*
1940  * LOP : macro to build a list operator.  Its behaviour has been replaced
1941  * with a subroutine, S_lop() for which LOP is just another name.
1942  */
1943
1944 #define LOP(f,x) return lop(f,x,s)
1945
1946 /*
1947  * S_lop
1948  * Build a list operator (or something that might be one).  The rules:
1949  *  - if we have a next token, then it's a list operator (no parens) for
1950  *    which the next token has already been parsed; e.g.,
1951  *       sort foo @args
1952  *       sort foo (@args)
1953  *  - if the next thing is an opening paren, then it's a function
1954  *  - else it's a list operator
1955  */
1956
1957 STATIC I32
1958 S_lop(pTHX_ I32 f, U8 x, char *s)
1959 {
1960     PERL_ARGS_ASSERT_LOP;
1961
1962     pl_yylval.ival = f;
1963     CLINE;
1964     PL_bufptr = s;
1965     PL_last_lop = PL_oldbufptr;
1966     PL_last_lop_op = (OPCODE)f;
1967     if (PL_nexttoke)
1968         goto lstop;
1969     PL_expect = x;
1970     if (*s == '(')
1971         return REPORT(FUNC);
1972     s = skipspace(s);
1973     if (*s == '(')
1974         return REPORT(FUNC);
1975     else {
1976         lstop:
1977         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1978             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1979         return REPORT(LSTOP);
1980     }
1981 }
1982
1983 /*
1984  * S_force_next
1985  * When the lexer realizes it knows the next token (for instance,
1986  * it is reordering tokens for the parser) then it can call S_force_next
1987  * to know what token to return the next time the lexer is called.  Caller
1988  * will need to set PL_nextval[] and possibly PL_expect to ensure
1989  * the lexer handles the token correctly.
1990  */
1991
1992 STATIC void
1993 S_force_next(pTHX_ I32 type)
1994 {
1995 #ifdef DEBUGGING
1996     if (DEBUG_T_TEST) {
1997         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1998         tokereport(type, &NEXTVAL_NEXTTOKE);
1999     }
2000 #endif
2001     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2002     PL_nexttype[PL_nexttoke] = type;
2003     PL_nexttoke++;
2004 }
2005
2006 /*
2007  * S_postderef
2008  *
2009  * This subroutine handles postfix deref syntax after the arrow has already
2010  * been emitted.  @* $* etc. are emitted as two separate tokens right here.
2011  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2012  * only the first, leaving yylex to find the next.
2013  */
2014
2015 static int
2016 S_postderef(pTHX_ int const funny, char const next)
2017 {
2018     assert(funny == DOLSHARP || strchr("$@%&*", funny));
2019     if (next == '*') {
2020         PL_expect = XOPERATOR;
2021         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2022             assert('@' == funny || '$' == funny || DOLSHARP == funny);
2023             PL_lex_state = LEX_INTERPEND;
2024             if ('@' == funny)
2025                 force_next(POSTJOIN);
2026         }
2027         force_next(next);
2028         PL_bufptr+=2;
2029     }
2030     else {
2031         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2032          && !PL_lex_brackets)
2033             PL_lex_dojoin = 2;
2034         PL_expect = XOPERATOR;
2035         PL_bufptr++;
2036     }
2037     return funny;
2038 }
2039
2040 void
2041 Perl_yyunlex(pTHX)
2042 {
2043     int yyc = PL_parser->yychar;
2044     if (yyc != YYEMPTY) {
2045         if (yyc) {
2046             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2047             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2048                 PL_lex_allbrackets--;
2049                 PL_lex_brackets--;
2050                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2051             } else if (yyc == '('/*)*/) {
2052                 PL_lex_allbrackets--;
2053                 yyc |= (2<<24);
2054             }
2055             force_next(yyc);
2056         }
2057         PL_parser->yychar = YYEMPTY;
2058     }
2059 }
2060
2061 STATIC SV *
2062 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2063 {
2064     SV * const sv = newSVpvn_utf8(start, len,
2065                     ! IN_BYTES
2066                   &&  UTF
2067                   &&  len != 0
2068                   &&  is_utf8_non_invariant_string((const U8*)start, len));
2069     return sv;
2070 }
2071
2072 /*
2073  * S_force_word
2074  * When the lexer knows the next thing is a word (for instance, it has
2075  * just seen -> and it knows that the next char is a word char, then
2076  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2077  * lookahead.
2078  *
2079  * Arguments:
2080  *   char *start : buffer position (must be within PL_linestr)
2081  *   int token   : PL_next* will be this type of bare word
2082  *                 (e.g., METHOD,BAREWORD)
2083  *   int check_keyword : if true, Perl checks to make sure the word isn't
2084  *       a keyword (do this if the word is a label, e.g. goto FOO)
2085  *   int allow_pack : if true, : characters will also be allowed (require,
2086  *       use, etc. do this)
2087  */
2088
2089 STATIC char *
2090 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2091 {
2092     char *s;
2093     STRLEN len;
2094
2095     PERL_ARGS_ASSERT_FORCE_WORD;
2096
2097     start = skipspace(start);
2098     s = start;
2099     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2100         || (allow_pack && *s == ':' && s[1] == ':') )
2101     {
2102         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2103         if (check_keyword) {
2104           char *s2 = PL_tokenbuf;
2105           STRLEN len2 = len;
2106           if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2107             s2 += sizeof("CORE::") - 1;
2108             len2 -= sizeof("CORE::") - 1;
2109           }
2110           if (keyword(s2, len2, 0))
2111             return start;
2112         }
2113         if (token == METHOD) {
2114             s = skipspace(s);
2115             if (*s == '(')
2116                 PL_expect = XTERM;
2117             else {
2118                 PL_expect = XOPERATOR;
2119             }
2120         }
2121         NEXTVAL_NEXTTOKE.opval
2122             = newSVOP(OP_CONST,0,
2123                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2124         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2125         force_next(token);
2126     }
2127     return s;
2128 }
2129
2130 /*
2131  * S_force_ident
2132  * Called when the lexer wants $foo *foo &foo etc, but the program
2133  * text only contains the "foo" portion.  The first argument is a pointer
2134  * to the "foo", and the second argument is the type symbol to prefix.
2135  * Forces the next token to be a "BAREWORD".
2136  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2137  */
2138
2139 STATIC void
2140 S_force_ident(pTHX_ const char *s, int kind)
2141 {
2142     PERL_ARGS_ASSERT_FORCE_IDENT;
2143
2144     if (s[0]) {
2145         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2146         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2147                                                                 UTF ? SVf_UTF8 : 0));
2148         NEXTVAL_NEXTTOKE.opval = o;
2149         force_next(BAREWORD);
2150         if (kind) {
2151             o->op_private = OPpCONST_ENTERED;
2152             /* XXX see note in pp_entereval() for why we forgo typo
2153                warnings if the symbol must be introduced in an eval.
2154                GSAR 96-10-12 */
2155             gv_fetchpvn_flags(s, len,
2156                               (PL_in_eval ? GV_ADDMULTI
2157                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2158                               kind == '$' ? SVt_PV :
2159                               kind == '@' ? SVt_PVAV :
2160                               kind == '%' ? SVt_PVHV :
2161                               SVt_PVGV
2162                               );
2163         }
2164     }
2165 }
2166
2167 static void
2168 S_force_ident_maybe_lex(pTHX_ char pit)
2169 {
2170     NEXTVAL_NEXTTOKE.ival = pit;
2171     force_next('p');
2172 }
2173
2174 NV
2175 Perl_str_to_version(pTHX_ SV *sv)
2176 {
2177     NV retval = 0.0;
2178     NV nshift = 1.0;
2179     STRLEN len;
2180     const char *start = SvPV_const(sv,len);
2181     const char * const end = start + len;
2182     const bool utf = cBOOL(SvUTF8(sv));
2183
2184     PERL_ARGS_ASSERT_STR_TO_VERSION;
2185
2186     while (start < end) {
2187         STRLEN skip;
2188         UV n;
2189         if (utf)
2190             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2191         else {
2192             n = *(U8*)start;
2193             skip = 1;
2194         }
2195         retval += ((NV)n)/nshift;
2196         start += skip;
2197         nshift *= 1000;
2198     }
2199     return retval;
2200 }
2201
2202 /*
2203  * S_force_version
2204  * Forces the next token to be a version number.
2205  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2206  * and if "guessing" is TRUE, then no new token is created (and the caller
2207  * must use an alternative parsing method).
2208  */
2209
2210 STATIC char *
2211 S_force_version(pTHX_ char *s, int guessing)
2212 {
2213     OP *version = NULL;
2214     char *d;
2215
2216     PERL_ARGS_ASSERT_FORCE_VERSION;
2217
2218     s = skipspace(s);
2219
2220     d = s;
2221     if (*d == 'v')
2222         d++;
2223     if (isDIGIT(*d)) {
2224         while (isDIGIT(*d) || *d == '_' || *d == '.')
2225             d++;
2226         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2227             SV *ver;
2228             s = scan_num(s, &pl_yylval);
2229             version = pl_yylval.opval;
2230             ver = cSVOPx(version)->op_sv;
2231             if (SvPOK(ver) && !SvNIOK(ver)) {
2232                 SvUPGRADE(ver, SVt_PVNV);
2233                 SvNV_set(ver, str_to_version(ver));
2234                 SvNOK_on(ver);          /* hint that it is a version */
2235             }
2236         }
2237         else if (guessing) {
2238             return s;
2239         }
2240     }
2241
2242     /* NOTE: The parser sees the package name and the VERSION swapped */
2243     NEXTVAL_NEXTTOKE.opval = version;
2244     force_next(BAREWORD);
2245
2246     return s;
2247 }
2248
2249 /*
2250  * S_force_strict_version
2251  * Forces the next token to be a version number using strict syntax rules.
2252  */
2253
2254 STATIC char *
2255 S_force_strict_version(pTHX_ char *s)
2256 {
2257     OP *version = NULL;
2258     const char *errstr = NULL;
2259
2260     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2261
2262     while (isSPACE(*s)) /* leading whitespace */
2263         s++;
2264
2265     if (is_STRICT_VERSION(s,&errstr)) {
2266         SV *ver = newSV(0);
2267         s = (char *)scan_version(s, ver, 0);
2268         version = newSVOP(OP_CONST, 0, ver);
2269     }
2270     else if ((*s != ';' && *s != '{' && *s != '}' )
2271              && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2272     {
2273         PL_bufptr = s;
2274         if (errstr)
2275             yyerror(errstr); /* version required */
2276         return s;
2277     }
2278
2279     /* NOTE: The parser sees the package name and the VERSION swapped */
2280     NEXTVAL_NEXTTOKE.opval = version;
2281     force_next(BAREWORD);
2282
2283     return s;
2284 }
2285
2286 /*
2287  * S_tokeq
2288  * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2289  * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2290  * unchanged, and a new SV containing the modified input is returned.
2291  */
2292
2293 STATIC SV *
2294 S_tokeq(pTHX_ SV *sv)
2295 {
2296     char *s;
2297     char *send;
2298     char *d;
2299     SV *pv = sv;
2300
2301     PERL_ARGS_ASSERT_TOKEQ;
2302
2303     assert (SvPOK(sv));
2304     assert (SvLEN(sv));
2305     assert (!SvIsCOW(sv));
2306     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2307         goto finish;
2308     s = SvPVX(sv);
2309     send = SvEND(sv);
2310     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2311     while (s < send && !(*s == '\\' && s[1] == '\\'))
2312         s++;
2313     if (s == send)
2314         goto finish;
2315     d = s;
2316     if ( PL_hints & HINT_NEW_STRING ) {
2317         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2318                             SVs_TEMP | SvUTF8(sv));
2319     }
2320     while (s < send) {
2321         if (*s == '\\') {
2322             if (s + 1 < send && (s[1] == '\\'))
2323                 s++;            /* all that, just for this */
2324         }
2325         *d++ = *s++;
2326     }
2327     *d = '\0';
2328     SvCUR_set(sv, d - SvPVX_const(sv));
2329   finish:
2330     if ( PL_hints & HINT_NEW_STRING )
2331        return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2332     return sv;
2333 }
2334
2335 /*
2336  * Now come three functions related to double-quote context,
2337  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2338  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2339  * interact with PL_lex_state, and create fake ( ... ) argument lists
2340  * to handle functions and concatenation.
2341  * For example,
2342  *   "foo\lbar"
2343  * is tokenised as
2344  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2345  */
2346
2347 /*
2348  * S_sublex_start
2349  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2350  *
2351  * Pattern matching will set PL_lex_op to the pattern-matching op to
2352  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2353  *
2354  * OP_CONST is easy--just make the new op and return.
2355  *
2356  * Everything else becomes a FUNC.
2357  *
2358  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2359  * had an OP_CONST.  This just sets us up for a
2360  * call to S_sublex_push().
2361  */
2362
2363 STATIC I32
2364 S_sublex_start(pTHX)
2365 {
2366     const I32 op_type = pl_yylval.ival;
2367
2368     if (op_type == OP_NULL) {
2369         pl_yylval.opval = PL_lex_op;
2370         PL_lex_op = NULL;
2371         return THING;
2372     }
2373     if (op_type == OP_CONST) {
2374         SV *sv = PL_lex_stuff;
2375         PL_lex_stuff = NULL;
2376         sv = tokeq(sv);
2377
2378         if (SvTYPE(sv) == SVt_PVIV) {
2379             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2380             STRLEN len;
2381             const char * const p = SvPV_const(sv, len);
2382             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2383             SvREFCNT_dec(sv);
2384             sv = nsv;
2385         }
2386         pl_yylval.opval = newSVOP(op_type, 0, sv);
2387         return THING;
2388     }
2389
2390     PL_parser->lex_super_state = PL_lex_state;
2391     PL_parser->lex_sub_inwhat = (U16)op_type;
2392     PL_parser->lex_sub_op = PL_lex_op;
2393     PL_parser->sub_no_recover = FALSE;
2394     PL_parser->sub_error_count = PL_error_count;
2395     PL_lex_state = LEX_INTERPPUSH;
2396
2397     PL_expect = XTERM;
2398     if (PL_lex_op) {
2399         pl_yylval.opval = PL_lex_op;
2400         PL_lex_op = NULL;
2401         return PMFUNC;
2402     }
2403     else
2404         return FUNC;
2405 }
2406
2407 /*
2408  * S_sublex_push
2409  * Create a new scope to save the lexing state.  The scope will be
2410  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2411  * to the uc, lc, etc. found before.
2412  * Sets PL_lex_state to LEX_INTERPCONCAT.
2413  */
2414
2415 STATIC I32
2416 S_sublex_push(pTHX)
2417 {
2418     LEXSHARED *shared;
2419     const bool is_heredoc = PL_multi_close == '<';
2420     ENTER;
2421
2422     PL_lex_state = PL_parser->lex_super_state;
2423     SAVEI8(PL_lex_dojoin);
2424     SAVEI32(PL_lex_brackets);
2425     SAVEI32(PL_lex_allbrackets);
2426     SAVEI32(PL_lex_formbrack);
2427     SAVEI8(PL_lex_fakeeof);
2428     SAVEI32(PL_lex_casemods);
2429     SAVEI32(PL_lex_starts);
2430     SAVEI8(PL_lex_state);
2431     SAVESPTR(PL_lex_repl);
2432     SAVEVPTR(PL_lex_inpat);
2433     SAVEI16(PL_lex_inwhat);
2434     if (is_heredoc)
2435     {
2436         SAVECOPLINE(PL_curcop);
2437         SAVEI32(PL_multi_end);
2438         SAVEI32(PL_parser->herelines);
2439         PL_parser->herelines = 0;
2440     }
2441     SAVEIV(PL_multi_close);
2442     SAVEPPTR(PL_bufptr);
2443     SAVEPPTR(PL_bufend);
2444     SAVEPPTR(PL_oldbufptr);
2445     SAVEPPTR(PL_oldoldbufptr);
2446     SAVEPPTR(PL_last_lop);
2447     SAVEPPTR(PL_last_uni);
2448     SAVEPPTR(PL_linestart);
2449     SAVESPTR(PL_linestr);
2450     SAVEGENERICPV(PL_lex_brackstack);
2451     SAVEGENERICPV(PL_lex_casestack);
2452     SAVEGENERICPV(PL_parser->lex_shared);
2453     SAVEBOOL(PL_parser->lex_re_reparsing);
2454     SAVEI32(PL_copline);
2455
2456     /* The here-doc parser needs to be able to peek into outer lexing
2457        scopes to find the body of the here-doc.  So we put PL_linestr and
2458        PL_bufptr into lex_shared, to ‘share’ those values.
2459      */
2460     PL_parser->lex_shared->ls_linestr = PL_linestr;
2461     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2462
2463     PL_linestr = PL_lex_stuff;
2464     PL_lex_repl = PL_parser->lex_sub_repl;
2465     PL_lex_stuff = NULL;
2466     PL_parser->lex_sub_repl = NULL;
2467
2468     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2469        set for an inner quote-like operator and then an error causes scope-
2470        popping.  We must not have a PL_lex_stuff value left dangling, as
2471        that breaks assumptions elsewhere.  See bug #123617.  */
2472     SAVEGENERICSV(PL_lex_stuff);
2473     SAVEGENERICSV(PL_parser->lex_sub_repl);
2474
2475     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2476         = SvPVX(PL_linestr);
2477     PL_bufend += SvCUR(PL_linestr);
2478     PL_last_lop = PL_last_uni = NULL;
2479     SAVEFREESV(PL_linestr);
2480     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2481
2482     PL_lex_dojoin = FALSE;
2483     PL_lex_brackets = PL_lex_formbrack = 0;
2484     PL_lex_allbrackets = 0;
2485     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2486     Newx(PL_lex_brackstack, 120, char);
2487     Newx(PL_lex_casestack, 12, char);
2488     PL_lex_casemods = 0;
2489     *PL_lex_casestack = '\0';
2490     PL_lex_starts = 0;
2491     PL_lex_state = LEX_INTERPCONCAT;
2492     if (is_heredoc)
2493         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2494     PL_copline = NOLINE;
2495
2496     Newxz(shared, 1, LEXSHARED);
2497     shared->ls_prev = PL_parser->lex_shared;
2498     PL_parser->lex_shared = shared;
2499
2500     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2501     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2502     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2503         PL_lex_inpat = PL_parser->lex_sub_op;
2504     else
2505         PL_lex_inpat = NULL;
2506
2507     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2508     PL_in_eval &= ~EVAL_RE_REPARSING;
2509
2510     return '(';
2511 }
2512
2513 /*
2514  * S_sublex_done
2515  * Restores lexer state after a S_sublex_push.
2516  */
2517
2518 STATIC I32
2519 S_sublex_done(pTHX)
2520 {
2521     if (!PL_lex_starts++) {
2522         SV * const sv = newSVpvs("");
2523         if (SvUTF8(PL_linestr))
2524             SvUTF8_on(sv);
2525         PL_expect = XOPERATOR;
2526         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2527         return THING;
2528     }
2529
2530     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2531         PL_lex_state = LEX_INTERPCASEMOD;
2532         return yylex();
2533     }
2534
2535     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2536     assert(PL_lex_inwhat != OP_TRANSR);
2537     if (PL_lex_repl) {
2538         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2539         PL_linestr = PL_lex_repl;
2540         PL_lex_inpat = 0;
2541         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2542         PL_bufend += SvCUR(PL_linestr);
2543         PL_last_lop = PL_last_uni = NULL;
2544         PL_lex_dojoin = FALSE;
2545         PL_lex_brackets = 0;
2546         PL_lex_allbrackets = 0;
2547         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2548         PL_lex_casemods = 0;
2549         *PL_lex_casestack = '\0';
2550         PL_lex_starts = 0;
2551         if (SvEVALED(PL_lex_repl)) {
2552             PL_lex_state = LEX_INTERPNORMAL;
2553             PL_lex_starts++;
2554             /*  we don't clear PL_lex_repl here, so that we can check later
2555                 whether this is an evalled subst; that means we rely on the
2556                 logic to ensure sublex_done() is called again only via the
2557                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2558         }
2559         else {
2560             PL_lex_state = LEX_INTERPCONCAT;
2561             PL_lex_repl = NULL;
2562         }
2563         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2564             CopLINE(PL_curcop) +=
2565                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2566                  + PL_parser->herelines;
2567             PL_parser->herelines = 0;
2568         }
2569         return '/';
2570     }
2571     else {
2572         const line_t l = CopLINE(PL_curcop);
2573         LEAVE;
2574         if (PL_parser->sub_error_count != PL_error_count) {
2575             if (PL_parser->sub_no_recover) {
2576                 yyquit();
2577                 NOT_REACHED;
2578             }
2579         }
2580         if (PL_multi_close == '<')
2581             PL_parser->herelines += l - PL_multi_end;
2582         PL_bufend = SvPVX(PL_linestr);
2583         PL_bufend += SvCUR(PL_linestr);
2584         PL_expect = XOPERATOR;
2585         return ')';
2586     }
2587 }
2588
2589 STATIC SV*
2590 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2591 {
2592     /* This justs wraps get_and_check_backslash_N_name() to output any error
2593      * message it returns. */
2594
2595     const char * error_msg = NULL;
2596     SV * result;
2597
2598     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2599
2600     /* charnames doesn't work well if there have been errors found */
2601     if (PL_error_count > 0) {
2602         return NULL;
2603     }
2604
2605     result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2606
2607     if (error_msg) {
2608         yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2609     }
2610
2611     return result;
2612 }
2613
2614 SV*
2615 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2616                                           const char* const e,
2617                                           const bool is_utf8,
2618                                           const char ** error_msg)
2619 {
2620     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2621      * interior, hence to the "}".  Finds what the name resolves to, returning
2622      * an SV* containing it; NULL if no valid one found.
2623      *
2624      * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2625      * doesn't have to be. */
2626
2627     SV* res;
2628     HV * table;
2629     SV **cvp;
2630     SV *cv;
2631     SV *rv;
2632     HV *stash;
2633     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2634     dVAR;
2635
2636     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2637
2638     assert(e >= s);
2639     assert(s > (char *) 3);
2640
2641     res = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2642
2643     if (!SvCUR(res)) {
2644         SvREFCNT_dec_NN(res);
2645         /* diag_listed_as: Unknown charname '%s' */
2646         *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2647         return NULL;
2648     }
2649
2650     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2651                         /* include the <}> */
2652                         e - backslash_ptr + 1, error_msg);
2653     if (! SvPOK(res)) {
2654         SvREFCNT_dec_NN(res);
2655         return NULL;
2656     }
2657
2658     /* See if the charnames handler is the Perl core's, and if so, we can skip
2659      * the validation needed for a user-supplied one, as Perl's does its own
2660      * validation. */
2661     table = GvHV(PL_hintgv);             /* ^H */
2662     cvp = hv_fetchs(table, "charnames", FALSE);
2663     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2664         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2665     {
2666         const char * const name = HvNAME(stash);
2667          if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2668            return res;
2669        }
2670     }
2671
2672     /* Here, it isn't Perl's charname handler.  We can't rely on a
2673      * user-supplied handler to validate the input name.  For non-ut8 input,
2674      * look to see that the first character is legal.  Then loop through the
2675      * rest checking that each is a continuation */
2676
2677     /* This code makes the reasonable assumption that the only Latin1-range
2678      * characters that begin a character name alias are alphabetic, otherwise
2679      * would have to create a isCHARNAME_BEGIN macro */
2680
2681     if (! is_utf8) {
2682         if (! isALPHAU(*s)) {
2683             goto bad_charname;
2684         }
2685         s++;
2686         while (s < e) {
2687             if (! isCHARNAME_CONT(*s)) {
2688                 goto bad_charname;
2689             }
2690             if (*s == ' ' && *(s-1) == ' ') {
2691                 goto multi_spaces;
2692             }
2693             s++;
2694         }
2695     }
2696     else {
2697         /* Similarly for utf8.  For invariants can check directly; for other
2698          * Latin1, can calculate their code point and check; otherwise  use a
2699          * swash */
2700         if (UTF8_IS_INVARIANT(*s)) {
2701             if (! isALPHAU(*s)) {
2702                 goto bad_charname;
2703             }
2704             s++;
2705         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2706             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2707                 goto bad_charname;
2708             }
2709             s += 2;
2710         }
2711         else {
2712             if (! _invlist_contains_cp(PL_utf8_charname_begin,
2713                                        utf8_to_uvchr_buf((U8 *) s,
2714                                                          (U8 *) e,
2715                                                          NULL)))
2716             {
2717                 goto bad_charname;
2718             }
2719             s += UTF8SKIP(s);
2720         }
2721
2722         while (s < e) {
2723             if (UTF8_IS_INVARIANT(*s)) {
2724                 if (! isCHARNAME_CONT(*s)) {
2725                     goto bad_charname;
2726                 }
2727                 if (*s == ' ' && *(s-1) == ' ') {
2728                     goto multi_spaces;
2729                 }
2730                 s++;
2731             }
2732             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2733                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2734                 {
2735                     goto bad_charname;
2736                 }
2737                 s += 2;
2738             }
2739             else {
2740                 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2741                                            utf8_to_uvchr_buf((U8 *) s,
2742                                                              (U8 *) e,
2743                                                              NULL)))
2744                 {
2745                     goto bad_charname;
2746                 }
2747                 s += UTF8SKIP(s);
2748             }
2749         }
2750     }
2751     if (*(s-1) == ' ') {
2752         /* diag_listed_as: charnames alias definitions may not contain
2753                            trailing white-space; marked by <-- HERE in %s
2754          */
2755         *error_msg = Perl_form(aTHX_
2756             "charnames alias definitions may not contain trailing "
2757             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2758             (int)(s - backslash_ptr + 1), backslash_ptr,
2759             (int)(e - s + 1), s + 1);
2760         return NULL;
2761     }
2762
2763     if (SvUTF8(res)) { /* Don't accept malformed charname value */
2764         const U8* first_bad_char_loc;
2765         STRLEN len;
2766         const char* const str = SvPV_const(res, len);
2767         if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2768                                           &first_bad_char_loc)))
2769         {
2770             _force_out_malformed_utf8_message(first_bad_char_loc,
2771                                               (U8 *) PL_parser->bufend,
2772                                               0,
2773                                               0 /* 0 means don't die */ );
2774             /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2775                                immediately after '%s' */
2776             *error_msg = Perl_form(aTHX_
2777                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2778                  (int) (e - backslash_ptr + 1), backslash_ptr,
2779                  (int) ((char *) first_bad_char_loc - str), str);
2780             return NULL;
2781         }
2782     }
2783
2784     return res;
2785
2786   bad_charname: {
2787
2788         /* The final %.*s makes sure that should the trailing NUL be missing
2789          * that this print won't run off the end of the string */
2790         /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2791                            in \N{%s} */
2792         *error_msg = Perl_form(aTHX_
2793             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2794             (int)(s - backslash_ptr + 1), backslash_ptr,
2795             (int)(e - s + 1), s + 1);
2796         return NULL;
2797     }
2798
2799   multi_spaces:
2800         /* diag_listed_as: charnames alias definitions may not contain a
2801                            sequence of multiple spaces; marked by <-- HERE
2802                            in %s */
2803         *error_msg = Perl_form(aTHX_
2804             "charnames alias definitions may not contain a sequence of "
2805             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2806             (int)(s - backslash_ptr + 1), backslash_ptr,
2807             (int)(e - s + 1), s + 1);
2808         return NULL;
2809 }
2810
2811 /*
2812   scan_const
2813
2814   Extracts the next constant part of a pattern, double-quoted string,
2815   or transliteration.  This is terrifying code.
2816
2817   For example, in parsing the double-quoted string "ab\x63$d", it would
2818   stop at the '$' and return an OP_CONST containing 'abc'.
2819
2820   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2821   processing a pattern (PL_lex_inpat is true), a transliteration
2822   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2823
2824   Returns a pointer to the character scanned up to. If this is
2825   advanced from the start pointer supplied (i.e. if anything was
2826   successfully parsed), will leave an OP_CONST for the substring scanned
2827   in pl_yylval. Caller must intuit reason for not parsing further
2828   by looking at the next characters herself.
2829
2830   In patterns:
2831     expand:
2832       \N{FOO}  => \N{U+hex_for_character_FOO}
2833       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2834
2835     pass through:
2836         all other \-char, including \N and \N{ apart from \N{ABC}
2837
2838     stops on:
2839         @ and $ where it appears to be a var, but not for $ as tail anchor
2840         \l \L \u \U \Q \E
2841         (?{  or  (??{
2842
2843   In transliterations:
2844     characters are VERY literal, except for - not at the start or end
2845     of the string, which indicates a range.  However some backslash sequences
2846     are recognized: \r, \n, and the like
2847                     \007 \o{}, \x{}, \N{}
2848     If all elements in the transliteration are below 256,
2849     scan_const expands the range to the full set of intermediate
2850     characters. If the range is in utf8, the hyphen is replaced with
2851     a certain range mark which will be handled by pmtrans() in op.c.
2852
2853   In double-quoted strings:
2854     backslashes:
2855       all those recognized in transliterations
2856       deprecated backrefs: \1 (in substitution replacements)
2857       case and quoting: \U \Q \E
2858     stops on @ and $
2859
2860   scan_const does *not* construct ops to handle interpolated strings.
2861   It stops processing as soon as it finds an embedded $ or @ variable
2862   and leaves it to the caller to work out what's going on.
2863
2864   embedded arrays (whether in pattern or not) could be:
2865       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2866
2867   $ in double-quoted strings must be the symbol of an embedded scalar.
2868
2869   $ in pattern could be $foo or could be tail anchor.  Assumption:
2870   it's a tail anchor if $ is the last thing in the string, or if it's
2871   followed by one of "()| \r\n\t"
2872
2873   \1 (backreferences) are turned into $1 in substitutions
2874
2875   The structure of the code is
2876       while (there's a character to process) {
2877           handle transliteration ranges
2878           skip regexp comments /(?#comment)/ and codes /(?{code})/
2879           skip #-initiated comments in //x patterns
2880           check for embedded arrays
2881           check for embedded scalars
2882           if (backslash) {
2883               deprecate \1 in substitution replacements
2884               handle string-changing backslashes \l \U \Q \E, etc.
2885               switch (what was escaped) {
2886                   handle \- in a transliteration (becomes a literal -)
2887                   if a pattern and not \N{, go treat as regular character
2888                   handle \132 (octal characters)
2889                   handle \x15 and \x{1234} (hex characters)
2890                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2891                   handle \cV (control characters)
2892                   handle printf-style backslashes (\f, \r, \n, etc)
2893               } (end switch)
2894               continue
2895           } (end if backslash)
2896           handle regular character
2897     } (end while character to read)
2898
2899 */
2900
2901 STATIC char *
2902 S_scan_const(pTHX_ char *start)
2903 {
2904     char *send = PL_bufend;             /* end of the constant */
2905     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2906                                            on sizing. */
2907     char *s = start;                    /* start of the constant */
2908     char *d = SvPVX(sv);                /* destination for copies */
2909     bool dorange = FALSE;               /* are we in a translit range? */
2910     bool didrange = FALSE;              /* did we just finish a range? */
2911     bool in_charclass = FALSE;          /* within /[...]/ */
2912     bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
2913     bool s_is_utf8 = cBOOL(UTF);        /* Is the source string assumed to be
2914                                            UTF8?  But, this can show as true
2915                                            when the source isn't utf8, as for
2916                                            example when it is entirely composed
2917                                            of hex constants */
2918     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
2919                                            number of characters found so far
2920                                            that will expand (into 2 bytes)
2921                                            should we have to convert to
2922                                            UTF-8) */
2923     SV *res;                            /* result from charnames */
2924     STRLEN offset_to_max = 0;   /* The offset in the output to where the range
2925                                    high-end character is temporarily placed */
2926
2927     /* Does something require special handling in tr/// ?  This avoids extra
2928      * work in a less likely case.  As such, khw didn't feel it was worth
2929      * adding any branches to the more mainline code to handle this, which
2930      * means that this doesn't get set in some circumstances when things like
2931      * \x{100} get expanded out.  As a result there needs to be extra testing
2932      * done in the tr code */
2933     bool has_above_latin1 = FALSE;
2934
2935     /* Note on sizing:  The scanned constant is placed into sv, which is
2936      * initialized by newSV() assuming one byte of output for every byte of
2937      * input.  This routine expects newSV() to allocate an extra byte for a
2938      * trailing NUL, which this routine will append if it gets to the end of
2939      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2940      * CAPITAL LETTER A}), or more output than input if the constant ends up
2941      * recoded to utf8, but each time a construct is found that might increase
2942      * the needed size, SvGROW() is called.  Its size parameter each time is
2943      * based on the best guess estimate at the time, namely the length used so
2944      * far, plus the length the current construct will occupy, plus room for
2945      * the trailing NUL, plus one byte for every input byte still unscanned */
2946
2947     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2948                        before set */
2949 #ifdef EBCDIC
2950     int backslash_N = 0;            /* ? was the character from \N{} */
2951     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
2952                                        platform-specific like \x65 */
2953 #endif
2954
2955     PERL_ARGS_ASSERT_SCAN_CONST;
2956
2957     assert(PL_lex_inwhat != OP_TRANSR);
2958     if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
2959         /* If we are doing a trans and we know we want UTF8 set expectation */
2960         d_is_utf8  = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2961         s_is_utf8  = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2962     }
2963
2964     /* Protect sv from errors and fatal warnings. */
2965     ENTER_with_name("scan_const");
2966     SAVEFREESV(sv);
2967
2968     /* A bunch of code in the loop below assumes that if s[n] exists and is not
2969      * NUL, then s[n+1] exists.  This assertion makes sure that assumption is
2970      * valid */
2971     assert(*send == '\0');
2972
2973     while (s < send
2974            || dorange   /* Handle tr/// range at right edge of input */
2975     ) {
2976
2977         /* get transliterations out of the way (they're most literal) */
2978         if (PL_lex_inwhat == OP_TRANS) {
2979
2980             /* But there isn't any special handling necessary unless there is a
2981              * range, so for most cases we just drop down and handle the value
2982              * as any other.  There are two exceptions.
2983              *
2984              * 1.  A hyphen indicates that we are actually going to have a
2985              *     range.  In this case, skip the '-', set a flag, then drop
2986              *     down to handle what should be the end range value.
2987              * 2.  After we've handled that value, the next time through, that
2988              *     flag is set and we fix up the range.
2989              *
2990              * Ranges entirely within Latin1 are expanded out entirely, in
2991              * order to make the transliteration a simple table look-up.
2992              * Ranges that extend above Latin1 have to be done differently, so
2993              * there is no advantage to expanding them here, so they are
2994              * stored here as Min, ILLEGAL_UTF8_BYTE, Max.  The illegal byte
2995              * signifies a hyphen without any possible ambiguity.  On EBCDIC
2996              * machines, if the range is expressed as Unicode, the Latin1
2997              * portion is expanded out even if the range extends above
2998              * Latin1.  This is because each code point in it has to be
2999              * processed here individually to get its native translation */
3000
3001             if (! dorange) {
3002
3003                 /* Here, we don't think we're in a range.  If the new character
3004                  * is not a hyphen; or if it is a hyphen, but it's too close to
3005                  * either edge to indicate a range, or if we haven't output any
3006                  * characters yet then it's a regular character. */
3007                 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv)) {
3008
3009                     /* A regular character.  Process like any other, but first
3010                      * clear any flags */
3011                     didrange = FALSE;
3012                     dorange = FALSE;
3013 #ifdef EBCDIC
3014                     non_portable_endpoint = 0;
3015                     backslash_N = 0;
3016 #endif
3017                     /* The tests here for being above Latin1 and similar ones
3018                      * in the following 'else' suffice to find all such
3019                      * occurences in the constant, except those added by a
3020                      * backslash escape sequence, like \x{100}.  Mostly, those
3021                      * set 'has_above_latin1' as appropriate */
3022                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3023                         has_above_latin1 = TRUE;
3024                     }
3025
3026                     /* Drops down to generic code to process current byte */
3027                 }
3028                 else {  /* Is a '-' in the context where it means a range */
3029                     if (didrange) { /* Something like y/A-C-Z// */
3030                         Perl_croak(aTHX_ "Ambiguous range in transliteration"
3031                                          " operator");
3032                     }
3033
3034                     dorange = TRUE;
3035
3036                     s++;    /* Skip past the hyphen */
3037
3038                     /* d now points to where the end-range character will be
3039                      * placed.  Save it so won't have to go finding it later,
3040                      * and drop down to get that character.  (Actually we
3041                      * instead save the offset, to handle the case where a
3042                      * realloc in the meantime could change the actual
3043                      * pointer).  We'll finish processing the range the next
3044                      * time through the loop */
3045                     offset_to_max = d - SvPVX_const(sv);
3046
3047                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3048                         has_above_latin1 = TRUE;
3049                     }
3050
3051                     /* Drops down to generic code to process current byte */
3052                 }
3053             }  /* End of not a range */
3054             else {
3055                 /* Here we have parsed a range.  Now must handle it.  At this
3056                  * point:
3057                  * 'sv' is a SV* that contains the output string we are
3058                  *      constructing.  The final two characters in that string
3059                  *      are the range start and range end, in order.
3060                  * 'd'  points to just beyond the range end in the 'sv' string,
3061                  *      where we would next place something
3062                  * 'offset_to_max' is the offset in 'sv' at which the character
3063                  *      (the range's maximum end point) before 'd'  begins.
3064                  */
3065                 char * max_ptr = SvPVX(sv) + offset_to_max;
3066                 char * min_ptr;
3067                 IV range_min;
3068                 IV range_max;   /* last character in range */
3069                 STRLEN grow;
3070                 Size_t offset_to_min = 0;
3071                 Size_t extras = 0;
3072 #ifdef EBCDIC
3073                 bool convert_unicode;
3074                 IV real_range_max = 0;
3075 #endif
3076                 /* Get the code point values of the range ends. */
3077                 if (d_is_utf8) {
3078                     /* We know the utf8 is valid, because we just constructed
3079                      * it ourselves in previous loop iterations */
3080                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3081                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3082                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3083
3084                     /* This compensates for not all code setting
3085                      * 'has_above_latin1', so that we don't skip stuff that
3086                      * should be executed */
3087                     if (range_max > 255) {
3088                         has_above_latin1 = TRUE;
3089                     }
3090                 }
3091                 else {
3092                     min_ptr = max_ptr - 1;
3093                     range_min = * (U8*) min_ptr;
3094                     range_max = * (U8*) max_ptr;
3095                 }
3096
3097                 /* If the range is just a single code point, like tr/a-a/.../,
3098                  * that code point is already in the output, twice.  We can
3099                  * just back up over the second instance and avoid all the rest
3100                  * of the work.  But if it is a variant character, it's been
3101                  * counted twice, so decrement.  (This unlikely scenario is
3102                  * special cased, like the one for a range of 2 code points
3103                  * below, only because the main-line code below needs a range
3104                  * of 3 or more to work without special casing.  Might as well
3105                  * get it out of the way now.) */
3106                 if (UNLIKELY(range_max == range_min)) {
3107                     d = max_ptr;
3108                     if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3109                         utf8_variant_count--;
3110                     }
3111                     goto range_done;
3112                 }
3113
3114 #ifdef EBCDIC
3115                 /* On EBCDIC platforms, we may have to deal with portable
3116                  * ranges.  These happen if at least one range endpoint is a
3117                  * Unicode value (\N{...}), or if the range is a subset of
3118                  * [A-Z] or [a-z], and both ends are literal characters,
3119                  * like 'A', and not like \x{C1} */
3120                 convert_unicode =
3121                                cBOOL(backslash_N)   /* \N{} forces Unicode,
3122                                                        hence portable range */
3123                     || (     ! non_portable_endpoint
3124                         && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3125                            || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3126                 if (convert_unicode) {
3127
3128                     /* Special handling is needed for these portable ranges.
3129                      * They are defined to be in Unicode terms, which includes
3130                      * all the Unicode code points between the end points.
3131                      * Convert to Unicode to get the Unicode range.  Later we
3132                      * will convert each code point in the range back to
3133                      * native.  */
3134                     range_min = NATIVE_TO_UNI(range_min);
3135                     range_max = NATIVE_TO_UNI(range_max);
3136                 }
3137 #endif
3138
3139                 if (range_min > range_max) {
3140 #ifdef EBCDIC
3141                     if (convert_unicode) {
3142                         /* Need to convert back to native for meaningful
3143                          * messages for this platform */
3144                         range_min = UNI_TO_NATIVE(range_min);
3145                         range_max = UNI_TO_NATIVE(range_max);
3146                     }
3147 #endif
3148                     /* Use the characters themselves for the error message if
3149                      * ASCII printables; otherwise some visible representation
3150                      * of them */
3151                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3152                         Perl_croak(aTHX_
3153                          "Invalid range \"%c-%c\" in transliteration operator",
3154                          (char)range_min, (char)range_max);
3155                     }
3156 #ifdef EBCDIC
3157                     else if (convert_unicode) {
3158         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3159                         Perl_croak(aTHX_
3160                            "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3161                            UVXf "}\" in transliteration operator",
3162                            range_min, range_max);
3163                     }
3164 #endif
3165                     else {
3166         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3167                         Perl_croak(aTHX_
3168                            "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3169                            " in transliteration operator",
3170                            range_min, range_max);
3171                     }
3172                 }
3173
3174                 /* If the range is exactly two code points long, they are
3175                  * already both in the output */
3176                 if (UNLIKELY(range_min + 1 == range_max)) {
3177                     goto range_done;
3178                 }
3179
3180                 /* Here the range contains at least 3 code points */
3181
3182                 if (d_is_utf8) {
3183
3184                     /* If everything in the transliteration is below 256, we
3185                      * can avoid special handling later.  A translation table
3186                      * for each of those bytes is created by op.c.  So we
3187                      * expand out all ranges to their constituent code points.
3188                      * But if we've encountered something above 255, the
3189                      * expanding won't help, so skip doing that.  But if it's
3190                      * EBCDIC, we may have to look at each character below 256
3191                      * if we have to convert to/from Unicode values */
3192                     if (   has_above_latin1
3193 #ifdef EBCDIC
3194                         && (range_min > 255 || ! convert_unicode)
3195 #endif
3196                     ) {
3197                         /* Move the high character one byte to the right; then
3198                          * insert between it and the range begin, an illegal
3199                          * byte which serves to indicate this is a range (using
3200                          * a '-' would be ambiguous). */
3201                         char *e = d++;
3202                         while (e-- > max_ptr) {
3203                             *(e + 1) = *e;
3204                         }
3205                         *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
3206                         goto range_done;
3207                     }
3208
3209                     /* Here, we're going to expand out the range.  For EBCDIC
3210                      * the range can extend above 255 (not so in ASCII), so
3211                      * for EBCDIC, split it into the parts above and below
3212                      * 255/256 */
3213 #ifdef EBCDIC
3214                     if (range_max > 255) {
3215                         real_range_max = range_max;
3216                         range_max = 255;
3217                     }
3218 #endif
3219                 }
3220
3221                 /* Here we need to expand out the string to contain each
3222                  * character in the range.  Grow the output to handle this.
3223                  * For non-UTF8, we need a byte for each code point in the
3224                  * range, minus the three that we've already allocated for: the
3225                  * hyphen, the min, and the max.  For UTF-8, we need this
3226                  * plus an extra byte for each code point that occupies two
3227                  * bytes (is variant) when in UTF-8 (except we've already
3228                  * allocated for the end points, including if they are
3229                  * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3230                  * platforms, it's easy to calculate a precise number.  To
3231                  * start, we count the variants in the range, which we need
3232                  * elsewhere in this function anyway.  (For the case where it
3233                  * isn't easy to calculate, 'extras' has been initialized to 0,
3234                  * and the calculation is done in a loop further down.) */
3235 #ifdef EBCDIC
3236                 if (convert_unicode)
3237 #endif
3238                 {
3239                     /* This is executed unconditionally on ASCII, and for
3240                      * Unicode ranges on EBCDIC.  Under these conditions, all
3241                      * code points above a certain value are variant; and none
3242                      * under that value are.  We just need to find out how much
3243                      * of the range is above that value.  We don't count the
3244                      * end points here, as they will already have been counted
3245                      * as they were parsed. */
3246                     if (range_min >= UTF_CONTINUATION_MARK) {
3247
3248                         /* The whole range is made up of variants */
3249                         extras = (range_max - 1) - (range_min + 1) + 1;
3250                     }
3251                     else if (range_max >= UTF_CONTINUATION_MARK) {
3252
3253                         /* Only the higher portion of the range is variants */
3254                         extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3255                     }
3256
3257                     utf8_variant_count += extras;
3258                 }
3259
3260                 /* The base growth is the number of code points in the range,
3261                  * not including the endpoints, which have already been sized
3262                  * for (and output).  We don't subtract for the hyphen, as it
3263                  * has been parsed but not output, and the SvGROW below is
3264                  * based only on what's been output plus what's left to parse.
3265                  * */
3266                 grow = (range_max - 1) - (range_min + 1) + 1;
3267
3268                 if (d_is_utf8) {
3269 #ifdef EBCDIC
3270                     /* In some cases in EBCDIC, we haven't yet calculated a
3271                      * precise amount needed for the UTF-8 variants.  Just
3272                      * assume the worst case, that everything will expand by a
3273                      * byte */
3274                     if (! convert_unicode) {
3275                         grow *= 2;
3276                     }
3277                     else
3278 #endif
3279                     {
3280                         /* Otherwise we know exactly how many variants there
3281                          * are in the range. */
3282                         grow += extras;
3283                     }
3284                 }
3285
3286                 /* Grow, but position the output to overwrite the range min end
3287                  * point, because in some cases we overwrite that */
3288                 SvCUR_set(sv, d - SvPVX_const(sv));
3289                 offset_to_min = min_ptr - SvPVX_const(sv);
3290
3291                 /* See Note on sizing above. */
3292                 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3293                                              + (send - s)
3294                                              + grow
3295                                              + 1 /* Trailing NUL */ );
3296
3297                 /* Now, we can expand out the range. */
3298 #ifdef EBCDIC
3299                 if (convert_unicode) {
3300                     SSize_t i;
3301
3302                     /* Recall that the min and max are now in Unicode terms, so
3303                      * we have to convert each character to its native
3304                      * equivalent */
3305                     if (d_is_utf8) {
3306                         for (i = range_min; i <= range_max; i++) {
3307                             append_utf8_from_native_byte(
3308                                                     LATIN1_TO_NATIVE((U8) i),
3309                                                     (U8 **) &d);
3310                         }
3311                     }
3312                     else {
3313                         for (i = range_min; i <= range_max; i++) {
3314                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3315                         }
3316                     }
3317                 }
3318                 else
3319 #endif
3320                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3321                 {
3322                     /* Here, no conversions are necessary, which means that the
3323                      * first character in the range is already in 'd' and
3324                      * valid, so we can skip overwriting it */
3325                     if (d_is_utf8) {
3326                         SSize_t i;
3327                         d += UTF8SKIP(d);
3328                         for (i = range_min + 1; i <= range_max; i++) {
3329                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3330                         }
3331                     }
3332                     else {
3333                         SSize_t i;
3334                         d++;
3335                         assert(range_min + 1 <= range_max);
3336                         for (i = range_min + 1; i < range_max; i++) {
3337 #ifdef EBCDIC
3338                             /* In this case on EBCDIC, we haven't calculated
3339                              * the variants.  Do it here, as we go along */
3340                             if (! UVCHR_IS_INVARIANT(i)) {
3341                                 utf8_variant_count++;
3342                             }
3343 #endif
3344                             *d++ = (char)i;
3345                         }
3346
3347                         /* The range_max is done outside the loop so as to
3348                          * avoid having to special case not incrementing
3349                          * 'utf8_variant_count' on EBCDIC (it's already been
3350                          * counted when originally parsed) */
3351                         *d++ = (char) range_max;
3352                     }
3353                 }
3354
3355 #ifdef EBCDIC
3356                 /* If the original range extended above 255, add in that
3357                  * portion. */
3358                 if (real_range_max) {
3359                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3360                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3361                     if (real_range_max > 0x100) {
3362                         if (real_range_max > 0x101) {
3363                             *d++ = (char) ILLEGAL_UTF8_BYTE;
3364                         }
3365                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3366                     }
3367                 }
3368 #endif
3369
3370               range_done:
3371                 /* mark the range as done, and continue */
3372                 didrange = TRUE;
3373                 dorange = FALSE;
3374 #ifdef EBCDIC
3375                 non_portable_endpoint = 0;
3376                 backslash_N = 0;
3377 #endif
3378                 continue;
3379             } /* End of is a range */
3380         } /* End of transliteration.  Joins main code after these else's */
3381         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3382             char *s1 = s-1;
3383             int esc = 0;
3384             while (s1 >= start && *s1-- == '\\')
3385                 esc = !esc;
3386             if (!esc)
3387                 in_charclass = TRUE;
3388         }
3389         else if (*s == ']' && PL_lex_inpat && in_charclass) {
3390             char *s1 = s-1;
3391             int esc = 0;
3392             while (s1 >= start && *s1-- == '\\')
3393                 esc = !esc;
3394             if (!esc)
3395                 in_charclass = FALSE;
3396         }
3397             /* skip for regexp comments /(?#comment)/, except for the last
3398              * char, which will be done separately.  Stop on (?{..}) and
3399              * friends */
3400         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3401             if (s[2] == '#') {
3402                 if (s_is_utf8) {
3403                     PERL_UINT_FAST8_T  len = UTF8SKIP(s);
3404
3405                     while (s + len < send && *s != ')') {
3406                         Copy(s, d, len, U8);
3407                         d += len;
3408                         s += len;
3409                         len = UTF8_SAFE_SKIP(s, send);
3410                     }
3411                 }
3412                 else while (s+1 < send && *s != ')') {
3413                     *d++ = *s++;
3414                 }
3415             }
3416             else if (!PL_lex_casemods
3417                      && (    s[2] == '{' /* This should match regcomp.c */
3418                          || (s[2] == '?' && s[3] == '{')))
3419             {
3420                 break;
3421             }
3422         }
3423             /* likewise skip #-initiated comments in //x patterns */
3424         else if (*s == '#'
3425                  && PL_lex_inpat
3426                  && !in_charclass
3427                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3428         {
3429             while (s < send && *s != '\n')
3430                 *d++ = *s++;
3431         }
3432             /* no further processing of single-quoted regex */
3433         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3434             goto default_action;
3435
3436             /* check for embedded arrays
3437              * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3438              */
3439         else if (*s == '@' && s[1]) {
3440             if (UTF
3441                ? isIDFIRST_utf8_safe(s+1, send)
3442                : isWORDCHAR_A(s[1]))
3443             {
3444                 break;
3445             }
3446             if (strchr(":'{$", s[1]))
3447                 break;
3448             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3449                 break; /* in regexp, neither @+ nor @- are interpolated */
3450         }
3451             /* check for embedded scalars.  only stop if we're sure it's a
3452              * variable.  */
3453         else if (*s == '$') {
3454             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3455                 break;
3456             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3457                 if (s[1] == '\\') {
3458                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3459                                    "Possible unintended interpolation of $\\ in regex");
3460                 }
3461                 break;          /* in regexp, $ might be tail anchor */
3462             }
3463         }
3464
3465         /* End of else if chain - OP_TRANS rejoin rest */
3466
3467         if (UNLIKELY(s >= send)) {
3468             assert(s == send);
3469             break;
3470         }
3471
3472         /* backslashes */
3473         if (*s == '\\' && s+1 < send) {
3474             char* e;    /* Can be used for ending '}', etc. */
3475
3476             s++;
3477
3478             /* warn on \1 - \9 in substitution replacements, but note that \11
3479              * is an octal; and \19 is \1 followed by '9' */
3480             if (PL_lex_inwhat == OP_SUBST
3481                 && !PL_lex_inpat
3482                 && isDIGIT(*s)
3483                 && *s != '0'
3484                 && !isDIGIT(s[1]))
3485             {
3486                 /* diag_listed_as: \%d better written as $%d */
3487                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3488                 *--s = '$';
3489                 break;
3490             }
3491
3492             /* string-change backslash escapes */
3493             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3494                 --s;
3495                 break;
3496             }
3497             /* In a pattern, process \N, but skip any other backslash escapes.
3498              * This is because we don't want to translate an escape sequence
3499              * into a meta symbol and have the regex compiler use the meta
3500              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3501              * in spite of this, we do have to process \N here while the proper
3502              * charnames handler is in scope.  See bugs #56444 and #62056.
3503              *
3504              * There is a complication because \N in a pattern may also stand
3505              * for 'match a non-nl', and not mean a charname, in which case its
3506              * processing should be deferred to the regex compiler.  To be a
3507              * charname it must be followed immediately by a '{', and not look
3508              * like \N followed by a curly quantifier, i.e., not something like
3509              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3510              * quantifier */
3511             else if (PL_lex_inpat
3512                     && (*s != 'N'
3513                         || s[1] != '{'
3514                         || regcurly(s + 1)))
3515             {
3516                 *d++ = '\\';
3517                 goto default_action;
3518             }
3519
3520             switch (*s) {
3521             default:
3522                 {
3523                     if ((isALPHANUMERIC(*s)))
3524                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3525                                        "Unrecognized escape \\%c passed through",
3526                                        *s);
3527                     /* default action is to copy the quoted character */
3528                     goto default_action;
3529                 }
3530
3531             /* eg. \132 indicates the octal constant 0132 */
3532             case '0': case '1': case '2': case '3':
3533             case '4': case '5': case '6': case '7':
3534                 {
3535                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3536                     STRLEN len = 3;
3537                     uv = grok_oct(s, &len, &flags, NULL);
3538                     s += len;
3539                     if (len < 3 && s < send && isDIGIT(*s)
3540                         && ckWARN(WARN_MISC))
3541                     {
3542                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3543                                     "%s", form_short_octal_warning(s, len));
3544                     }
3545                 }
3546                 goto NUM_ESCAPE_INSERT;
3547
3548             /* eg. \o{24} indicates the octal constant \024 */
3549             case 'o':
3550                 {
3551                     const char* error;
3552
3553                     bool valid = grok_bslash_o(&s, send,
3554                                                &uv, &error,
3555                                                TRUE, /* Output warning */
3556                                                FALSE, /* Not strict */
3557                                                TRUE, /* Output warnings for
3558                                                          non-portables */
3559                                                UTF);
3560                     if (! valid) {
3561                         yyerror(error);
3562                         uv = 0; /* drop through to ensure range ends are set */
3563                     }
3564                     goto NUM_ESCAPE_INSERT;
3565                 }
3566
3567             /* eg. \x24 indicates the hex constant 0x24 */
3568             case 'x':
3569                 {
3570                     const char* error;
3571
3572                     bool valid = grok_bslash_x(&s, send,
3573                                                &uv, &error,
3574                                                TRUE, /* Output warning */
3575                                                FALSE, /* Not strict */
3576                                                TRUE,  /* Output warnings for
3577                                                          non-portables */
3578                                                UTF);
3579                     if (! valid) {
3580                         yyerror(error);
3581                         uv = 0; /* drop through to ensure range ends are set */
3582                     }
3583                 }
3584
3585               NUM_ESCAPE_INSERT:
3586                 /* Insert oct or hex escaped character. */
3587
3588                 /* Here uv is the ordinal of the next character being added */
3589                 if (UVCHR_IS_INVARIANT(uv)) {
3590                     *d++ = (char) uv;
3591                 }
3592                 else {
3593                     if (!d_is_utf8 && uv > 255) {
3594
3595                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3596                          * If we've only seen invariants so far, all we have to
3597                          * do is turn on the flag */
3598                         if (utf8_variant_count == 0) {
3599                             SvUTF8_on(sv);
3600                         }
3601                         else {
3602                             SvCUR_set(sv, d - SvPVX_const(sv));
3603                             SvPOK_on(sv);
3604                             *d = '\0';
3605
3606                             sv_utf8_upgrade_flags_grow(
3607                                            sv,
3608                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3609
3610                                            /* Since we're having to grow here,
3611                                             * make sure we have enough room for
3612                                             * this escape and a NUL, so the
3613                                             * code immediately below won't have
3614                                             * to actually grow again */
3615                                           UVCHR_SKIP(uv)
3616                                         + (STRLEN)(send - s) + 1);
3617                             d = SvPVX(sv) + SvCUR(sv);
3618                         }
3619
3620                         has_above_latin1 = TRUE;
3621                         d_is_utf8 = TRUE;
3622                     }
3623
3624                     if (! d_is_utf8) {
3625                         *d++ = (char)uv;
3626                         utf8_variant_count++;
3627                     }
3628                     else {
3629                        /* Usually, there will already be enough room in 'sv'
3630                         * since such escapes are likely longer than any UTF-8
3631                         * sequence they can end up as.  This isn't the case on
3632                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3633                         * UTF-8 for it contains 14.  And, we have to allow for
3634                         * a trailing NUL.  It probably can't happen on ASCII
3635                         * platforms, but be safe.  See Note on sizing above. */
3636                         const STRLEN needed = d - SvPVX(sv)
3637                                             + UVCHR_SKIP(uv)
3638                                             + (send - s)
3639                                             + 1;
3640                         if (UNLIKELY(needed > SvLEN(sv))) {
3641                             SvCUR_set(sv, d - SvPVX_const(sv));
3642                             d = SvCUR(sv) + SvGROW(sv, needed);
3643                         }
3644
3645                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3646                         if (PL_lex_inwhat == OP_TRANS
3647                             && PL_parser->lex_sub_op)
3648                         {
3649                             PL_parser->lex_sub_op->op_private |=
3650                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3651                                              : OPpTRANS_TO_UTF);
3652                         }
3653                     }
3654                 }
3655 #ifdef EBCDIC
3656                 non_portable_endpoint++;
3657 #endif
3658                 continue;
3659
3660             case 'N':
3661                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3662                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3663                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3664                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3665                  * convenience all three forms are referred to as "named
3666                  * characters" below.
3667                  *
3668                  * For patterns, \N also can mean to match a non-newline.  Code
3669                  * before this 'switch' statement should already have handled
3670                  * this situation, and hence this code only has to deal with
3671                  * the named character cases.
3672                  *
3673                  * For non-patterns, the named characters are converted to
3674                  * their string equivalents.  In patterns, named characters are
3675                  * not converted to their ultimate forms for the same reasons
3676                  * that other escapes aren't (mainly that the ultimate
3677                  * character could be considered a meta-symbol by the regex
3678                  * compiler).  Instead, they are converted to the \N{U+...}
3679                  * form to get the value from the charnames that is in effect
3680                  * right now, while preserving the fact that it was a named
3681                  * character, so that the regex compiler knows this.
3682                  *
3683                  * The structure of this section of code (besides checking for
3684                  * errors and upgrading to utf8) is:
3685                  *    If the named character is of the form \N{U+...}, pass it
3686                  *      through if a pattern; otherwise convert the code point
3687                  *      to utf8
3688                  *    Otherwise must be some \N{NAME}: convert to
3689                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3690                  *
3691                  * Transliteration is an exception.  The conversion to utf8 is
3692                  * only done if the code point requires it to be representable.
3693                  *
3694                  * Here, 's' points to the 'N'; the test below is guaranteed to
3695                  * succeed if we are being called on a pattern, as we already
3696                  * know from a test above that the next character is a '{'.  A
3697                  * non-pattern \N must mean 'named character', which requires
3698                  * braces */
3699                 s++;
3700                 if (*s != '{') {
3701                     yyerror("Missing braces on \\N{}");
3702                     *d++ = '\0';
3703                     continue;
3704                 }
3705                 s++;
3706
3707                 /* If there is no matching '}', it is an error. */
3708                 if (! (e = (char *) memchr(s, '}', send - s))) {
3709                     if (! PL_lex_inpat) {
3710                         yyerror("Missing right brace on \\N{}");
3711                     } else {
3712                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3713                     }
3714                     yyquit(); /* Have exhausted the input. */
3715                 }
3716
3717                 /* Here it looks like a named character */
3718
3719                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3720                     s += 2;         /* Skip to next char after the 'U+' */
3721                     if (PL_lex_inpat) {
3722
3723                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3724                         /* Check the syntax.  */
3725                         const char *orig_s;
3726                         orig_s = s - 5;
3727                         if (!isXDIGIT(*s)) {
3728                           bad_NU:
3729                             yyerror(
3730                                 "Invalid hexadecimal number in \\N{U+...}"
3731                             );
3732                             s = e + 1;
3733                             *d++ = '\0';
3734                             continue;
3735                         }
3736                         while (++s < e) {
3737                             if (isXDIGIT(*s))
3738                                 continue;
3739                             else if ((*s == '.' || *s == '_')
3740                                   && isXDIGIT(s[1]))
3741                                 continue;
3742                             goto bad_NU;
3743                         }
3744
3745                         /* Pass everything through unchanged.
3746                          * +1 is for the '}' */
3747                         Copy(orig_s, d, e - orig_s + 1, char);
3748                         d += e - orig_s + 1;
3749                     }
3750                     else {  /* Not a pattern: convert the hex to string */
3751                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3752                                 | PERL_SCAN_SILENT_ILLDIGIT
3753                                 | PERL_SCAN_DISALLOW_PREFIX;
3754                         STRLEN len = e - s;
3755                         uv = grok_hex(s, &len, &flags, NULL);
3756                         if (len == 0 || (len != (STRLEN)(e - s)))
3757                             goto bad_NU;
3758
3759                          /* For non-tr///, if the destination is not in utf8,
3760                           * unconditionally recode it to be so.  This is
3761                           * because \N{} implies Unicode semantics, and scalars
3762                           * have to be in utf8 to guarantee those semantics.
3763                           * tr/// doesn't care about Unicode rules, so no need
3764                           * there to upgrade to UTF-8 for small enough code
3765                           * points */
3766                         if (! d_is_utf8 && (   uv > 0xFF
3767                                            || PL_lex_inwhat != OP_TRANS))
3768                         {
3769                             /* See Note on sizing above.  */
3770                             const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3771
3772                             SvCUR_set(sv, d - SvPVX_const(sv));
3773                             SvPOK_on(sv);
3774                             *d = '\0';
3775
3776                             if (utf8_variant_count == 0) {
3777                                 SvUTF8_on(sv);
3778                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3779                             }
3780                             else {
3781                                 sv_utf8_upgrade_flags_grow(
3782                                                sv,
3783                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3784                                                extra);
3785                                 d = SvPVX(sv) + SvCUR(sv);
3786                             }
3787
3788                             d_is_utf8 = TRUE;
3789                             has_above_latin1 = TRUE;
3790                         }
3791
3792                         /* Add the (Unicode) code point to the output. */
3793                         if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3794                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3795                         }
3796                         else {
3797                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3798                         }
3799                     }
3800                 }
3801                 else /* Here is \N{NAME} but not \N{U+...}. */
3802                      if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
3803                 {   /* Failed.  We should die eventually, but for now use a NUL
3804                        to keep parsing */
3805                     *d++ = '\0';
3806                 }
3807                 else {  /* Successfully evaluated the name */
3808                     STRLEN len;
3809                     const char *str = SvPV_const(res, len);
3810                     if (PL_lex_inpat) {
3811
3812                         if (! len) { /* The name resolved to an empty string */
3813                             const char empty_N[] = "\\N{_}";
3814                             Copy(empty_N, d, sizeof(empty_N) - 1, char);
3815                             d += sizeof(empty_N) - 1;
3816                         }
3817                         else {
3818                             /* In order to not lose information for the regex
3819                             * compiler, pass the result in the specially made
3820                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3821                             * the code points in hex of each character
3822                             * returned by charnames */
3823
3824                             const char *str_end = str + len;
3825                             const STRLEN off = d - SvPVX_const(sv);
3826
3827                             if (! SvUTF8(res)) {
3828                                 /* For the non-UTF-8 case, we can determine the
3829                                  * exact length needed without having to parse
3830                                  * through the string.  Each character takes up
3831                                  * 2 hex digits plus either a trailing dot or
3832                                  * the "}" */
3833                                 const char initial_text[] = "\\N{U+";
3834                                 const STRLEN initial_len = sizeof(initial_text)
3835                                                            - 1;
3836                                 d = off + SvGROW(sv, off
3837                                                     + 3 * len
3838
3839                                                     /* +1 for trailing NUL */
3840                                                     + initial_len + 1
3841
3842                                                     + (STRLEN)(send - e));
3843                                 Copy(initial_text, d, initial_len, char);
3844                                 d += initial_len;
3845                                 while (str < str_end) {
3846                                     char hex_string[4];
3847                                     int len =
3848                                         my_snprintf(hex_string,
3849                                                   sizeof(hex_string),
3850                                                   "%02X.",
3851
3852                                                   /* The regex compiler is
3853                                                    * expecting Unicode, not
3854                                                    * native */
3855                                                   NATIVE_TO_LATIN1(*str));
3856                                     PERL_MY_SNPRINTF_POST_GUARD(len,
3857                                                            sizeof(hex_string));
3858                                     Copy(hex_string, d, 3, char);
3859                                     d += 3;
3860                                     str++;
3861                                 }
3862                                 d--;    /* Below, we will overwrite the final
3863                                            dot with a right brace */
3864                             }
3865                             else {
3866                                 STRLEN char_length; /* cur char's byte length */
3867
3868                                 /* and the number of bytes after this is
3869                                  * translated into hex digits */
3870                                 STRLEN output_length;
3871
3872                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3873                                  * for max('U+', '.'); and 1 for NUL */
3874                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3875
3876                                 /* Get the first character of the result. */
3877                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3878                                                         len,
3879                                                         &char_length,
3880                                                         UTF8_ALLOW_ANYUV);
3881                                 /* Convert first code point to Unicode hex,
3882                                  * including the boiler plate before it. */
3883                                 output_length =
3884                                     my_snprintf(hex_string, sizeof(hex_string),
3885                                              "\\N{U+%X",
3886                                              (unsigned int) NATIVE_TO_UNI(uv));
3887
3888                                 /* Make sure there is enough space to hold it */
3889                                 d = off + SvGROW(sv, off
3890                                                     + output_length
3891                                                     + (STRLEN)(send - e)
3892                                                     + 2);       /* '}' + NUL */
3893                                 /* And output it */
3894                                 Copy(hex_string, d, output_length, char);
3895                                 d += output_length;
3896
3897                                 /* For each subsequent character, append dot and
3898                                 * its Unicode code point in hex */
3899                                 while ((str += char_length) < str_end) {
3900                                     const STRLEN off = d - SvPVX_const(sv);
3901                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3902                                                             str_end - str,
3903                                                             &char_length,
3904                                                             UTF8_ALLOW_ANYUV);
3905                                     output_length =
3906                                         my_snprintf(hex_string,
3907                                              sizeof(hex_string),
3908                                              ".%X",
3909                                              (unsigned int) NATIVE_TO_UNI(uv));
3910
3911                                     d = off + SvGROW(sv, off
3912                                                         + output_length
3913                                                         + (STRLEN)(send - e)
3914                                                         + 2);   /* '}' +  NUL */
3915                                     Copy(hex_string, d, output_length, char);
3916                                     d += output_length;
3917                                 }
3918                             }
3919
3920                             *d++ = '}'; /* Done.  Add the trailing brace */
3921                         }
3922                     }
3923                     else { /* Here, not in a pattern.  Convert the name to a
3924                             * string. */
3925
3926                         if (PL_lex_inwhat == OP_TRANS) {
3927                             str = SvPV_const(res, len);
3928                             if (len > ((SvUTF8(res))
3929                                        ? UTF8SKIP(str)
3930                                        : 1U))
3931                             {
3932                                 yyerror(Perl_form(aTHX_
3933                                     "%.*s must not be a named sequence"
3934                                     " in transliteration operator",
3935                                         /*  +1 to include the "}" */
3936                                     (int) (e + 1 - start), start));
3937                                 *d++ = '\0';
3938                                 goto end_backslash_N;
3939                             }
3940
3941                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
3942                                 has_above_latin1 = TRUE;
3943                             }
3944
3945                         }
3946                         else if (! SvUTF8(res)) {
3947                             /* Make sure \N{} return is UTF-8.  This is because
3948                              * \N{} implies Unicode semantics, and scalars have
3949                              * to be in utf8 to guarantee those semantics; but
3950                              * not needed in tr/// */
3951                             sv_utf8_upgrade_flags(res, 0);
3952                             str = SvPV_const(res, len);
3953                         }
3954
3955                          /* Upgrade destination to be utf8 if this new
3956                           * component is */
3957                         if (! d_is_utf8 && SvUTF8(res)) {
3958                             /* See Note on sizing above.  */
3959                             const STRLEN extra = len + (send - s) + 1;
3960
3961                             SvCUR_set(sv, d - SvPVX_const(sv));
3962                             SvPOK_on(sv);
3963                             *d = '\0';
3964
3965                             if (utf8_variant_count == 0) {
3966                                 SvUTF8_on(sv);
3967                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3968                             }
3969                             else {
3970                                 sv_utf8_upgrade_flags_grow(sv,
3971                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3972                                                 extra);
3973                                 d = SvPVX(sv) + SvCUR(sv);
3974                             }
3975                             d_is_utf8 = TRUE;
3976                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3977
3978                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3979                              * set correctly here). */
3980                             const STRLEN extra = len + (send - e) + 1;
3981                             const STRLEN off = d - SvPVX_const(sv);
3982                             d = off + SvGROW(sv, off + extra);
3983                         }
3984                         Copy(str, d, len, char);
3985                         d += len;
3986                     }
3987
3988                     SvREFCNT_dec(res);
3989
3990                 } /* End \N{NAME} */
3991
3992               end_backslash_N:
3993 #ifdef EBCDIC
3994                 backslash_N++; /* \N{} is defined to be Unicode */
3995 #endif
3996                 s = e + 1;  /* Point to just after the '}' */
3997                 continue;
3998
3999             /* \c is a control character */
4000             case 'c':
4001                 s++;
4002                 if (s < send) {
4003                     *d++ = grok_bslash_c(*s, 1);
4004                 }
4005                 else {
4006                     yyerror("Missing control char name in \\c");
4007                     yyquit();   /* Are at end of input, no sense continuing */
4008                 }
4009 #ifdef EBCDIC
4010                 non_portable_endpoint++;
4011 #endif
4012                 break;
4013
4014             /* printf-style backslashes, formfeeds, newlines, etc */
4015             case 'b':
4016                 *d++ = '\b';
4017                 break;
4018             case 'n':
4019                 *d++ = '\n';
4020                 break;
4021             case 'r':
4022                 *d++ = '\r';
4023                 break;
4024             case 'f':
4025                 *d++ = '\f';
4026                 break;
4027             case 't':
4028                 *d++ = '\t';
4029                 break;
4030             case 'e':
4031                 *d++ = ESC_NATIVE;
4032                 break;
4033             case 'a':
4034                 *d++ = '\a';
4035                 break;
4036             } /* end switch */
4037
4038             s++;
4039             continue;
4040         } /* end if (backslash) */
4041
4042     default_action:
4043         /* Just copy the input to the output, though we may have to convert
4044          * to/from UTF-8.
4045          *
4046          * If the input has the same representation in UTF-8 as not, it will be
4047          * a single byte, and we don't care about UTF8ness; just copy the byte */
4048         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4049             *d++ = *s++;
4050         }
4051         else if (! s_is_utf8 && ! d_is_utf8) {
4052             /* If neither source nor output is UTF-8, is also a single byte,
4053              * just copy it; but this byte counts should we later have to
4054              * convert to UTF-8 */
4055             *d++ = *s++;
4056             utf8_variant_count++;
4057         }
4058         else if (s_is_utf8 && d_is_utf8) {   /* Both UTF-8, can just copy */
4059             const STRLEN len = UTF8SKIP(s);
4060
4061             /* We expect the source to have already been checked for
4062              * malformedness */
4063             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4064
4065             Copy(s, d, len, U8);
4066             d += len;
4067             s += len;
4068         }
4069         else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4070             STRLEN need = send - s + 1; /* See Note on sizing above. */
4071
4072             SvCUR_set(sv, d - SvPVX_const(sv));
4073             SvPOK_on(sv);
4074             *d = '\0';
4075
4076             if (utf8_variant_count == 0) {
4077                 SvUTF8_on(sv);
4078                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4079             }
4080             else {
4081                 sv_utf8_upgrade_flags_grow(sv,
4082                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4083                                            need);
4084                 d = SvPVX(sv) + SvCUR(sv);
4085             }
4086             d_is_utf8 = TRUE;
4087             goto default_action; /* Redo, having upgraded so both are UTF-8 */
4088         }
4089         else {  /* UTF8ness matters: convert this non-UTF8 source char to
4090                    UTF-8 for output.  It will occupy 2 bytes */
4091             if (d + 2 >= SvEND(sv)) {
4092                 const STRLEN extra = 2 + (send - s - 1) + 1;
4093                 const STRLEN off = d - SvPVX_const(sv);
4094                 d = off + SvGROW(sv, off + extra);
4095             }
4096             *d++ = UTF8_EIGHT_BIT_HI(*s);
4097             *d++ = UTF8_EIGHT_BIT_LO(*s);
4098             s++;
4099         }
4100     } /* while loop to process each character */
4101
4102     /* terminate the string and set up the sv */
4103     *d = '\0';
4104     SvCUR_set(sv, d - SvPVX_const(sv));
4105     if (SvCUR(sv) >= SvLEN(sv))
4106         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
4107                    " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
4108
4109     SvPOK_on(sv);
4110     if (d_is_utf8) {
4111         SvUTF8_on(sv);
4112         if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
4113             PL_parser->lex_sub_op->op_private |=
4114                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
4115         }
4116     }
4117
4118     /* shrink the sv if we allocated more than we used */
4119     if (SvCUR(sv) + 5 < SvLEN(sv)) {
4120         SvPV_shrink_to_cur(sv);
4121     }
4122
4123     /* return the substring (via pl_yylval) only if we parsed anything */
4124     if (s > start) {
4125         char *s2 = start;
4126         for (; s2 < s; s2++) {
4127             if (*s2 == '\n')
4128                 COPLINE_INC_WITH_HERELINES;
4129         }
4130         SvREFCNT_inc_simple_void_NN(sv);
4131         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4132             && ! PL_parser->lex_re_reparsing)
4133         {
4134             const char *const key = PL_lex_inpat ? "qr" : "q";
4135             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4136             const char *type;
4137             STRLEN typelen;
4138
4139             if (PL_lex_inwhat == OP_TRANS) {
4140                 type = "tr";
4141                 typelen = 2;
4142             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4143                 type = "s";
4144                 typelen = 1;
4145             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4146                 type = "q";
4147                 typelen = 1;
4148             } else  {
4149                 type = "qq";
4150                 typelen = 2;
4151             }
4152
4153             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4154                                 type, typelen, NULL);
4155         }
4156         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4157     }
4158     LEAVE_with_name("scan_const");
4159     return s;
4160 }
4161
4162 /* S_intuit_more
4163  * Returns TRUE if there's more to the expression (e.g., a subscript),
4164  * FALSE otherwise.
4165  *
4166  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4167  *
4168  * ->[ and ->{ return TRUE
4169  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4170  * { and [ outside a pattern are always subscripts, so return TRUE
4171  * if we're outside a pattern and it's not { or [, then return FALSE
4172  * if we're in a pattern and the first char is a {
4173  *   {4,5} (any digits around the comma) returns FALSE
4174  * if we're in a pattern and the first char is a [
4175  *   [] returns FALSE
4176  *   [SOMETHING] has a funky algorithm to decide whether it's a
4177  *      character class or not.  It has to deal with things like
4178  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4179  * anything else returns TRUE
4180  */
4181
4182 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4183
4184 STATIC int
4185 S_intuit_more(pTHX_ char *s, char *e)
4186 {
4187     PERL_ARGS_ASSERT_INTUIT_MORE;
4188
4189     if (PL_lex_brackets)
4190         return TRUE;
4191     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4192         return TRUE;
4193     if (*s == '-' && s[1] == '>'
4194      && FEATURE_POSTDEREF_QQ_IS_ENABLED
4195      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4196         ||(s[2] == '@' && strchr("*[{",s[3])) ))
4197         return TRUE;
4198     if (*s != '{' && *s != '[')
4199         return FALSE;
4200     PL_parser->sub_no_recover = TRUE;
4201     if (!PL_lex_inpat)
4202         return TRUE;
4203
4204     /* In a pattern, so maybe we have {n,m}. */
4205     if (*s == '{') {
4206         if (regcurly(s)) {
4207             return FALSE;
4208         }
4209         return TRUE;
4210     }
4211
4212     /* On the other hand, maybe we have a character class */
4213
4214     s++;
4215     if (*s == ']' || *s == '^')
4216         return FALSE;
4217     else {
4218         /* this is terrifying, and it works */
4219         int weight;
4220         char seen[256];
4221         const char * const send = (char *) memchr(s, ']', e - s);
4222         unsigned char un_char, last_un_char;
4223         char tmpbuf[sizeof PL_tokenbuf * 4];
4224
4225         if (!send)              /* has to be an expression */
4226             return TRUE;
4227         weight = 2;             /* let's weigh the evidence */
4228
4229         if (*s == '$')
4230             weight -= 3;
4231         else if (isDIGIT(*s)) {
4232             if (s[1] != ']') {
4233                 if (isDIGIT(s[1]) && s[2] == ']')
4234                     weight -= 10;
4235             }
4236             else
4237                 weight -= 100;
4238         }
4239         Zero(seen,256,char);
4240         un_char = 255;
4241         for (; s < send; s++) {
4242             last_un_char = un_char;
4243             un_char = (unsigned char)*s;
4244             switch (*s) {
4245             case '@':
4246             case '&':
4247             case '$':
4248                 weight -= seen[un_char] * 10;
4249                 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4250                     int len;
4251                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4252                     len = (int)strlen(tmpbuf);
4253                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4254                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4255                         weight -= 100;
4256                     else
4257                         weight -= 10;
4258                 }
4259                 else if (*s == '$'
4260                          && s[1]
4261                          && strchr("[#!%*<>()-=",s[1]))
4262                 {
4263                     if (/*{*/ strchr("])} =",s[2]))
4264                         weight -= 10;
4265                     else
4266                         weight -= 1;
4267                 }
4268                 break;
4269             case '\\':
4270                 un_char = 254;
4271                 if (s[1]) {
4272                     if (strchr("wds]",s[1]))
4273                         weight += 100;
4274                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4275                         weight += 1;
4276                     else if (strchr("rnftbxcav",s[1]))
4277                         weight += 40;
4278                     else if (isDIGIT(s[1])) {
4279                         weight += 40;
4280                         while (s[1] && isDIGIT(s[1]))
4281                             s++;
4282                     }
4283                 }
4284                 else
4285                     weight += 100;
4286                 break;
4287             case '-':
4288                 if (s[1] == '\\')
4289                     weight += 50;
4290                 if (strchr("aA01! ",last_un_char))
4291                     weight += 30;
4292                 if (strchr("zZ79~",s[1]))
4293                     weight += 30;
4294                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4295                     weight -= 5;        /* cope with negative subscript */
4296                 break;
4297             default:
4298                 if (!isWORDCHAR(last_un_char)
4299                     && !(last_un_char == '$' || last_un_char == '@'
4300                          || last_un_char == '&')
4301                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4302                     char *d = s;
4303                     while (isALPHA(*s))
4304                         s++;
4305                     if (keyword(d, s - d, 0))
4306                         weight -= 150;
4307                 }
4308                 if (un_char == last_un_char + 1)
4309                     weight += 5;
4310                 weight -= seen[un_char];
4311                 break;
4312             }
4313             seen[un_char]++;
4314         }
4315         if (weight >= 0)        /* probably a character class */
4316             return FALSE;
4317     }
4318
4319     return TRUE;
4320 }
4321
4322 /*
4323  * S_intuit_method
4324  *
4325  * Does all the checking to disambiguate
4326  *   foo bar
4327  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4328  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4329  *
4330  * First argument is the stuff after the first token, e.g. "bar".
4331  *
4332  * Not a method if foo is a filehandle.
4333  * Not a method if foo is a subroutine prototyped to take a filehandle.
4334  * Not a method if it's really "Foo $bar"
4335  * Method if it's "foo $bar"
4336  * Not a method if it's really "print foo $bar"
4337  * Method if it's really "foo package::" (interpreted as package->foo)
4338  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4339  * Not a method if bar is a filehandle or package, but is quoted with
4340  *   =>
4341  */
4342
4343 STATIC int
4344 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4345 {
4346     char *s = start + (*start == '$');
4347     char tmpbuf[sizeof PL_tokenbuf];
4348     STRLEN len;
4349     GV* indirgv;
4350         /* Mustn't actually add anything to a symbol table.
4351            But also don't want to "initialise" any placeholder
4352            constants that might already be there into full
4353            blown PVGVs with attached PVCV.  */
4354     GV * const gv =
4355         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4356
4357     PERL_ARGS_ASSERT_INTUIT_METHOD;
4358
4359     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4360             return 0;
4361     if (cv && SvPOK(cv)) {
4362         const char *proto = CvPROTO(cv);
4363         if (proto) {
4364             while (*proto && (isSPACE(*proto) || *proto == ';'))
4365                 proto++;
4366             if (*proto == '*')
4367                 return 0;
4368         }
4369     }
4370
4371     if (*start == '$') {
4372         SSize_t start_off = start - SvPVX(PL_linestr);
4373         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4374             || isUPPER(*PL_tokenbuf))
4375             return 0;
4376         /* this could be $# */
4377         if (isSPACE(*s))
4378             s = skipspace(s);
4379         PL_bufptr = SvPVX(PL_linestr) + start_off;
4380         PL_expect = XREF;
4381         return *s == '(' ? FUNCMETH : METHOD;
4382     }
4383
4384     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4385     /* start is the beginning of the possible filehandle/object,
4386      * and s is the end of it
4387      * tmpbuf is a copy of it (but with single quotes as double colons)
4388      */
4389
4390     if (!keyword(tmpbuf, len, 0)) {
4391         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4392             len -= 2;
4393             tmpbuf[len] = '\0';
4394             goto bare_package;
4395         }
4396         indirgv = gv_fetchpvn_flags(tmpbuf, len,
4397                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4398                                     SVt_PVCV);
4399         if (indirgv && SvTYPE(indirgv) != SVt_NULL
4400          && (!isGV(indirgv) || GvCVu(indirgv)))
4401             return 0;
4402         /* filehandle or package name makes it a method */
4403         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4404             s = skipspace(s);
4405             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4406                 return 0;       /* no assumptions -- "=>" quotes bareword */
4407       bare_package:
4408             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4409                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4410             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4411             PL_expect = XTERM;
4412             force_next(BAREWORD);
4413             PL_bufptr = s;
4414             return *s == '(' ? FUNCMETH : METHOD;
4415         }
4416     }
4417     return 0;
4418 }
4419
4420 /* Encoded script support. filter_add() effectively inserts a
4421  * 'pre-processing' function into the current source input stream.
4422  * Note that the filter function only applies to the current source file
4423  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4424  *
4425  * The datasv parameter (which may be NULL) can be used to pass
4426  * private data to this instance of the filter. The filter function
4427  * can recover the SV using the FILTER_DATA macro and use it to
4428  * store private buffers and state information.
4429  *
4430  * The supplied datasv parameter is upgraded to a PVIO type
4431  * and the IoDIRP/IoANY field is used to store the function pointer,
4432  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4433  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4434  * private use must be set using malloc'd pointers.
4435  */
4436
4437 SV *
4438 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4439 {
4440     if (!funcp)
4441         return NULL;
4442
4443     if (!PL_parser)
4444         return NULL;
4445
4446     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4447         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4448
4449     if (!PL_rsfp_filters)
4450         PL_rsfp_filters = newAV();
4451     if (!datasv)
4452         datasv = newSV(0);
4453     SvUPGRADE(datasv, SVt_PVIO);
4454     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4455     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4456     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4457                           FPTR2DPTR(void *, IoANY(datasv)),
4458                           SvPV_nolen(datasv)));
4459     av_unshift(PL_rsfp_filters, 1);
4460     av_store(PL_rsfp_filters, 0, datasv) ;
4461     if (
4462         !PL_parser->filtered
4463      && PL_parser->lex_flags & LEX_EVALBYTES
4464      && PL_bufptr < PL_bufend
4465     ) {
4466         const char *s = PL_bufptr;
4467         while (s < PL_bufend) {
4468             if (*s == '\n') {
4469                 SV *linestr = PL_parser->linestr;
4470                 char *buf = SvPVX(linestr);
4471                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4472                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4473                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4474                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4475                 STRLEN const last_uni_pos =
4476                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4477                 STRLEN const last_lop_pos =
4478                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4479                 av_push(PL_rsfp_filters, linestr);
4480                 PL_parser->linestr =
4481                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4482                 buf = SvPVX(PL_parser->linestr);
4483                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4484                 PL_parser->bufptr = buf + bufptr_pos;
4485                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4486                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4487                 PL_parser->linestart = buf + linestart_pos;
4488                 if (PL_parser->last_uni)
4489                     PL_parser->last_uni = buf + last_uni_pos;
4490                 if (PL_parser->last_lop)
4491                     PL_parser->last_lop = buf + last_lop_pos;
4492                 SvLEN_set(linestr, SvCUR(linestr));
4493                 SvCUR_set(linestr, s - SvPVX(linestr));
4494                 PL_parser->filtered = 1;
4495                 break;
4496             }
4497             s++;
4498         }
4499     }
4500     return(datasv);
4501 }
4502
4503
4504 /* Delete most recently added instance of this filter function. */
4505 void
4506 Perl_filter_del(pTHX_ filter_t funcp)
4507 {
4508     SV *datasv;
4509
4510     PERL_ARGS_ASSERT_FILTER_DEL;
4511
4512 #ifdef DEBUGGING
4513     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4514                           FPTR2DPTR(void*, funcp)));
4515 #endif
4516     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4517         return;
4518     /* if filter is on top of stack (usual case) just pop it off */
4519     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4520     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4521         sv_free(av_pop(PL_rsfp_filters));
4522
4523         return;
4524     }
4525     /* we need to search for the correct entry and clear it     */
4526     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4527 }
4528
4529
4530 /* Invoke the idxth filter function for the current rsfp.        */
4531 /* maxlen 0 = read one text line */
4532 I32
4533 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4534 {
4535     filter_t funcp;
4536     I32 ret;
4537     SV *datasv = NULL;
4538     /* This API is bad. It should have been using unsigned int for maxlen.
4539        Not sure if we want to change the API, but if not we should sanity
4540        check the value here.  */
4541     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4542
4543     PERL_ARGS_ASSERT_FILTER_READ;
4544
4545     if (!PL_parser || !PL_rsfp_filters)
4546         return -1;
4547     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4548         /* Provide a default input filter to make life easy.    */
4549         /* Note that we append to the line. This is handy.      */
4550         DEBUG_P(PerlIO_printf(Perl_debug_log,
4551                               "filter_read %d: from rsfp\n", idx));
4552         if (correct_length) {
4553             /* Want a block */
4554             int len ;
4555             const int old_len = SvCUR(buf_sv);
4556
4557             /* ensure buf_sv is large enough */
4558             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4559             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4560                                    correct_length)) <= 0) {
4561                 if (PerlIO_error(PL_rsfp))
4562                     return -1;          /* error */
4563                 else
4564                     return 0 ;          /* end of file */
4565             }
4566             SvCUR_set(buf_sv, old_len + len) ;
4567             SvPVX(buf_sv)[old_len + len] = '\0';
4568         } else {
4569             /* Want a line */
4570             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4571                 if (PerlIO_error(PL_rsfp))
4572                     return -1;          /* error */
4573                 else
4574                     return 0 ;          /* end of file */
4575             }
4576         }
4577         return SvCUR(buf_sv);
4578     }
4579     /* Skip this filter slot if filter has been deleted */
4580     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4581         DEBUG_P(PerlIO_printf(Perl_debug_log,
4582                               "filter_read %d: skipped (filter deleted)\n",
4583                               idx));
4584         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4585     }
4586     if (SvTYPE(datasv) != SVt_PVIO) {
4587         if (correct_length) {
4588             /* Want a block */
4589             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4590             if (!remainder) return 0; /* eof */
4591             if (correct_length > remainder) correct_length = remainder;
4592             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4593             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4594         } else {
4595             /* Want a line */
4596             const char *s = SvEND(datasv);
4597             const char *send = SvPVX(datasv) + SvLEN(datasv);
4598             while (s < send) {
4599                 if (*s == '\n') {
4600                     s++;
4601                     break;
4602                 }
4603                 s++;
4604             }
4605             if (s == send) return 0; /* eof */
4606             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4607             SvCUR_set(datasv, s-SvPVX(datasv));
4608         }
4609         return SvCUR(buf_sv);
4610     }
4611     /* Get function pointer hidden within datasv        */
4612     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4613     DEBUG_P(PerlIO_printf(Perl_debug_log,
4614                           "filter_read %d: via function %p (%s)\n",
4615                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4616     /* Call function. The function is expected to       */
4617     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4618     /* Return: <0:error, =0:eof, >0:not eof             */
4619     ENTER;
4620     save_scalar(PL_errgv);
4621     ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4622     LEAVE;
4623     return ret;
4624 }
4625
4626 STATIC char *
4627 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4628 {
4629     PERL_ARGS_ASSERT_FILTER_GETS;
4630
4631 #ifdef PERL_CR_FILTER
4632     if (!PL_rsfp_filters) {
4633         filter_add(S_cr_textfilter,NULL);
4634     }
4635 #endif
4636     if (PL_rsfp_filters) {
4637         if (!append)
4638             SvCUR_set(sv, 0);   /* start with empty line        */
4639         if (FILTER_READ(0, sv, 0) > 0)
4640             return ( SvPVX(sv) ) ;
4641         else
4642             return NULL ;
4643     }
4644     else
4645         return (sv_gets(sv, PL_rsfp, append));
4646 }
4647
4648 STATIC HV *
4649 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4650 {
4651     GV *gv;
4652
4653     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4654
4655     if (memEQs(pkgname, len, "__PACKAGE__"))
4656         return PL_curstash;
4657
4658     if (len > 2
4659         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4660         && (gv = gv_fetchpvn_flags(pkgname,
4661                                    len,
4662                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4663     {
4664         return GvHV(gv);                        /* Foo:: */
4665     }
4666
4667     /* use constant CLASS => 'MyClass' */
4668     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4669     if (gv && GvCV(gv)) {
4670         SV * const sv = cv_const_sv(GvCV(gv));
4671         if (sv)
4672             return gv_stashsv(sv, 0);
4673     }
4674
4675     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4676 }
4677
4678
4679 STATIC char *
4680 S_tokenize_use(pTHX_ int is_use, char *s) {
4681     PERL_ARGS_ASSERT_TOKENIZE_USE;
4682
4683     if (PL_expect != XSTATE)
4684         /* diag_listed_as: "use" not allowed in expression */
4685         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4686                     is_use ? "use" : "no"));
4687     PL_expect = XTERM;
4688     s = skipspace(s);
4689     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4690         s = force_version(s, TRUE);
4691         if (*s == ';' || *s == '}'
4692                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4693             NEXTVAL_NEXTTOKE.opval = NULL;
4694             force_next(BAREWORD);
4695         }
4696         else if (*s == 'v') {
4697             s = force_word(s,BAREWORD,FALSE,TRUE);
4698             s = force_version(s, FALSE);
4699         }
4700     }
4701     else {
4702         s = force_word(s,BAREWORD,FALSE,TRUE);
4703         s = force_version(s, FALSE);
4704     }
4705     pl_yylval.ival = is_use;
4706     return s;
4707 }
4708 #ifdef DEBUGGING
4709     static const char* const exp_name[] =
4710         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4711           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4712           "SIGVAR", "TERMORDORDOR"
4713         };
4714 #endif
4715
4716 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4717 STATIC bool
4718 S_word_takes_any_delimiter(char *p, STRLEN len)
4719 {
4720     return (len == 1 && strchr("msyq", p[0]))
4721             || (len == 2
4722                 && ((p[0] == 't' && p[1] == 'r')
4723                     || (p[0] == 'q' && strchr("qwxr", p[1]))));
4724 }
4725
4726 static void
4727 S_check_scalar_slice(pTHX_ char *s)
4728 {
4729     s++;
4730     while (SPACE_OR_TAB(*s)) s++;
4731     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4732                                                              PL_bufend,
4733                                                              UTF))
4734     {
4735         return;
4736     }
4737     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4738            || (*s && strchr(" \t$#+-'\"", *s)))
4739     {
4740         s += UTF ? UTF8SKIP(s) : 1;
4741     }
4742     if (*s == '}' || *s == ']')
4743         pl_yylval.ival = OPpSLICEWARNING;
4744 }
4745
4746 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4747 static void
4748 S_lex_token_boundary(pTHX)
4749 {
4750     PL_oldoldbufptr = PL_oldbufptr;
4751     PL_oldbufptr = PL_bufptr;
4752 }
4753
4754 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4755 static char *
4756 S_vcs_conflict_marker(pTHX_ char *s)
4757 {
4758     lex_token_boundary();
4759     PL_bufptr = s;
4760     yyerror("Version control conflict marker");
4761     while (s < PL_bufend && *s != '\n')
4762         s++;
4763     return s;
4764 }
4765
4766 /*
4767   yylex
4768
4769   Works out what to call the token just pulled out of the input
4770   stream.  The yacc parser takes care of taking the ops we return and
4771   stitching them into a tree.
4772
4773   Returns:
4774     The type of the next token
4775
4776   Structure:
4777       Check if we have already built the token; if so, use it.
4778       Switch based on the current state:
4779           - if we have a case modifier in a string, deal with that
4780           - handle other cases of interpolation inside a string
4781           - scan the next line if we are inside a format
4782       In the normal state, switch on the next character:
4783           - default:
4784             if alphabetic, go to key lookup
4785             unrecognized character - croak
4786           - 0/4/26: handle end-of-line or EOF
4787           - cases for whitespace
4788           - \n and #: handle comments and line numbers
4789           - various operators, brackets and sigils
4790           - numbers
4791           - quotes
4792           - 'v': vstrings (or go to key lookup)
4793           - 'x' repetition operator (or go to key lookup)
4794           - other ASCII alphanumerics (key lookup begins here):
4795               word before => ?
4796               keyword plugin
4797               scan built-in keyword (but do nothing with it yet)
4798               check for statement label
4799               check for lexical subs
4800                   goto just_a_word if there is one
4801               see whether built-in keyword is overridden
4802               switch on keyword number:
4803                   - default: just_a_word:
4804                       not a built-in keyword; handle bareword lookup
4805                       disambiguate between method and sub call
4806                       fall back to bareword
4807                   - cases for built-in keywords
4808 */
4809
4810
4811 int
4812 Perl_yylex(pTHX)
4813 {
4814     dVAR;
4815     char *s = PL_bufptr;
4816     char *d;
4817     STRLEN len;
4818     bool bof = FALSE;
4819     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4820     U8 formbrack = 0;
4821     U32 fake_eof = 0;
4822
4823     /* orig_keyword, gvp, and gv are initialized here because
4824      * jump to the label just_a_word_zero can bypass their
4825      * initialization later. */
4826     I32 orig_keyword = 0;
4827     GV *gv = NULL;
4828     GV **gvp = NULL;
4829
4830     if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
4831         const U8* first_bad_char_loc;
4832         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
4833                                                         PL_bufend - PL_bufptr,
4834                                                         &first_bad_char_loc)))
4835         {
4836             _force_out_malformed_utf8_message(first_bad_char_loc,
4837                                               (U8 *) PL_bufend,
4838                                               0,
4839                                               1 /* 1 means die */ );
4840             NOT_REACHED; /* NOTREACHED */
4841         }
4842         PL_parser->recheck_utf8_validity = FALSE;
4843     }
4844     DEBUG_T( {
4845         SV* tmp = newSVpvs("");
4846         PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
4847             (IV)CopLINE(PL_curcop),
4848             lex_state_names[PL_lex_state],
4849             exp_name[PL_expect],
4850             pv_display(tmp, s, strlen(s), 0, 60));
4851         SvREFCNT_dec(tmp);
4852     } );
4853
4854     /* when we've already built the next token, just pull it out of the queue */
4855     if (PL_nexttoke) {
4856         PL_nexttoke--;
4857         pl_yylval = PL_nextval[PL_nexttoke];
4858         {
4859             I32 next_type;
4860             next_type = PL_nexttype[PL_nexttoke];
4861             if (next_type & (7<<24)) {
4862                 if (next_type & (1<<24)) {
4863                     if (PL_lex_brackets > 100)
4864                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4865                     PL_lex_brackstack[PL_lex_brackets++] =
4866                         (char) ((next_type >> 16) & 0xff);
4867                 }
4868                 if (next_type & (2<<24))
4869                     PL_lex_allbrackets++;
4870                 if (next_type & (4<<24))
4871                     PL_lex_allbrackets--;
4872                 next_type &= 0xffff;
4873             }
4874             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4875         }
4876     }
4877
4878     switch (PL_lex_state) {
4879     case LEX_NORMAL:
4880     case LEX_INTERPNORMAL:
4881         break;
4882
4883     /* interpolated case modifiers like \L \U, including \Q and \E.
4884        when we get here, PL_bufptr is at the \
4885     */
4886     case LEX_INTERPCASEMOD:
4887 #ifdef DEBUGGING
4888         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4889             Perl_croak(aTHX_
4890                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4891                        PL_bufptr, PL_bufend, *PL_bufptr);
4892 #endif
4893         /* handle \E or end of string */
4894         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4895             /* if at a \E */
4896             if (PL_lex_casemods) {
4897                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4898                 PL_lex_casestack[PL_lex_casemods] = '\0';
4899
4900                 if (PL_bufptr != PL_bufend
4901                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4902                         || oldmod == 'F')) {
4903                     PL_bufptr += 2;
4904                     PL_lex_state = LEX_INTERPCONCAT;
4905                 }
4906                 PL_lex_allbrackets--;
4907                 return REPORT(')');
4908             }
4909             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4910                /* Got an unpaired \E */
4911                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4912                         "Useless use of \\E");
4913             }
4914             if (PL_bufptr != PL_bufend)
4915                 PL_bufptr += 2;
4916             PL_lex_state = LEX_INTERPCONCAT;
4917             return yylex();
4918         }
4919         else {
4920             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4921               "### Saw case modifier\n"); });
4922             s = PL_bufptr + 1;
4923             if (s[1] == '\\' && s[2] == 'E') {
4924                 PL_bufptr = s + 3;
4925                 PL_lex_state = LEX_INTERPCONCAT;
4926                 return yylex();
4927             }
4928             else {
4929                 I32 tmp;
4930                 if (   memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
4931                     || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
4932                 {
4933                     tmp = *s, *s = s[2], s[2] = (char)tmp;      /* misordered... */
4934                 }
4935                 if ((*s == 'L' || *s == 'U' || *s == 'F')
4936                     && (strpbrk(PL_lex_casestack, "LUF")))
4937                 {
4938                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4939                     PL_lex_allbrackets--;
4940                     return REPORT(')');
4941                 }
4942                 if (PL_lex_casemods > 10)
4943                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4944                 PL_lex_casestack[PL_lex_casemods++] = *s;
4945                 PL_lex_casestack[PL_lex_casemods] = '\0';
4946                 PL_lex_state = LEX_INTERPCONCAT;
4947                 NEXTVAL_NEXTTOKE.ival = 0;
4948                 force_next((2<<24)|'(');
4949                 if (*s == 'l')
4950                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4951                 else if (*s == 'u')
4952                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4953                 else if (*s == 'L')
4954                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4955                 else if (*s == 'U')
4956                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4957                 else if (*s == 'Q')
4958                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4959                 else if (*s == 'F')
4960                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4961                 else
4962                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4963                 PL_bufptr = s + 1;
4964             }
4965             force_next(FUNC);
4966             if (PL_lex_starts) {
4967                 s = PL_bufptr;
4968                 PL_lex_starts = 0;
4969                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4970                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4971                     TOKEN(',');
4972                 else
4973                     AopNOASSIGN(OP_CONCAT);
4974             }
4975             else
4976                 return yylex();
4977         }
4978
4979     case LEX_INTERPPUSH:
4980         return REPORT(sublex_push());
4981
4982     case LEX_INTERPSTART:
4983         if (PL_bufptr == PL_bufend)
4984             return REPORT(sublex_done());
4985         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4986               "### Interpolated variable\n"); });
4987         PL_expect = XTERM;
4988         /* for /@a/, we leave the joining for the regex engine to do
4989          * (unless we're within \Q etc) */
4990         PL_lex_dojoin = (*PL_bufptr == '@'
4991                             && (!PL_lex_inpat || PL_lex_casemods));
4992         PL_lex_state = LEX_INTERPNORMAL;
4993         if (PL_lex_dojoin) {
4994             NEXTVAL_NEXTTOKE.ival = 0;
4995             force_next(',');
4996             force_ident("\"", '$');
4997             NEXTVAL_NEXTTOKE.ival = 0;
4998             force_next('$');
4999             NEXTVAL_NEXTTOKE.ival = 0;
5000             force_next((2<<24)|'(');
5001             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
5002             force_next(FUNC);
5003         }
5004         /* Convert (?{...}) and friends to 'do {...}' */
5005         if (PL_lex_inpat && *PL_bufptr == '(') {
5006             PL_parser->lex_shared->re_eval_start = PL_bufptr;
5007             PL_bufptr += 2;
5008             if (*PL_bufptr != '{')
5009                 PL_bufptr++;
5010             PL_expect = XTERMBLOCK;
5011             force_next(DO);
5012         }
5013
5014         if (PL_lex_starts++) {
5015             s = PL_bufptr;
5016             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5017             if (!PL_lex_casemods && PL_lex_inpat)
5018                 TOKEN(',');
5019             else
5020                 AopNOASSIGN(OP_CONCAT);
5021         }
5022         return yylex();
5023
5024     case LEX_INTERPENDMAYBE:
5025         if (intuit_more(PL_bufptr, PL_bufend)) {
5026             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
5027             break;
5028         }
5029         /* FALLTHROUGH */
5030
5031     case LEX_INTERPEND:
5032         if (PL_lex_dojoin) {
5033             const U8 dojoin_was = PL_lex_dojoin;
5034             PL_lex_dojoin = FALSE;
5035             PL_lex_state = LEX_INTERPCONCAT;
5036             PL_lex_allbrackets--;
5037             return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
5038         }
5039         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5040             && SvEVALED(PL_lex_repl))
5041         {
5042             if (PL_bufptr != PL_bufend)
5043                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5044             PL_lex_repl = NULL;
5045         }
5046         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
5047            re_eval_str.  If the here-doc body’s length equals the previous
5048            value of re_eval_start, re_eval_start will now be null.  So
5049            check re_eval_str as well. */
5050         if (PL_parser->lex_shared->re_eval_start
5051          || PL_parser->lex_shared->re_eval_str) {
5052             SV *sv;
5053             if (*PL_bufptr != ')')
5054                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5055             PL_bufptr++;
5056             /* having compiled a (?{..}) expression, return the original
5057              * text too, as a const */
5058             if (PL_parser->lex_shared->re_eval_str) {
5059                 sv = PL_parser->lex_shared->re_eval_str;
5060                 PL_parser->lex_shared->re_eval_str = NULL;
5061                 SvCUR_set(sv,
5062                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5063                 SvPV_shrink_to_cur(sv);
5064             }
5065             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5066                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5067             NEXTVAL_NEXTTOKE.opval =
5068                     newSVOP(OP_CONST, 0,
5069                                  sv);
5070             force_next(THING);
5071             PL_parser->lex_shared->re_eval_start = NULL;
5072             PL_expect = XTERM;
5073             return REPORT(',');
5074         }
5075
5076         /* FALLTHROUGH */
5077     case LEX_INTERPCONCAT:
5078 #ifdef DEBUGGING
5079         if (PL_lex_brackets)
5080             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5081                        (long) PL_lex_brackets);
5082 #endif
5083         if (PL_bufptr == PL_bufend)
5084             return REPORT(sublex_done());
5085
5086         /* m'foo' still needs to be parsed for possible (?{...}) */
5087         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5088             SV *sv = newSVsv(PL_linestr);
5089             sv = tokeq(sv);
5090             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
5091             s = PL_bufend;
5092         }
5093         else {
5094             int save_error_count = PL_error_count;
5095
5096             s = scan_const(PL_bufptr);
5097
5098             /* Set flag if this was a pattern and there were errors.  op.c will
5099              * refuse to compile a pattern with this flag set.  Otherwise, we
5100              * could get segfaults, etc. */
5101             if (PL_lex_inpat && PL_error_count > save_error_count) {
5102                 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
5103             }
5104             if (*s == '\\')
5105                 PL_lex_state = LEX_INTERPCASEMOD;
5106             else
5107                 PL_lex_state = LEX_INTERPSTART;
5108         }
5109
5110         if (s != PL_bufptr) {
5111             NEXTVAL_NEXTTOKE = pl_yylval;
5112             PL_expect = XTERM;
5113             force_next(THING);
5114             if (PL_lex_starts++) {
5115                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5116                 if (!PL_lex_casemods && PL_lex_inpat)
5117                     TOKEN(',');
5118                 else
5119                     AopNOASSIGN(OP_CONCAT);
5120             }
5121             else {
5122                 PL_bufptr = s;
5123                 return yylex();
5124             }
5125         }
5126
5127         return yylex();
5128     case LEX_FORMLINE:
5129         if (PL_parser->sub_error_count != PL_error_count) {
5130             /* There was an error parsing a formline, which tends to
5131                mess up the parser.
5132                Unlike interpolated sub-parsing, we can't treat any of
5133                these as recoverable, so no need to check sub_no_recover.
5134             */
5135             yyquit();
5136         }
5137         assert(PL_lex_formbrack);
5138         s = scan_formline(PL_bufptr);
5139         if (!PL_lex_formbrack)
5140         {
5141             formbrack = 1;
5142             goto rightbracket;
5143         }
5144         PL_bufptr = s;
5145         return yylex();
5146     }
5147
5148     /* We really do *not* want PL_linestr ever becoming a COW. */
5149     assert (!SvIsCOW(PL_linestr));
5150     s = PL_bufptr;
5151     PL_oldoldbufptr = PL_oldbufptr;
5152     PL_oldbufptr = s;
5153     PL_parser->saw_infix_sigil = 0;
5154
5155     if (PL_in_my == KEY_sigvar) {
5156         /* we expect the sigil and optional var name part of a
5157          * signature element here. Since a '$' is not necessarily
5158          * followed by a var name, handle it specially here; the general
5159          * yylex code would otherwise try to interpret whatever follows
5160          * as a var; e.g. ($, ...) would be seen as the var '$,'
5161          */
5162
5163         U8 sigil;
5164
5165         s = skipspace(s);
5166         sigil = *s++;
5167         PL_bufptr = s; /* for error reporting */
5168         switch (sigil) {
5169         case '$':
5170         case '@':
5171         case '%':
5172             /* spot stuff that looks like an prototype */
5173             if (strchr("$:@%&*;\\[]", *s)) {
5174                 yyerror("Illegal character following sigil in a subroutine signature");
5175                 break;
5176             }
5177             /* '$#' is banned, while '$ # comment' isn't */
5178             if (*s == '#') {
5179                 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5180                 break;
5181             }
5182             s = skipspace(s);
5183             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5184                 char *dest = PL_tokenbuf + 1;
5185                 /* read var name, including sigil, into PL_tokenbuf */
5186                 PL_tokenbuf[0] = sigil;
5187                 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5188                     0, cBOOL(UTF), FALSE, FALSE);
5189                 *dest = '\0';
5190                 assert(PL_tokenbuf[1]); /* we have a variable name */
5191             }
5192             else {
5193                 *PL_tokenbuf = 0;
5194                 PL_in_my = 0;
5195             }
5196
5197             s = skipspace(s);
5198             /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5199              * as the ASSIGNOP, and exclude other tokens that start with =
5200              */
5201             if (*s == '=' && (!s[1] || strchr("=~>", s[1]) == 0)) {
5202                 /* save now to report with the same context as we did when
5203                  * all ASSIGNOPS were accepted */
5204                 PL_oldbufptr = s;
5205
5206                 ++s;
5207                 NEXTVAL_NEXTTOKE.ival = 0;
5208                 force_next(ASSIGNOP);
5209                 PL_expect = XTERM;
5210             }
5211             else if (*s == ',' || *s == ')') {
5212                 PL_expect = XOPERATOR;
5213             }
5214             else {
5215                 /* make sure the context shows the unexpected character and
5216                  * hopefully a bit more */
5217                 if (*s) ++s;
5218                 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5219                     s++;
5220                 PL_bufptr = s; /* for error reporting */
5221                 yyerror("Illegal operator following parameter in a subroutine signature");
5222                 PL_in_my = 0;
5223             }
5224             if (*PL_tokenbuf) {
5225                 NEXTVAL_NEXTTOKE.ival = sigil;
5226                 force_next('p'); /* force a signature pending identifier */
5227             }
5228             break;
5229
5230         case ')':
5231             PL_expect = XBLOCK;
5232             break;
5233         case ',': /* handle ($a,,$b) */
5234             break;
5235
5236         default:
5237             PL_in_my = 0;
5238             yyerror("A signature parameter must start with '$', '@' or '%'");
5239             /* very crude error recovery: skip to likely next signature
5240              * element */
5241             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5242                 s++;
5243             break;
5244         }
5245         TOKEN(sigil);
5246     }
5247
5248   retry:
5249     switch (*s) {
5250     default:
5251         if (UTF) {
5252             if (isIDFIRST_utf8_safe(s, PL_bufend)) {
5253                 goto keylookup;
5254             }
5255         }
5256         else if (isALNUMC(*s)) {
5257             goto keylookup;
5258         }
5259     {
5260         SV *dsv = newSVpvs_flags("", SVs_TEMP);
5261         const char *c;
5262         if (UTF) {
5263             STRLEN skiplen = UTF8SKIP(s);
5264             STRLEN stravail = PL_bufend - s;
5265             c = sv_uni_display(dsv, newSVpvn_flags(s,
5266                                                    skiplen > stravail ? stravail : skiplen,
5267                                                    SVs_TEMP | SVf_UTF8),
5268                                10, UNI_DISPLAY_ISPRINT);
5269         }
5270         else {
5271             c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5272         }
5273
5274         if (s >= PL_linestart) {
5275             d = PL_linestart;
5276         }
5277         else {
5278             /* somehow (probably due to a parse failure), PL_linestart has advanced
5279              * pass PL_bufptr, get a reasonable beginning of line
5280              */
5281             d = s;
5282             while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
5283                 --d;
5284         }
5285         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
5286         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5287             d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
5288         }
5289
5290         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
5291                           UTF8fARG(UTF, (s - d), d),
5292                          (int) len + 1);
5293     }
5294     case 4:
5295     case 26:
5296         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
5297     case 0:
5298         if ((!PL_rsfp || PL_lex_inwhat)
5299          && (!PL_parser->filtered || s+1 < PL_bufend)) {
5300             PL_last_uni = 0;
5301             PL_last_lop = 0;
5302             if (PL_lex_brackets
5303                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
5304             {
5305                 yyerror((const char *)
5306                         (PL_lex_formbrack
5307                          ? "Format not terminated"
5308                          : "Missing right curly or square bracket"));
5309             }
5310             DEBUG_T( { PerlIO_printf(Perl_debug_log,
5311                         "### Tokener got EOF\n");
5312             } );
5313             TOKEN(0);
5314         }
5315         if (s++ < PL_bufend)
5316             goto retry;                 /* ignore stray nulls */
5317         PL_last_uni = 0;
5318         PL_last_lop = 0;
5319         if (!PL_in_eval && !PL_preambled) {
5320             PL_preambled = TRUE;
5321             if (PL_perldb) {
5322                 /* Generate a string of Perl code to load the debugger.
5323                  * If PERL5DB is set, it will return the contents of that,
5324                  * otherwise a compile-time require of perl5db.pl.  */
5325
5326                 const char * const pdb = PerlEnv_getenv("PERL5DB");
5327
5328                 if (pdb) {
5329                     sv_setpv(PL_linestr, pdb);
5330                     sv_catpvs(PL_linestr,";");
5331                 } else {
5332                     SETERRNO(0,SS_NORMAL);
5333                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5334                 }
5335                 PL_parser->preambling = CopLINE(PL_curcop);
5336             } else
5337                 SvPVCLEAR(PL_linestr);
5338             if (PL_preambleav) {
5339                 SV **svp = AvARRAY(PL_preambleav);
5340                 SV **const end = svp + AvFILLp(PL_preambleav);
5341                 while(svp <= end) {
5342                     sv_catsv(PL_linestr, *svp);
5343                     ++svp;
5344                     sv_catpvs(PL_linestr, ";");
5345                 }
5346                 sv_free(MUTABLE_SV(PL_preambleav));
5347                 PL_preambleav = NULL;
5348             }
5349             if (PL_minus_E)
5350                 sv_catpvs(PL_linestr,
5351                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5352             if (PL_minus_n || PL_minus_p) {
5353                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5354                 if (PL_minus_l)
5355                     sv_catpvs(PL_linestr,"chomp;");
5356                 if (PL_minus_a) {
5357                     if (PL_minus_F) {
5358                         if (   (   *PL_splitstr == '/'
5359                                 || *PL_splitstr == '\''
5360                                 || *PL_splitstr == '"')
5361                             && strchr(PL_splitstr + 1, *PL_splitstr))
5362                         {
5363                             /* strchr is ok, because -F pattern can't contain
5364                              * embeddded NULs */
5365                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5366                         }
5367                         else {
5368                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5369                                bytes can be used as quoting characters.  :-) */
5370                             const char *splits = PL_splitstr;
5371                             sv_catpvs(PL_linestr, "our @F=split(q\0");
5372                             do {
5373                                 /* Need to \ \s  */
5374                                 if (*splits == '\\')
5375                                     sv_catpvn(PL_linestr, splits, 1);
5376                                 sv_catpvn(PL_linestr, splits, 1);
5377                             } while (*splits++);
5378                             /* This loop will embed the trailing NUL of
5379                                PL_linestr as the last thing it does before
5380                                terminating.  */
5381                             sv_catpvs(PL_linestr, ");");
5382                         }
5383                     }
5384                     else
5385                         sv_catpvs(PL_linestr,"our @F=split(' ');");
5386                 }
5387             }
5388             sv_catpvs(PL_linestr, "\n");
5389             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5390             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5391             PL_last_lop = PL_last_uni = NULL;
5392             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
5393                 update_debugger_info(PL_linestr, NULL, 0);
5394             goto retry;
5395         }
5396         do {
5397             fake_eof = 0;
5398             bof = cBOOL(PL_rsfp);
5399             if (0) {
5400               fake_eof:
5401                 fake_eof = LEX_FAKE_EOF;
5402             }
5403             PL_bufptr = PL_bufend;
5404             COPLINE_INC_WITH_HERELINES;
5405             if (!lex_next_chunk(fake_eof)) {
5406                 CopLINE_dec(PL_curcop);
5407                 s = PL_bufptr;
5408                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
5409             }
5410             CopLINE_dec(PL_curcop);
5411             s = PL_bufptr;
5412             /* If it looks like the start of a BOM or raw UTF-16,
5413              * check if it in fact is. */
5414             if (bof && PL_rsfp
5415                 && (   *s == 0
5416                     || *(U8*)s == BOM_UTF8_FIRST_BYTE
5417                     || *(U8*)s >= 0xFE
5418                     || s[1] == 0))
5419             {
5420                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5421                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5422 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5423                 /* offset may include swallowed CR */
5424                 if (!bof)
5425                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5426 #endif
5427                 if (bof) {
5428                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5429                     s = swallow_bom((U8*)s);
5430                 }
5431             }
5432             if (PL_parser->in_pod) {
5433                 /* Incest with pod. */
5434                 if (    memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
5435                     && !isALPHA(s[4]))
5436                 {
5437                     SvPVCLEAR(PL_linestr);
5438                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5439                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5440                     PL_last_lop = PL_last_uni = NULL;
5441                     PL_parser->in_pod = 0;
5442                 }
5443             }
5444             if (PL_rsfp || PL_parser->filtered)
5445                 incline(s, PL_bufend);
5446         } while (PL_parser->in_pod);
5447         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5448         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5449         PL_last_lop = PL_last_uni = NULL;
5450         if (CopLINE(PL_curcop) == 1) {
5451             while (s < PL_bufend && isSPACE(*s))
5452                 s++;
5453             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5454                 s++;
5455             d = NULL;
5456             if (!PL_in_eval) {
5457                 if (*s == '#' && *(s+1) == '!')
5458                     d = s + 2;
5459 #ifdef ALTERNATE_SHEBANG
5460                 else {
5461                     static char const as[] = ALTERNATE_SHEBANG;
5462                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5463                         d = s + (sizeof(as) - 1);
5464                 }
5465 #endif /* ALTERNATE_SHEBANG */
5466             }
5467             if (d) {
5468                 char *ipath;
5469                 char *ipathend;
5470
5471                 while (isSPACE(*d))
5472                     d++;
5473                 ipath = d;
5474                 while (*d && !isSPACE(*d))
5475                     d++;
5476                 ipathend = d;
5477
5478 #ifdef ARG_ZERO_IS_SCRIPT
5479                 if (ipathend > ipath) {
5480                     /*
5481                      * HP-UX (at least) sets argv[0] to the script name,
5482                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5483                      * at least, set argv[0] to the basename of the Perl
5484                      * interpreter. So, having found "#!", we'll set it right.
5485                      */
5486                     SV* copfilesv = CopFILESV(PL_curcop);
5487                     if (copfilesv) {
5488                         SV * const x =
5489                             GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5490                                              SVt_PV)); /* $^X */
5491                         assert(SvPOK(x) || SvGMAGICAL(x));
5492                         if (sv_eq(x, copfilesv)) {
5493                             sv_setpvn(x, ipath, ipathend - ipath);
5494                             SvSETMAGIC(x);
5495                         }
5496                         else {
5497                             STRLEN blen;
5498                             STRLEN llen;
5499                             const char *bstart = SvPV_const(copfilesv, blen);
5500                             const char * const lstart = SvPV_const(x, llen);
5501                             if (llen < blen) {
5502                                 bstart += blen - llen;
5503                                 if (strnEQ(bstart, lstart, llen) &&     bstart[-1] == '/') {
5504                                     sv_setpvn(x, ipath, ipathend - ipath);
5505                                     SvSETMAGIC(x);
5506                                 }
5507                             }
5508                         }
5509                     }
5510                     else {
5511                         /* Anything to do if no copfilesv? */
5512                     }
5513                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5514                 }
5515 #endif /* ARG_ZERO_IS_SCRIPT */
5516
5517                 /*
5518                  * Look for options.
5519                  */
5520                 d = instr(s,"perl -");
5521                 if (!d) {
5522                     d = instr(s,"perl");
5523 #if defined(DOSISH)
5524                     /* avoid getting into infinite loops when shebang
5525                      * line contains "Perl" rather than "perl" */
5526                     if (!d) {
5527                         for (d = ipathend-4; d >= ipath; --d) {
5528                             if (isALPHA_FOLD_EQ(*d, 'p')
5529                                 && !ibcmp(d, "perl", 4))
5530                             {
5531                                 break;
5532                             }
5533                         }
5534                         if (d < ipath)
5535                             d = NULL;
5536                     }
5537 #endif
5538                 }
5539 #ifdef ALTERNATE_SHEBANG
5540                 /*
5541                  * If the ALTERNATE_SHEBANG on this system starts with a
5542                  * character that can be part of a Perl expression, then if
5543                  * we see it but not "perl", we're probably looking at the
5544                  * start of Perl code, not a request to hand off to some
5545                  * other interpreter.  Similarly, if "perl" is there, but
5546                  * not in the first 'word' of the line, we assume the line
5547                  * contains the start of the Perl program.
5548                  */
5549                 if (d && *s != '#') {
5550                     const char *c = ipath;
5551                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5552                         c++;
5553                     if (c < d)
5554                         d = NULL;       /* "perl" not in first word; ignore */
5555                     else
5556                         *s = '#';       /* Don't try to parse shebang line */
5557                 }
5558 #endif /* ALTERNATE_SHEBANG */
5559                 if (!d
5560                     && *s == '#'
5561                     && ipathend > ipath
5562                     && !PL_minus_c
5563                     && !instr(s,"indir")
5564                     && instr(PL_origargv[0],"perl"))
5565                 {
5566                     dVAR;
5567                     char **newargv;
5568
5569                     *ipathend = '\0';
5570                     s = ipathend + 1;
5571                     while (s < PL_bufend && isSPACE(*s))
5572                         s++;
5573                     if (s < PL_bufend) {
5574                         Newx(newargv,PL_origargc+3,char*);
5575                         newargv[1] = s;
5576                         while (s < PL_bufend && !isSPACE(*s))
5577                             s++;
5578                         *s = '\0';
5579                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5580                     }
5581                     else
5582                         newargv = PL_origargv;
5583                     newargv[0] = ipath;
5584                     PERL_FPU_PRE_EXEC
5585                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5586                     PERL_FPU_POST_EXEC
5587                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5588                 }
5589                 if (d) {
5590                     while (*d && !isSPACE(*d))
5591                         d++;
5592                     while (SPACE_OR_TAB(*d))
5593                         d++;
5594
5595                     if (*d++ == '-') {
5596                         const bool switches_done = PL_doswitches;
5597                         const U32 oldpdb = PL_perldb;
5598                         const bool oldn = PL_minus_n;
5599                         const bool oldp = PL_minus_p;
5600                         const char *d1 = d;
5601
5602                         do {
5603                             bool baduni = FALSE;
5604                             if (*d1 == 'C') {
5605                                 const char *d2 = d1 + 1;
5606                                 if (parse_unicode_opts((const char **)&d2)
5607                                     != PL_unicode)
5608                                     baduni = TRUE;
5609                             }
5610                             if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
5611                                 const char * const m = d1;
5612                                 while (*d1 && !isSPACE(*d1))
5613                                     d1++;
5614                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5615                                       (int)(d1 - m), m);
5616                             }
5617                             d1 = moreswitches(d1);
5618                         } while (d1);
5619                         if (PL_doswitches && !switches_done) {
5620                             int argc = PL_origargc;
5621                             char **argv = PL_origargv;
5622                             do {
5623                                 argc--,argv++;
5624                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5625                             init_argv_symbols(argc,argv);
5626                         }
5627                         if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
5628                             || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5629                               /* if we have already added "LINE: while (<>) {",
5630                                  we must not do it again */
5631                         {
5632                             SvPVCLEAR(PL_linestr);
5633                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5634                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5635                             PL_last_lop = PL_last_uni = NULL;
5636                             PL_preambled = FALSE;
5637                             if (PERLDB_LINE_OR_SAVESRC)
5638                                 (void)gv_fetchfile(PL_origfilename);
5639                             goto retry;
5640                         }
5641                     }
5642                 }
5643             }
5644         }
5645         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5646             PL_lex_state = LEX_FORMLINE;
5647             force_next(FORMRBRACK);
5648             TOKEN(';');
5649         }
5650         goto retry;
5651     case '\r':
5652 #ifdef PERL_STRICT_CR
5653         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5654         Perl_croak(aTHX_
5655       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5656 #endif
5657     case ' ': case '\t': case '\f': case '\v':
5658         s++;
5659         goto retry;
5660     case '#':
5661     case '\n':
5662         if (PL_lex_state != LEX_NORMAL
5663             || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
5664         {
5665             const bool in_comment = *s == '#';
5666             if (*s == '#' && s == PL_linestart && PL_in_eval
5667              && !PL_rsfp && !PL_parser->filtered) {
5668                 /* handle eval qq[#line 1 "foo"\n ...] */
5669                 CopLINE_dec(PL_curcop);
5670                 incline(s, PL_bufend);
5671             }
5672             d = s;
5673             while (d < PL_bufend && *d != '\n')
5674                 d++;
5675             if (d < PL_bufend)
5676                 d++;
5677             s = d;
5678             if (in_comment && d == PL_bufend
5679                 && PL_lex_state == LEX_INTERPNORMAL
5680                 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5681                 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5682             else
5683                 incline(s, PL_bufend);
5684             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5685                 PL_lex_state = LEX_FORMLINE;
5686                 force_next(FORMRBRACK);
5687                 TOKEN(';');
5688             }
5689         }
5690         else {
5691             while (s < PL_bufend && *s != '\n')
5692                 s++;
5693             if (s < PL_bufend)
5694                 {
5695                     s++;
5696                     if (s < PL_bufend)
5697                         incline(s, PL_bufend);
5698                 }
5699         }
5700         goto retry;
5701     case '-':
5702         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5703             I32 ftst = 0;
5704             char tmp;
5705
5706             s++;
5707             PL_bufptr = s;
5708             tmp = *s++;
5709
5710             while (s < PL_bufend && SPACE_OR_TAB(*s))
5711                 s++;
5712
5713             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5714                 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5715                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5716                 OPERATOR('-');          /* unary minus */
5717             }
5718             switch (tmp) {
5719             case 'r': ftst = OP_FTEREAD;        break;
5720             case 'w': ftst = OP_FTEWRITE;       break;
5721             case 'x': ftst = OP_FTEEXEC;        break;
5722             case 'o': ftst = OP_FTEOWNED;       break;
5723             case 'R': ftst = OP_FTRREAD;        break;
5724             case 'W': ftst = OP_FTRWRITE;       break;
5725             case 'X': ftst = OP_FTREXEC;        break;
5726             case 'O': ftst = OP_FTROWNED;       break;
5727             case 'e': ftst = OP_FTIS;           break;
5728             case 'z': ftst = OP_FTZERO;         break;
5729             case 's': ftst = OP_FTSIZE;         break;
5730             case 'f': ftst = OP_FTFILE;         break;
5731             case 'd': ftst = OP_FTDIR;          break;
5732             case 'l': ftst = OP_FTLINK;         break;
5733             case 'p': ftst = OP_FTPIPE;         break;
5734             case 'S': ftst = OP_FTSOCK;         break;
5735             case 'u': ftst = OP_FTSUID;         break;
5736             case 'g': ftst = OP_FTSGID;         break;
5737             case 'k': ftst = OP_FTSVTX;         break;
5738             case 'b': ftst = OP_FTBLK;          break;
5739             case 'c': ftst = OP_FTCHR;          break;
5740             case 't': ftst = OP_FTTTY;          break;
5741             case 'T': ftst = OP_FTTEXT;         break;
5742             case 'B': ftst = OP_FTBINARY;       break;
5743             case 'M': case 'A': case 'C':
5744                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5745                 switch (tmp) {
5746                 case 'M': ftst = OP_FTMTIME;    break;
5747                 case 'A': ftst = OP_FTATIME;    break;
5748                 case 'C': ftst = OP_FTCTIME;    break;
5749                 default:                        break;
5750                 }
5751                 break;
5752             default:
5753                 break;
5754             }
5755             if (ftst) {
5756                 PL_last_uni = PL_oldbufptr;
5757                 PL_last_lop_op = (OPCODE)ftst;
5758                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5759                         "### Saw file test %c\n", (int)tmp);
5760                 } );
5761                 FTST(ftst);
5762             }
5763             else {
5764                 /* Assume it was a minus followed by a one-letter named
5765                  * subroutine call (or a -bareword), then. */
5766                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5767                         "### '-%c' looked like a file test but was not\n",
5768                         (int) tmp);
5769                 } );
5770                 s = --PL_bufptr;
5771             }
5772         }
5773         {
5774             const char tmp = *s++;
5775             if (*s == tmp) {
5776                 s++;
5777                 if (PL_expect == XOPERATOR)
5778                     TERM(POSTDEC);
5779                 else
5780                     OPERATOR(PREDEC);
5781             }
5782             else if (*s == '>') {
5783                 s++;
5784                 s = skipspace(s);
5785                 if (((*s == '$' || *s == '&') && s[1] == '*')
5786                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
5787                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5788                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5789                  )
5790                 {
5791                     PL_expect = XPOSTDEREF;
5792                     TOKEN(ARROW);
5793                 }
5794                 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5795                     s = force_word(s,METHOD,FALSE,TRUE);
5796                     TOKEN(ARROW);
5797                 }
5798                 else if (*s == '$')
5799                     OPERATOR(ARROW);
5800                 else
5801                     TERM(ARROW);
5802             }
5803             if (PL_expect == XOPERATOR) {
5804                 if (*s == '='
5805                     && !PL_lex_allbrackets
5806                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5807                 {
5808                     s--;
5809                     TOKEN(0);
5810                 }
5811                 Aop(OP_SUBTRACT);
5812             }
5813             else {
5814                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5815                     check_uni();
5816                 OPERATOR('-');          /* unary minus */
5817             }
5818         }
5819
5820     case '+':
5821         {
5822             const char tmp = *s++;
5823             if (*s == tmp) {
5824                 s++;
5825                 if (PL_expect == XOPERATOR)
5826                     TERM(POSTINC);
5827                 else
5828                     OPERATOR(PREINC);
5829             }
5830             if (PL_expect == XOPERATOR) {
5831                 if (*s == '='
5832                     && !PL_lex_allbrackets
5833                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5834                 {
5835                     s--;
5836                     TOKEN(0);
5837                 }
5838                 Aop(OP_ADD);
5839             }
5840             else {
5841                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5842                     check_uni();
5843                 OPERATOR('+');
5844             }
5845         }
5846
5847     case '*':
5848         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5849         if (PL_expect != XOPERATOR) {
5850             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5851             PL_expect = XOPERATOR;
5852             force_ident(PL_tokenbuf, '*');
5853             if (!*PL_tokenbuf)
5854                 PREREF('*');
5855             TERM('*');
5856         }
5857         s++;
5858         if (*s == '*') {
5859             s++;
5860             if (*s == '=' && !PL_lex_allbrackets
5861                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5862             {
5863                 s -= 2;
5864                 TOKEN(0);
5865             }
5866             PWop(OP_POW);
5867         }
5868         if (*s == '='
5869             && !PL_lex_allbrackets
5870             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5871         {
5872             s--;
5873             TOKEN(0);
5874         }
5875         PL_parser->saw_infix_sigil = 1;
5876         Mop(OP_MULTIPLY);
5877
5878     case '%':
5879     {
5880         if (PL_expect == XOPERATOR) {
5881             if (s[1] == '='
5882                 && !PL_lex_allbrackets
5883                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5884             {
5885                 TOKEN(0);
5886             }
5887             ++s;
5888             PL_parser->saw_infix_sigil = 1;
5889             Mop(OP_MODULO);
5890         }
5891         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5892         PL_tokenbuf[0] = '%';
5893         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5894         pl_yylval.ival = 0;
5895         if (!PL_tokenbuf[1]) {
5896             PREREF('%');
5897         }
5898         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5899             && intuit_more(s, PL_bufend)) {
5900             if (*s == '[')
5901                 PL_tokenbuf[0] = '@';
5902         }
5903         PL_expect = XOPERATOR;
5904         force_ident_maybe_lex('%');
5905         TERM('%');
5906     }
5907     case '^':
5908         d = s;
5909         bof = FEATURE_BITWISE_IS_ENABLED;
5910         if (bof && s[1] == '.')
5911             s++;
5912         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5913                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5914         {
5915             s = d;
5916             TOKEN(0);
5917         }
5918         s++;
5919         BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5920     case '[':
5921         if (PL_lex_brackets > 100)
5922             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5923         PL_lex_brackstack[PL_lex_brackets++] = 0;
5924         PL_lex_allbrackets++;
5925         {
5926             const char tmp = *s++;
5927             OPERATOR(tmp);
5928         }
5929     case '~':
5930         if (s[1] == '~'
5931             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5932         {
5933             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5934                 TOKEN(0);
5935             s += 2;
5936             Perl_ck_warner_d(aTHX_
5937                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5938                 "Smartmatch is experimental");
5939             Eop(OP_SMARTMATCH);
5940         }
5941         s++;
5942         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5943             s++;
5944             BCop(OP_SCOMPLEMENT);
5945         }
5946         BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5947     case ',':
5948         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5949             TOKEN(0);
5950         s++;
5951         OPERATOR(',');
5952     case ':':
5953         if (s[1] == ':') {
5954             len = 0;
5955             goto just_a_word_zero_gv;
5956         }
5957         s++;
5958         {
5959         OP *attrs;
5960
5961         switch (PL_expect) {
5962         case XOPERATOR:
5963             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5964                 break;
5965             PL_bufptr = s;      /* update in case we back off */
5966             if (*s == '=') {
5967                 Perl_croak(aTHX_
5968                            "Use of := for an empty attribute list is not allowed");
5969             }
5970             goto grabattrs;
5971         case XATTRBLOCK:
5972             PL_expect = XBLOCK;
5973             goto grabattrs;
5974         case XATTRTERM:
5975             PL_expect = XTERMBLOCK;
5976          grabattrs:
5977             /* NB: as well as parsing normal attributes, we also end up
5978              * here if there is something looking like attributes
5979              * following a signature (which is illegal, but used to be
5980              * legal in 5.20..5.26). If the latter, we still parse the
5981              * attributes so that error messages(s) are less confusing,
5982              * but ignore them (parser->sig_seen).
5983              */
5984             s = skipspace(s);
5985             attrs = NULL;
5986             while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5987                 bool sig = PL_parser->sig_seen;
5988                 I32 tmp;
5989                 SV *sv;
5990                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5991                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5992                     if (tmp < 0) tmp = -tmp;
5993                     switch (tmp) {
5994                     case KEY_or:
5995                     case KEY_and:
5996                     case KEY_for:
5997                     case KEY_foreach:
5998                     case KEY_unless:
5999                     case KEY_if:
6000                     case KEY_while:
6001                     case KEY_until:
6002                         goto got_attrs;
6003                     default:
6004                         break;
6005                     }
6006                 }
6007                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
6008                 if (*d == '(') {
6009                     d = scan_str(d,TRUE,TRUE,FALSE,NULL);
6010                     if (!d) {
6011                         if (attrs)
6012                             op_free(attrs);
6013                         sv_free(sv);
6014                         Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
6015                     }
6016                     COPLINE_SET_FROM_MULTI_END;
6017                 }
6018                 if (PL_lex_stuff) {
6019                     sv_catsv(sv, PL_lex_stuff);
6020                     attrs = op_append_elem(OP_LIST, attrs,
6021                                         newSVOP(OP_CONST, 0, sv));
6022                     SvREFCNT_dec_NN(PL_lex_stuff);
6023                     PL_lex_stuff = NULL;
6024                 }
6025                 else {
6026                     /* NOTE: any CV attrs applied here need to be part of
6027                        the CVf_BUILTIN_ATTRS define in cv.h! */
6028                     if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
6029                         sv_free(sv);
6030                         if (!sig)
6031                             CvLVALUE_on(PL_compcv);
6032                     }
6033                     else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
6034                         sv_free(sv);
6035                         if (!sig)
6036                             CvMETHOD_on(PL_compcv);
6037                     }
6038                     else if (!PL_in_my && memEQs(SvPVX(sv), len, "const"))
6039                     {
6040                         sv_free(sv);
6041                         if (!sig) {
6042                             Perl_ck_warner_d(aTHX_
6043                                 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
6044                                ":const is experimental"
6045                             );
6046                             CvANONCONST_on(PL_compcv);
6047                             if (!CvANON(PL_compcv))
6048                                 yyerror(":const is not permitted on named "
6049                                         "subroutines");
6050                         }
6051                     }
6052                     /* After we've set the flags, it could be argued that
6053                        we don't need to do the attributes.pm-based setting
6054                        process, and shouldn't bother appending recognized
6055                        flags.  To experiment with that, uncomment the
6056                        following "else".  (Note that's already been
6057                        uncommented.  That keeps the above-applied built-in
6058                        attributes from being intercepted (and possibly
6059                        rejected) by a package's attribute routines, but is
6060                        justified by the performance win for the common case
6061                        of applying only built-in attributes.) */
6062                     else
6063                         attrs = op_append_elem(OP_LIST, attrs,
6064                                             newSVOP(OP_CONST, 0,
6065                                                     sv));
6066                 }
6067                 s = skipspace(d);
6068                 if (*s == ':' && s[1] != ':')
6069                     s = skipspace(s+1);
6070                 else if (s == d)
6071                     break;      /* require real whitespace or :'s */
6072                 /* XXX losing whitespace on sequential attributes here */
6073             }
6074             {
6075                 if (*s != ';'
6076                     && *s != '}'
6077                     && !(PL_expect == XOPERATOR
6078                          ? (*s == '=' ||  *s == ')')
6079                          : (*s == '{' ||  *s == '(')))
6080                 {
6081                     const char q = ((*s == '\'') ? '"' : '\'');
6082                     /* If here for an expression, and parsed no attrs, back
6083                        off. */
6084                     if (PL_expect == XOPERATOR && !attrs) {
6085                         s = PL_bufptr;
6086                         break;
6087                     }
6088                     /* MUST advance bufptr here to avoid bogus "at end of line"
6089                        context messages from yyerror().
6090                     */
6091                     PL_bufptr = s;
6092                     yyerror( (const char *)
6093                              (*s
6094                               ? Perl_form(aTHX_ "Invalid separator character "
6095                                           "%c%c%c in attribute list", q, *s, q)
6096                               : "Unterminated attribute list" ) );
6097                     if (attrs)
6098                         op_free(attrs);
6099                     OPERATOR(':');
6100                 }
6101             }
6102         got_attrs:
6103             if (PL_parser->sig_seen) {
6104                 /* see comment about about sig_seen and parser error
6105                  * handling */
6106                 if (attrs)
6107                     op_free(attrs);
6108                 Perl_croak(aTHX_ "Subroutine attributes must come "
6109                                  "before the signature");
6110                 }
6111             if (attrs) {
6112                 NEXTVAL_NEXTTOKE.opval = attrs;
6113                 force_next(THING);
6114             }
6115             TOKEN(COLONATTR);
6116         }
6117         }
6118         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6119             s--;
6120             TOKEN(0);
6121         }
6122         PL_lex_allbrackets--;
6123         OPERATOR(':');
6124     case '(':
6125         s++;
6126         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6127             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
6128         else
6129             PL_expect = XTERM;
6130         s = skipspace(s);
6131         PL_lex_allbrackets++;
6132         TOKEN('(');
6133     case ';':
6134         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6135             TOKEN(0);
6136         CLINE;
6137         s++;
6138         PL_expect = XSTATE;
6139         TOKEN(';');
6140     case ')':
6141         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6142             TOKEN(0);
6143         s++;
6144         PL_lex_allbrackets--;
6145         s = skipspace(s);
6146         if (*s == '{')
6147             PREBLOCK(')');
6148         TERM(')');
6149     case ']':
6150         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6151             TOKEN(0);
6152         s++;
6153         if (PL_lex_brackets <= 0)
6154             /* diag_listed_as: Unmatched right %s bracket */
6155             yyerror("Unmatched right square bracket");
6156         else
6157             --PL_lex_brackets;
6158         PL_lex_allbrackets--;
6159         if (PL_lex_state == LEX_INTERPNORMAL) {
6160             if (PL_lex_brackets == 0) {
6161                 if (*s == '-' && s[1] == '>')
6162                     PL_lex_state = LEX_INTERPENDMAYBE;
6163                 else if (*s != '[' && *s != '{')
6164                     PL_lex_state = LEX_INTERPEND;
6165             }
6166         }
6167         TERM(']');
6168     case '{':
6169         s++;
6170       leftbracket:
6171         if (PL_lex_brackets > 100) {
6172             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6173         }
6174         switch (PL_expect) {
6175         case XTERM:
6176         case XTERMORDORDOR:
6177             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6178             PL_lex_allbrackets++;
6179             OPERATOR(HASHBRACK);
6180         case XOPERATOR:
6181             while (s < PL_bufend && SPACE_OR_TAB(*s))
6182                 s++;
6183             d = s;
6184             PL_tokenbuf[0] = '\0';
6185             if (d < PL_bufend && *d == '-') {
6186                 PL_tokenbuf[0] = '-';
6187                 d++;
6188                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6189                     d++;
6190             }
6191             if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6192                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6193                               FALSE, &len);
6194                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6195                     d++;
6196                 if (*d == '}') {
6197                     const char minus = (PL_tokenbuf[0] == '-');
6198                     s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6199                     if (minus)
6200                         force_next('-');
6201                 }
6202             }
6203             /* FALLTHROUGH */
6204         case XATTRTERM:
6205         case XTERMBLOCK:
6206             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6207             PL_lex_allbrackets++;
6208             PL_expect = XSTATE;
6209             break;
6210         case XATTRBLOCK:
6211         case XBLOCK:
6212             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6213             PL_lex_allbrackets++;
6214             PL_expect = XSTATE;
6215             break;
6216         case XBLOCKTERM:
6217             PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6218             PL_lex_allbrackets++;
6219             PL_expect = XSTATE;
6220             break;
6221         default: {
6222                 const char *t;
6223                 if (PL_oldoldbufptr == PL_last_lop)
6224                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6225                 else
6226                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6227                 PL_lex_allbrackets++;
6228                 s = skipspace(s);
6229                 if (*s == '}') {
6230                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6231                         PL_expect = XTERM;
6232                         /* This hack is to get the ${} in the message. */
6233                         PL_bufptr = s+1;
6234                         yyerror("syntax error");
6235                         break;
6236                     }
6237                     OPERATOR(HASHBRACK);
6238                 }
6239                 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6240                     /* ${...} or @{...} etc., but not print {...}
6241                      * Skip the disambiguation and treat this as a block.
6242                      */
6243                     goto block_expectation;
6244                 }
6245                 /* This hack serves to disambiguate a pair of curlies
6246                  * as being a block or an anon hash.  Normally, expectation
6247                  * determines that, but in cases where we're not in a
6248                  * position to expect anything in particular (like inside
6249                  * eval"") we have to resolve the ambiguity.  This code
6250                  * covers the case where the first term in the curlies is a
6251                  * quoted string.  Most other cases need to be explicitly
6252                  * disambiguated by prepending a "+" before the opening
6253                  * curly in order to force resolution as an anon hash.
6254                  *
6255                  * XXX should probably propagate the outer expectation
6256                  * into eval"" to rely less on this hack, but that could
6257                  * potentially break current behavior of eval"".
6258                  * GSAR 97-07-21
6259                  */
6260                 t = s;
6261                 if (*s == '\'' || *s == '"' || *s == '`') {
6262                     /* common case: get past first string, handling escapes */
6263                     for (t++; t < PL_bufend && *t != *s;)
6264                         if (*t++ == '\\')
6265                             t++;
6266                     t++;
6267                 }
6268                 else if (*s == 'q') {
6269                     if (++t < PL_bufend
6270                         && (!isWORDCHAR(*t)
6271                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6272                                 && !isWORDCHAR(*t))))
6273                     {
6274                         /* skip q//-like construct */
6275                         const char *tmps;
6276                         char open, close, term;
6277                         I32 brackets = 1;
6278
6279                         while (t < PL_bufend && isSPACE(*t))
6280                             t++;
6281                         /* check for q => */
6282                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6283                             OPERATOR(HASHBRACK);
6284                         }
6285                         term = *t;
6286                         open = term;
6287                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6288                             term = tmps[5];
6289                         close = term;
6290                         if (open == close)
6291                             for (t++; t < PL_bufend; t++) {
6292                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6293                                     t++;
6294                                 else if (*t == open)
6295                                     break;
6296                             }
6297                         else {
6298                             for (t++; t < PL_bufend; t++) {
6299                                 if (*t == '\\' && t+1 < PL_bufend)
6300                                     t++;
6301                                 else if (*t == close && --brackets <= 0)
6302                                     break;
6303                                 else if (*t == open)
6304                                     brackets++;
6305                             }
6306                         }
6307                         t++;
6308                     }
6309                     else
6310                         /* skip plain q word */
6311                         while (   t < PL_bufend
6312                                && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6313                         {
6314                             t += UTF ? UTF8SKIP(t) : 1;
6315                         }
6316                 }
6317                 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6318                     t += UTF ? UTF8SKIP(t) : 1;
6319                     while (   t < PL_bufend
6320                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6321                     {
6322                         t += UTF ? UTF8SKIP(t) : 1;
6323                     }
6324                 }
6325                 while (t < PL_bufend && isSPACE(*t))
6326                     t++;
6327                 /* if comma follows first term, call it an anon hash */
6328                 /* XXX it could be a comma expression with loop modifiers */
6329                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6330                                    || (*t == '=' && t[1] == '>')))
6331                     OPERATOR(HASHBRACK);
6332                 if (PL_expect == XREF)
6333                 {
6334                   block_expectation:
6335                     /* If there is an opening brace or 'sub:', treat it
6336                        as a term to make ${{...}}{k} and &{sub:attr...}
6337                        dwim.  Otherwise, treat it as a statement, so
6338                        map {no strict; ...} works.
6339                      */
6340                     s = skipspace(s);
6341                     if (*s == '{') {
6342                         PL_expect = XTERM;
6343                         break;
6344                     }
6345                     if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6346                         PL_bufptr = s;
6347                         d = s + 3;
6348                         d = skipspace(d);
6349                         s = PL_bufptr;
6350                         if (*d == ':') {
6351                             PL_expect = XTERM;
6352                             break;
6353                         }
6354                     }
6355                     PL_expect = XSTATE;
6356                 }
6357                 else {
6358                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6359                     PL_expect = XSTATE;
6360                 }
6361             }
6362             break;
6363         }
6364         pl_yylval.ival = CopLINE(PL_curcop);
6365         PL_copline = NOLINE;   /* invalidate current command line number */
6366         TOKEN(formbrack ? '=' : '{');
6367     case '}':
6368         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6369             TOKEN(0);
6370       rightbracket:
6371         assert(s != PL_bufend);
6372         s++;
6373         if (PL_lex_brackets <= 0)
6374             /* diag_listed_as: Unmatched right %s bracket */
6375             yyerror("Unmatched right curly bracket");
6376         else
6377             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6378         PL_lex_allbrackets--;
6379         if (PL_lex_state == LEX_INTERPNORMAL) {
6380             if (PL_lex_brackets == 0) {
6381                 if (PL_expect & XFAKEBRACK) {
6382                     PL_expect &= XENUMMASK;
6383                     PL_lex_state = LEX_INTERPEND;
6384                     PL_bufptr = s;
6385                     return yylex();     /* ignore fake brackets */
6386                 }
6387                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6388                  && SvEVALED(PL_lex_repl))
6389                     PL_lex_state = LEX_INTERPEND;
6390                 else if (*s == '-' && s[1] == '>')
6391                     PL_lex_state = LEX_INTERPENDMAYBE;
6392                 else if (*s != '[' && *s != '{')
6393                     PL_lex_state = LEX_INTERPEND;
6394             }
6395         }
6396         if (PL_expect & XFAKEBRACK) {
6397             PL_expect &= XENUMMASK;
6398             PL_bufptr = s;
6399             return yylex();             /* ignore fake brackets */
6400         }
6401         force_next(formbrack ? '.' : '}');
6402         if (formbrack) LEAVE_with_name("lex_format");
6403         if (formbrack == 2) { /* means . where arguments were expected */
6404             force_next(';');
6405             TOKEN(FORMRBRACK);
6406         }
6407         TOKEN(';');
6408     case '&':
6409         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6410         s++;
6411         if (*s++ == '&') {
6412             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6413                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6414                 s -= 2;
6415                 TOKEN(0);
6416             }
6417             AOPERATOR(ANDAND);
6418         }
6419         s--;
6420         if (PL_expect == XOPERATOR) {
6421             if (   PL_bufptr == PL_linestart
6422                 && ckWARN(WARN_SEMICOLON)
6423                 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6424             {
6425                 CopLINE_dec(PL_curcop);
6426                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6427                 CopLINE_inc(PL_curcop);
6428             }
6429             d = s;
6430             if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6431                 s++;
6432             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6433                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6434                 s = d;
6435                 s--;
6436                 TOKEN(0);
6437             }
6438             if (d == s) {
6439                 PL_parser->saw_infix_sigil = 1;
6440                 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6441             }
6442             else
6443                 BAop(OP_SBIT_AND);
6444         }
6445
6446         PL_tokenbuf[0] = '&';
6447         s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6448         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6449         if (PL_tokenbuf[1]) {
6450             force_ident_maybe_lex('&');
6451         }
6452         else
6453             PREREF('&');
6454         TERM('&');
6455
6456     case '|':
6457         s++;
6458         if (*s++ == '|') {
6459             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6460                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6461                 s -= 2;
6462                 TOKEN(0);
6463             }
6464             AOPERATOR(OROR);
6465         }
6466         s--;
6467         d = s;
6468         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6469             s++;
6470         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6471                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6472             s = d - 1;
6473             TOKEN(0);
6474         }
6475         BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6476     case '=':
6477         s++;
6478         {
6479             const char tmp = *s++;
6480             if (tmp == '=') {
6481                 if (   (s == PL_linestart+2 || s[-3] == '\n')
6482                     && memBEGINs(s, (STRLEN) (PL_bufend - s), "====="))
6483                 {
6484                     s = vcs_conflict_marker(s + 5);
6485                     goto retry;
6486                 }
6487                 if (!PL_lex_allbrackets
6488                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6489                 {
6490                     s -= 2;
6491                     TOKEN(0);
6492                 }
6493                 Eop(OP_EQ);
6494             }
6495             if (tmp == '>') {
6496                 if (!PL_lex_allbrackets
6497                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
6498                 {
6499                     s -= 2;
6500                     TOKEN(0);
6501                 }
6502                 OPERATOR(',');
6503             }
6504             if (tmp == '~')
6505                 PMop(OP_MATCH);
6506             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6507                 && strchr("+-*/%.^&|<",tmp))
6508                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6509                             "Reversed %c= operator",(int)tmp);
6510             s--;
6511             if (PL_expect == XSTATE
6512                 && isALPHA(tmp)
6513                 && (s == PL_linestart+1 || s[-2] == '\n') )
6514             {
6515                 if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6516                     || PL_lex_state != LEX_NORMAL)
6517                 {
6518                     d = PL_bufend;
6519                     while (s < d) {
6520                         if (*s++ == '\n') {
6521                             incline(s, PL_bufend);
6522                             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
6523                             {
6524                                 s = (char *) memchr(s,'\n', d - s);
6525                                 if (s)
6526                                     s++;
6527                                 else
6528                                     s = d;
6529                                 incline(s, PL_bufend);
6530                                 goto retry;
6531                             }
6532                         }
6533                     }
6534                     goto retry;
6535                 }
6536                 s = PL_bufend;
6537                 PL_parser->in_pod = 1;
6538                 goto retry;
6539             }
6540         }
6541         if (PL_expect == XBLOCK) {
6542             const char *t = s;
6543 #ifdef PERL_STRICT_CR
6544             while (SPACE_OR_TAB(*t))
6545 #else
6546             while (SPACE_OR_TAB(*t) || *t == '\r')
6547 #endif
6548                 t++;
6549             if (*t == '\n' || *t == '#') {
6550                 formbrack = 1;
6551                 ENTER_with_name("lex_format");
6552                 SAVEI8(PL_parser->form_lex_state);
6553                 SAVEI32(PL_lex_formbrack);
6554                 PL_parser->form_lex_state = PL_lex_state;
6555                 PL_lex_formbrack = PL_lex_brackets + 1;
6556                 PL_parser->sub_error_count = PL_error_count;
6557                 goto leftbracket;
6558             }
6559         }
6560         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6561             s--;
6562             TOKEN(0);
6563         }
6564         pl_yylval.ival = 0;
6565         OPERATOR(ASSIGNOP);
6566     case '!':
6567         s++;
6568         {
6569             const char tmp = *s++;
6570             if (tmp == '=') {
6571                 /* was this !=~ where !~ was meant?
6572                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6573
6574                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6575                     const char *t = s+1;
6576
6577                     while (t < PL_bufend && isSPACE(*t))
6578                         ++t;
6579
6580                     if (*t == '/' || *t == '?'
6581                         || ((*t == 'm' || *t == 's' || *t == 'y')
6582                             && !isWORDCHAR(t[1]))
6583                         || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6584                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6585                                     "!=~ should be !~");
6586                 }
6587                 if (!PL_lex_allbrackets
6588                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6589                 {
6590                     s -= 2;
6591                     TOKEN(0);
6592                 }
6593                 Eop(OP_NE);
6594             }
6595             if (tmp == '~')
6596                 PMop(OP_NOT);
6597         }
6598         s--;
6599         OPERATOR('!');
6600     case '<':
6601         if (PL_expect != XOPERATOR) {
6602             if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6603                 check_uni();
6604             if (s[1] == '<' && s[2] != '>') {
6605                 if (   (s == PL_linestart || s[-1] == '\n')
6606                     && memBEGINs(s+2, (STRLEN) (PL_bufend - (s+2)), "<<<<<"))
6607                 {
6608                     s = vcs_conflict_marker(s + 7);
6609                     goto retry;
6610                 }
6611                 s = scan_heredoc(s);
6612             }
6613             else
6614                 s = scan_inputsymbol(s);
6615             PL_expect = XOPERATOR;
6616             TOKEN(sublex_start());
6617         }
6618         s++;
6619         {
6620             char tmp = *s++;
6621             if (tmp == '<') {
6622                 if (   (s == PL_linestart+2 || s[-3] == '\n')
6623                     && memBEGINs(s, (STRLEN) (PL_bufend - s), "<<<<<"))
6624                 {
6625                     s = vcs_conflict_marker(s + 5);
6626                     goto retry;
6627                 }
6628                 if (*s == '=' && !PL_lex_allbrackets
6629                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6630                 {
6631                     s -= 2;
6632                     TOKEN(0);
6633                 }
6634                 SHop(OP_LEFT_SHIFT);
6635             }
6636             if (tmp == '=') {
6637                 tmp = *s++;
6638                 if (tmp == '>') {
6639                     if (!PL_lex_allbrackets
6640                         && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6641                     {
6642                         s -= 3;
6643                         TOKEN(0);
6644                     }
6645                     Eop(OP_NCMP);
6646                 }
6647                 s--;
6648                 if (!PL_lex_allbrackets
6649                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6650                 {
6651                     s -= 2;
6652                     TOKEN(0);
6653                 }
6654                 Rop(OP_LE);
6655             }
6656         }
6657         s--;
6658         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6659             s--;
6660             TOKEN(0);
6661         }
6662         Rop(OP_LT);
6663     case '>':
6664         s++;
6665         {
6666             const char tmp = *s++;
6667             if (tmp == '>') {
6668                 if (   (s == PL_linestart+2 || s[-3] == '\n')
6669                     && memBEGINs(s, (STRLEN) (PL_bufend - s), ">>>>>"))
6670                 {
6671                     s = vcs_conflict_marker(s + 5);
6672                     goto retry;
6673                 }
6674                 if (*s == '=' && !PL_lex_allbrackets
6675                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6676                 {
6677                     s -= 2;
6678                     TOKEN(0);
6679                 }
6680                 SHop(OP_RIGHT_SHIFT);
6681             }
6682             else if (tmp == '=') {
6683                 if (!PL_lex_allbrackets
6684                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6685                 {
6686                     s -= 2;
6687                     TOKEN(0);
6688                 }
6689                 Rop(OP_GE);
6690             }
6691         }
6692         s--;
6693         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6694             s--;
6695             TOKEN(0);
6696         }
6697         Rop(OP_GT);
6698
6699     case '$':
6700         CLINE;
6701
6702         if (PL_expect == XPOSTDEREF) {
6703             if (s[1] == '#') {
6704                 s++;
6705                 POSTDEREF(DOLSHARP);
6706             }
6707             POSTDEREF('$');
6708         }
6709
6710         if (   s[1] == '#'
6711             && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
6712                 || strchr("{$:+-@", s[2])))
6713         {
6714             PL_tokenbuf[0] = '@';
6715             s = scan_ident(s + 1, PL_tokenbuf + 1,
6716                            sizeof PL_tokenbuf - 1, FALSE);
6717             if (PL_expect == XOPERATOR) {
6718                 d = s;
6719                 if (PL_bufptr > s) {
6720                     d = PL_bufptr-1;
6721                     PL_bufptr = PL_oldbufptr;
6722                 }
6723                 no_op("Array length", d);
6724             }
6725             if (!PL_tokenbuf[1])
6726                 PREREF(DOLSHARP);
6727             PL_expect = XOPERATOR;
6728             force_ident_maybe_lex('#');
6729             TOKEN(DOLSHARP);
6730         }
6731
6732         PL_tokenbuf[0] = '$';
6733         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6734         if (PL_expect == XOPERATOR) {
6735             d = s;
6736             if (PL_bufptr > s) {
6737                 d = PL_bufptr-1;
6738                 PL_bufptr = PL_oldbufptr;
6739             }
6740             no_op("Scalar", d);
6741         }
6742         if (!PL_tokenbuf[1]) {
6743             if (s == PL_bufend)
6744                 yyerror("Final $ should be \\$ or $name");
6745             PREREF('$');
6746         }
6747
6748         d = s;
6749         {
6750             const char tmp = *s;
6751             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6752                 s = skipspace(s);
6753
6754             if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6755                 && intuit_more(s, PL_bufend)) {
6756                 if (*s == '[') {
6757                     PL_tokenbuf[0] = '@';
6758                     if (ckWARN(WARN_SYNTAX)) {
6759                         char *t = s+1;
6760
6761                         while (   isSPACE(*t)
6762                                || isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)
6763                                || *t == '$')
6764                         {
6765                             t += UTF ? UTF8SKIP(t) : 1;
6766                         }
6767                         if (*t++ == ',') {
6768                             PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6769                             while (t < PL_bufend && *t != ']')
6770                                 t++;
6771                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6772                                         "Multidimensional syntax %" UTF8f " not supported",
6773                                         UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
6774                         }
6775                     }
6776                 }
6777                 else if (*s == '{') {
6778                     char *t;
6779                     PL_tokenbuf[0] = '%';
6780                     if (    strEQ(PL_tokenbuf+1, "SIG")
6781                         && ckWARN(WARN_SYNTAX)
6782                         && (t = (char *) memchr(s, '}', PL_bufend - s))
6783                         && (t = (char *) memchr(t, '=', PL_bufend - t)))
6784                     {
6785                         char tmpbuf[sizeof PL_tokenbuf];
6786                         do {
6787                             t++;
6788                         } while (isSPACE(*t));
6789                         if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
6790                             STRLEN len;
6791                             t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6792                                             &len);
6793                             while (isSPACE(*t))
6794                                 t++;
6795                             if (  *t == ';'
6796                                 && get_cvn_flags(tmpbuf, len, UTF
6797                                                                 ? SVf_UTF8
6798                                                                 : 0))
6799                             {
6800                                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6801                                     "You need to quote \"%" UTF8f "\"",
6802                                         UTF8fARG(UTF, len, tmpbuf));
6803                             }
6804                         }
6805                     }
6806                 }
6807             }
6808
6809             PL_expect = XOPERATOR;
6810             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6811                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6812                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6813                     PL_expect = XOPERATOR;
6814                 else if (strchr("$@\"'`q", *s))
6815                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6816                 else if (   strchr("&*<%", *s)
6817                          && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
6818                 {
6819                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6820                 }
6821                 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6822                     char tmpbuf[sizeof PL_tokenbuf];
6823                     int t2;
6824                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6825                     if ((t2 = keyword(tmpbuf, len, 0))) {
6826                         /* binary operators exclude handle interpretations */
6827                         switch (t2) {
6828                         case -KEY_x:
6829                         case -KEY_eq:
6830                         case -KEY_ne:
6831                         case -KEY_gt:
6832                         case -KEY_lt:
6833                         case -KEY_ge:
6834                         case -KEY_le:
6835                         case -KEY_cmp:
6836                             break;
6837                         default:
6838                             PL_expect = XTERM;  /* e.g. print $fh length() */
6839                             break;
6840                         }
6841                     }
6842                     else {
6843                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6844                     }
6845                 }
6846                 else if (isDIGIT(*s))
6847                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6848                 else if (*s == '.' && isDIGIT(s[1]))
6849                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6850                 else if ((*s == '?' || *s == '-' || *s == '+')
6851                          && !isSPACE(s[1]) && s[1] != '=')
6852                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6853                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6854                          && s[1] != '/')
6855                     PL_expect = XTERM;          /* e.g. print $fh /.../
6856                                                    XXX except DORDOR operator
6857                                                 */
6858                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6859                          && s[2] != '=')
6860                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6861             }
6862         }
6863         force_ident_maybe_lex('$');
6864         TOKEN('$');
6865
6866     case '@':
6867         if (PL_expect == XPOSTDEREF)
6868             POSTDEREF('@');
6869         PL_tokenbuf[0] = '@';
6870         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6871         if (PL_expect == XOPERATOR) {
6872             d = s;
6873             if (PL_bufptr > s) {
6874                 d = PL_bufptr-1;
6875                 PL_bufptr = PL_oldbufptr;
6876             }
6877             no_op("Array", d);
6878         }
6879         pl_yylval.ival = 0;
6880         if (!PL_tokenbuf[1]) {
6881             PREREF('@');
6882         }
6883         if (PL_lex_state == LEX_NORMAL)
6884             s = skipspace(s);
6885         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6886             && intuit_more(s, PL_bufend))
6887         {
6888             if (*s == '{')
6889                 PL_tokenbuf[0] = '%';
6890
6891             /* Warn about @ where they meant $. */
6892             if (*s == '[' || *s == '{') {
6893                 if (ckWARN(WARN_SYNTAX)) {
6894                     S_check_scalar_slice(aTHX_ s);
6895                 }
6896             }
6897         }
6898         PL_expect = XOPERATOR;
6899         force_ident_maybe_lex('@');
6900         TERM('@');
6901
6902      case '/':                  /* may be division, defined-or, or pattern */
6903         if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6904             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6905                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6906                 TOKEN(0);
6907             s += 2;
6908             AOPERATOR(DORDOR);
6909         }
6910         else if (PL_expect == XOPERATOR) {
6911             s++;
6912             if (*s == '=' && !PL_lex_allbrackets
6913                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6914             {
6915                 s--;
6916                 TOKEN(0);
6917             }
6918             Mop(OP_DIVIDE);
6919         }
6920         else {
6921             /* Disable warning on "study /blah/" */
6922             if (    PL_oldoldbufptr == PL_last_uni
6923                 && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6924                     || memNE(PL_last_uni, "study", 5)
6925                     || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6926              ))
6927                 check_uni();
6928             s = scan_pat(s,OP_MATCH);
6929             TERM(sublex_start());
6930         }
6931
6932      case '?':                  /* conditional */
6933         s++;
6934         if (!PL_lex_allbrackets
6935             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
6936         {
6937             s--;
6938             TOKEN(0);
6939         }
6940         PL_lex_allbrackets++;
6941         OPERATOR('?');
6942
6943     case '.':
6944         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6945 #ifdef PERL_STRICT_CR
6946             && s[1] == '\n'
6947 #else
6948             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6949 #endif
6950             && (s == PL_linestart || s[-1] == '\n') )
6951         {
6952             PL_expect = XSTATE;
6953             formbrack = 2; /* dot seen where arguments expected */
6954             goto rightbracket;
6955         }
6956         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6957             s += 3;
6958             OPERATOR(YADAYADA);
6959         }
6960         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6961             char tmp = *s++;
6962             if (*s == tmp) {
6963                 if (!PL_lex_allbrackets
6964                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
6965                 {
6966                     s--;
6967                     TOKEN(0);
6968                 }
6969                 s++;
6970                 if (*s == tmp) {
6971                     s++;
6972                     pl_yylval.ival = OPf_SPECIAL;
6973                 }
6974                 else
6975                     pl_yylval.ival = 0;
6976                 OPERATOR(DOTDOT);
6977             }
6978             if (*s == '=' && !PL_lex_allbrackets
6979                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6980             {
6981                 s--;
6982                 TOKEN(0);
6983             }
6984             Aop(OP_CONCAT);
6985         }
6986         /* FALLTHROUGH */
6987     case '0': case '1': case '2': case '3': case '4':
6988     case '5': case '6': case '7': case '8': case '9':
6989         s = scan_num(s, &pl_yylval);
6990         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6991         if (PL_expect == XOPERATOR)
6992             no_op("Number",s);
6993         TERM(THING);
6994
6995     case '\'':
6996         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6997         if (!s)
6998             missingterm(NULL, 0);
6999         COPLINE_SET_FROM_MULTI_END;
7000         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
7001         if (PL_expect == XOPERATOR) {
7002             no_op("String",s);
7003         }
7004         pl_yylval.ival = OP_CONST;
7005         TERM(sublex_start());
7006
7007     case '"':
7008         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7009         DEBUG_T( {
7010             if (s)
7011                 printbuf("### Saw string before %s\n", s);
7012             else
7013                 PerlIO_printf(Perl_debug_log,
7014                              "### Saw unterminated string\n");
7015         } );
7016         if (PL_expect == XOPERATOR) {
7017                 no_op("String",s);
7018         }
7019         if (!s)
7020             missingterm(NULL, 0);
7021         pl_yylval.ival = OP_CONST;
7022         /* FIXME. I think that this can be const if char *d is replaced by
7023            more localised variables.  */
7024         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
7025             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
7026                 pl_yylval.ival = OP_STRINGIFY;
7027                 break;
7028             }
7029         }
7030         if (pl_yylval.ival == OP_CONST)
7031             COPLINE_SET_FROM_MULTI_END;
7032         TERM(sublex_start());
7033
7034     case '`':
7035         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7036         DEBUG_T( {
7037             if (s)
7038                 printbuf("### Saw backtick string before %s\n", s);
7039             else
7040                 PerlIO_printf(Perl_debug_log,
7041                              "### Saw unterminated backtick string\n");
7042         } );
7043         if (PL_expect == XOPERATOR)
7044             no_op("Backticks",s);
7045         if (!s)
7046             missingterm(NULL, 0);
7047         pl_yylval.ival = OP_BACKTICK;
7048         TERM(sublex_start());
7049
7050     case '\\':
7051         s++;
7052         if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7053          && isDIGIT(*s))
7054             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
7055                            *s, *s);
7056         if (PL_expect == XOPERATOR)
7057             no_op("Backslash",s);
7058         OPERATOR(REFGEN);
7059
7060     case 'v':
7061         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
7062             char *start = s + 2;
7063             while (isDIGIT(*start) || *start == '_')
7064                 start++;
7065             if (*start == '.' && isDIGIT(start[1])) {
7066                 s = scan_num(s, &pl_yylval);
7067                 TERM(THING);
7068             }
7069             else if ((*start == ':' && start[1] == ':')
7070                   || (PL_expect == XSTATE && *start == ':'))
7071                 goto keylookup;
7072             else if (PL_expect == XSTATE) {
7073                 d = start;
7074                 while (d < PL_bufend && isSPACE(*d)) d++;
7075                 if (*d == ':') goto keylookup;
7076             }
7077             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
7078             if (!isALPHA(*start) && (PL_expect == XTERM
7079                         || PL_expect == XREF || PL_expect == XSTATE
7080                         || PL_expect == XTERMORDORDOR)) {
7081                 GV *const gv = gv_fetchpvn_flags(s, start - s,
7082                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
7083                 if (!gv) {
7084                     s = scan_num(s, &pl_yylval);
7085                     TERM(THING);
7086                 }
7087             }
7088         }
7089         goto keylookup;
7090     case 'x':
7091         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
7092             s++;
7093             Mop(OP_REPEAT);
7094         }
7095         goto keylookup;
7096
7097     case '_':
7098     case 'a': case 'A':
7099     case 'b': case 'B':
7100     case 'c': case 'C':
7101     case 'd': case 'D':
7102     case 'e': case 'E':
7103     case 'f': case 'F':
7104     case 'g': case 'G':
7105     case 'h': case 'H':
7106     case 'i': case 'I':
7107     case 'j': case 'J':
7108     case 'k': case 'K':
7109     case 'l': case 'L':
7110     case 'm': case 'M':
7111     case 'n': case 'N':
7112     case 'o': case 'O':
7113     case 'p': case 'P':
7114     case 'q': case 'Q':
7115     case 'r': case 'R':
7116     case 's': case 'S':
7117     case 't': case 'T':
7118     case 'u': case 'U':
7119               case 'V':
7120     case 'w': case 'W':
7121               case 'X':
7122     case 'y': case 'Y':
7123     case 'z': case 'Z':
7124
7125       keylookup: {
7126         bool anydelim;
7127         bool lex;
7128         I32 tmp;
7129         SV *sv;
7130         CV *cv;
7131         PADOFFSET off;
7132         OP *rv2cv_op;
7133
7134         lex = FALSE;
7135         orig_keyword = 0;
7136         off = 0;
7137         sv = NULL;
7138         cv = NULL;
7139         gv = NULL;
7140         gvp = NULL;
7141         rv2cv_op = NULL;
7142
7143         PL_bufptr = s;
7144         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7145
7146         /* Some keywords can be followed by any delimiter, including ':' */
7147         anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
7148
7149         /* x::* is just a word, unless x is "CORE" */
7150         if (!anydelim && *s == ':' && s[1] == ':') {
7151             if (memEQs(PL_tokenbuf, len, "CORE")) goto case_KEY_CORE;
7152             goto just_a_word;
7153         }
7154
7155         d = s;
7156         while (d < PL_bufend && isSPACE(*d))
7157                 d++;    /* no comments skipped here, or s### is misparsed */
7158
7159         /* Is this a word before a => operator? */
7160         if (*d == '=' && d[1] == '>') {
7161           fat_arrow:
7162             CLINE;
7163             pl_yylval.opval
7164                 = newSVOP(OP_CONST, 0,
7165                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7166             pl_yylval.opval->op_private = OPpCONST_BARE;
7167             TERM(BAREWORD);
7168         }
7169
7170         /* Check for plugged-in keyword */
7171         {
7172             OP *o;
7173             int result;
7174             char *saved_bufptr = PL_bufptr;
7175             PL_bufptr = s;
7176             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7177             s = PL_bufptr;
7178             if (result == KEYWORD_PLUGIN_DECLINE) {
7179                 /* not a plugged-in keyword */
7180                 PL_bufptr = saved_bufptr;
7181             } else if (result == KEYWORD_PLUGIN_STMT) {
7182                 pl_yylval.opval = o;
7183                 CLINE;
7184                 if (!PL_nexttoke) PL_expect = XSTATE;
7185                 return REPORT(PLUGSTMT);
7186             } else if (result == KEYWORD_PLUGIN_EXPR) {
7187                 pl_yylval.opval = o;
7188                 CLINE;
7189                 if (!PL_nexttoke) PL_expect = XOPERATOR;
7190                 return REPORT(PLUGEXPR);
7191             } else {
7192                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7193                                         PL_tokenbuf);
7194             }
7195         }
7196
7197         /* Check for built-in keyword */
7198         tmp = keyword(PL_tokenbuf, len, 0);
7199
7200         /* Is this a label? */
7201         if (!anydelim && PL_expect == XSTATE
7202               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7203             s = d + 1;
7204             pl_yylval.opval =
7205                 newSVOP(OP_CONST, 0,
7206                     newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
7207             CLINE;
7208             TOKEN(LABEL);
7209         }
7210
7211         /* Check for lexical sub */
7212         if (PL_expect != XOPERATOR) {
7213             char tmpbuf[sizeof PL_tokenbuf + 1];
7214             *tmpbuf = '&';
7215             Copy(PL_tokenbuf, tmpbuf+1, len, char);
7216             off = pad_findmy_pvn(tmpbuf, len+1, 0);
7217             if (off != NOT_IN_PAD) {
7218                 assert(off); /* we assume this is boolean-true below */
7219                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7220                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
7221                     HEK * const stashname = HvNAME_HEK(stash);
7222                     sv = newSVhek(stashname);
7223                     sv_catpvs(sv, "::");
7224                     sv_catpvn_flags(sv, PL_tokenbuf, len,
7225                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
7226                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7227                                     SVt_PVCV);
7228                     off = 0;
7229                     if (!gv) {
7230                         sv_free(sv);
7231                         sv = NULL;
7232                         goto just_a_word;
7233                     }
7234                 }
7235                 else {
7236                     rv2cv_op = newOP(OP_PADANY, 0);
7237                     rv2cv_op->op_targ = off;
7238                     cv = find_lexical_cv(off);
7239                 }
7240                 lex = TRUE;
7241                 goto just_a_word;
7242             }
7243             off = 0;
7244         }
7245
7246         if (tmp < 0) {                  /* second-class keyword? */
7247             GV *ogv = NULL;     /* override (winner) */
7248             GV *hgv = NULL;     /* hidden (loser) */
7249             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7250                 CV *cv;
7251                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7252                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
7253                                             SVt_PVCV))
7254                     && (cv = GvCVu(gv)))
7255                 {
7256                     if (GvIMPORTED_CV(gv))
7257                         ogv = gv;
7258                     else if (! CvMETHOD(cv))
7259                         hgv = gv;
7260                 }
7261                 if (!ogv
7262                     && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7263                                                               len, FALSE))
7264                     && (gv = *gvp)
7265                     && (isGV_with_GP(gv)
7266                         ? GvCVu(gv) && GvIMPORTED_CV(gv)
7267                         :   SvPCS_IMPORTED(gv)
7268                         && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7269                                                                  len, 0), 1)))
7270                 {
7271                     ogv = gv;
7272                 }
7273             }
7274             if (ogv) {
7275                 orig_keyword = tmp;
7276                 tmp = 0;                /* overridden by import or by GLOBAL */
7277             }
7278             else if (gv && !gvp
7279                      && -tmp==KEY_lock  /* XXX generalizable kludge */
7280                      && GvCVu(gv))
7281             {
7282                 tmp = 0;                /* any sub overrides "weak" keyword */
7283             }
7284             else {                      /* no override */
7285                 tmp = -tmp;
7286                 if (tmp == KEY_dump) {
7287                     Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
7288                 }
7289                 gv = NULL;
7290                 gvp = 0;
7291                 if (hgv && tmp != KEY_x)        /* never ambiguous */
7292                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7293                                    "Ambiguous call resolved as CORE::%s(), "
7294                                    "qualify as such or use &",
7295                                    GvENAME(hgv));
7296             }
7297         }
7298
7299         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7300          && (!anydelim || *s != '#')) {
7301             /* no override, and not s### either; skipspace is safe here
7302              * check for => on following line */
7303             bool arrow;
7304             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7305             STRLEN   soff = s         - SvPVX(PL_linestr);
7306             s = peekspace(s);
7307             arrow = *s == '=' && s[1] == '>';
7308             PL_bufptr = SvPVX(PL_linestr) + bufoff;
7309             s         = SvPVX(PL_linestr) +   soff;
7310             if (arrow)
7311                 goto fat_arrow;
7312         }
7313
7314       reserved_word:
7315         switch (tmp) {
7316
7317             /* Trade off - by using this evil construction we can pull the
7318                variable gv into the block labelled keylookup. If not, then
7319                we have to give it function scope so that the goto from the
7320                earlier ':' case doesn't bypass the initialisation.  */
7321             just_a_word_zero_gv:
7322                 sv = NULL;
7323                 cv = NULL;
7324                 gv = NULL;
7325                 gvp = NULL;
7326                 rv2cv_op = NULL;
7327                 orig_keyword = 0;
7328                 lex = 0;
7329                 off = 0;
7330             /* FALLTHROUGH */
7331         default:                        /* not a keyword */
7332           just_a_word: {
7333                 int pkgname = 0;
7334                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7335                 bool safebw;
7336                 bool no_op_error = FALSE;
7337
7338                 if (PL_expect == XOPERATOR) {
7339                     if (PL_bufptr == PL_linestart) {
7340                         CopLINE_dec(PL_curcop);
7341                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7342                         CopLINE_inc(PL_curcop);
7343                     }
7344                     else
7345                         /* We want to call no_op with s pointing after the
7346                            bareword, so defer it.  But we want it to come
7347                            before the Bad name croak.  */
7348                         no_op_error = TRUE;
7349                 }
7350
7351                 /* Get the rest if it looks like a package qualifier */
7352
7353                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7354                     STRLEN morelen;
7355                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7356                                   TRUE, &morelen);
7357                     if (no_op_error) {
7358                         no_op("Bareword",s);
7359                         no_op_error = FALSE;
7360                     }
7361                     if (!morelen)
7362                         Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7363                                 UTF8fARG(UTF, len, PL_tokenbuf),
7364                                 *s == '\'' ? "'" : "::");
7365                     len += morelen;
7366                     pkgname = 1;
7367                 }
7368
7369                 if (no_op_error)
7370                         no_op("Bareword",s);
7371
7372                 /* See if the name is "Foo::",
7373                    in which case Foo is a bareword
7374                    (and a package name). */
7375
7376                 if (len > 2
7377                     && PL_tokenbuf[len - 2] == ':'
7378                     && PL_tokenbuf[len - 1] == ':')
7379                 {
7380                     if (ckWARN(WARN_BAREWORD)
7381                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7382                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7383                                     "Bareword \"%" UTF8f
7384                                     "\" refers to nonexistent package",
7385                                     UTF8fARG(UTF, len, PL_tokenbuf));
7386                     len -= 2;
7387                     PL_tokenbuf[len] = '\0';
7388                     gv = NULL;
7389                     gvp = 0;
7390                     safebw = TRUE;
7391                 }
7392                 else {
7393                     safebw = FALSE;
7394                 }
7395
7396                 /* if we saw a global override before, get the right name */
7397
7398                 if (!sv)
7399                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7400                                                 len);
7401                 if (gvp) {
7402                     SV * const tmp_sv = sv;
7403                     sv = newSVpvs("CORE::GLOBAL::");
7404                     sv_catsv(sv, tmp_sv);
7405                     SvREFCNT_dec(tmp_sv);
7406                 }
7407
7408
7409                 /* Presume this is going to be a bareword of some sort. */
7410                 CLINE;
7411                 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
7412                 pl_yylval.opval->op_private = OPpCONST_BARE;
7413
7414                 /* And if "Foo::", then that's what it certainly is. */
7415                 if (safebw)
7416                     goto safe_bareword;
7417
7418                 if (!off)
7419                 {
7420                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7421                     const_op->op_private = OPpCONST_BARE;
7422                     rv2cv_op =
7423                         newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7424                     cv = lex
7425                         ? isGV(gv)
7426                             ? GvCV(gv)
7427                             : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
7428                                 ? (CV *)SvRV(gv)
7429                                 : ((CV *)gv)
7430                         : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
7431                 }
7432
7433                 /* Use this var to track whether intuit_method has been
7434                    called.  intuit_method returns 0 or > 255.  */
7435                 tmp = 1;
7436
7437                 /* See if it's the indirect object for a list operator. */
7438
7439                 if (PL_oldoldbufptr
7440                     && PL_oldoldbufptr < PL_bufptr
7441                     && (PL_oldoldbufptr == PL_last_lop
7442                         || PL_oldoldbufptr == PL_last_uni)
7443                     && /* NO SKIPSPACE BEFORE HERE! */
7444                        (PL_expect == XREF
7445                         || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7446                                                                == OA_FILEREF))
7447                 {
7448                     bool immediate_paren = *s == '(';
7449                     SSize_t s_off;
7450
7451                     /* (Now we can afford to cross potential line boundary.) */
7452                     s = skipspace(s);
7453
7454                     /* intuit_method() can indirectly call lex_next_chunk(),
7455                      * invalidating s
7456                      */
7457                     s_off = s - SvPVX(PL_linestr);
7458                     /* Two barewords in a row may indicate method call. */
7459                     if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7460                             || *s == '$')
7461                         && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7462                     {
7463                         /* the code at method: doesn't use s */
7464                         goto method;
7465                     }
7466                     s = SvPVX(PL_linestr) + s_off;
7467
7468                     /* If not a declared subroutine, it's an indirect object. */
7469                     /* (But it's an indir obj regardless for sort.) */
7470                     /* Also, if "_" follows a filetest operator, it's a bareword */
7471
7472                     if (
7473                         ( !immediate_paren && (PL_last_lop_op == OP_SORT
7474                          || (!cv
7475                              && (PL_last_lop_op != OP_MAPSTART
7476                                  && PL_last_lop_op != OP_GREPSTART))))
7477                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7478                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7479                                                             == OA_FILESTATOP))
7480                        )
7481                     {
7482                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7483                         goto bareword;
7484                     }
7485                 }
7486
7487                 PL_expect = XOPERATOR;
7488                 s = skipspace(s);
7489
7490                 /* Is this a word before a => operator? */
7491                 if (*s == '=' && s[1] == '>' && !pkgname) {
7492                     op_free(rv2cv_op);
7493                     CLINE;
7494                     if (gvp || (lex && !off)) {
7495                         assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
7496                         /* This is our own scalar, created a few lines
7497                            above, so this is safe. */
7498                         SvREADONLY_off(sv);
7499                         sv_setpv(sv, PL_tokenbuf);
7500                         if (UTF && !IN_BYTES
7501                          && is_utf8_string((U8*)PL_tokenbuf, len))
7502                               SvUTF8_on(sv);
7503                         SvREADONLY_on(sv);
7504                     }
7505                     TERM(BAREWORD);
7506                 }
7507
7508                 /* If followed by a paren, it's certainly a subroutine. */
7509                 if (*s == '(') {
7510                     CLINE;
7511                     if (cv) {
7512                         d = s + 1;
7513                         while (SPACE_OR_TAB(*d))
7514                             d++;
7515                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7516                             s = d + 1;
7517                             goto its_constant;
7518                         }
7519                     }
7520                     NEXTVAL_NEXTTOKE.opval =
7521                         off ? rv2cv_op : pl_yylval.opval;
7522                     if (off)
7523                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7524                     else op_free(rv2cv_op),        force_next(BAREWORD);
7525                     pl_yylval.ival = 0;
7526                     TOKEN('&');
7527                 }
7528
7529                 /* If followed by var or block, call it a method (unless sub) */
7530
7531                 if ((*s == '$' || *s == '{') && !cv) {
7532                     op_free(rv2cv_op);
7533                     PL_last_lop = PL_oldbufptr;
7534                     PL_last_lop_op = OP_METHOD;
7535                     if (!PL_lex_allbrackets
7536                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7537                     {
7538                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7539                     }
7540                     PL_expect = XBLOCKTERM;
7541                     PL_bufptr = s;
7542                     return REPORT(METHOD);
7543                 }
7544
7545                 /* If followed by a bareword, see if it looks like indir obj. */
7546
7547                 if (   tmp == 1
7548                     && !orig_keyword
7549                     && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7550                     && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7551                 {
7552                   method:
7553                     if (lex && !off) {
7554                         assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
7555                         SvREADONLY_off(sv);
7556                         sv_setpvn(sv, PL_tokenbuf, len);
7557                         if (UTF && !IN_BYTES
7558                          && is_utf8_string((U8*)PL_tokenbuf, len))
7559                             SvUTF8_on (sv);
7560                         else SvUTF8_off(sv);
7561                     }
7562                     op_free(rv2cv_op);
7563                     if (tmp == METHOD && !PL_lex_allbrackets
7564                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7565                     {
7566                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7567                     }
7568                     return REPORT(tmp);
7569                 }
7570
7571                 /* Not a method, so call it a subroutine (if defined) */
7572
7573                 if (cv) {
7574                     /* Check for a constant sub */
7575                     if ((sv = cv_const_sv_or_av(cv))) {
7576                   its_constant:
7577                         op_free(rv2cv_op);
7578                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7579                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7580                         if (SvTYPE(sv) == SVt_PVAV)
7581                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7582                                                       pl_yylval.opval);
7583                         else {
7584                             pl_yylval.opval->op_private = 0;
7585                             pl_yylval.opval->op_folded = 1;
7586                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7587                         }
7588                         TOKEN(BAREWORD);
7589                     }
7590
7591                     op_free(pl_yylval.opval);
7592                     pl_yylval.opval =
7593                         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7594                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7595                     PL_last_lop = PL_oldbufptr;
7596                     PL_last_lop_op = OP_ENTERSUB;
7597                     /* Is there a prototype? */
7598                     if (
7599                         SvPOK(cv))
7600                     {
7601                         STRLEN protolen = CvPROTOLEN(cv);
7602                         const char *proto = CvPROTO(cv);
7603                         bool optional;
7604                         proto = S_strip_spaces(aTHX_ proto, &protolen);
7605                         if (!protolen)
7606                             TERM(FUNC0SUB);
7607                         if ((optional = *proto == ';'))
7608                           do
7609                             proto++;
7610                           while (*proto == ';');
7611                         if (
7612                             (
7613                                 (
7614                                     *proto == '$' || *proto == '_'
7615                                  || *proto == '*' || *proto == '+'
7616                                 )
7617                              && proto[1] == '\0'
7618                             )
7619                          || (
7620                              *proto == '\\' && proto[1] && proto[2] == '\0'
7621                             )
7622                         )
7623                             UNIPROTO(UNIOPSUB,optional);
7624                         if (*proto == '\\' && proto[1] == '[') {
7625                             const char *p = proto + 2;
7626                             while(*p && *p != ']')
7627                                 ++p;
7628                             if(*p == ']' && !p[1])
7629                                 UNIPROTO(UNIOPSUB,optional);
7630                         }
7631                         if (*proto == '&' && *s == '{') {
7632                             if (PL_curstash)
7633                                 sv_setpvs(PL_subname, "__ANON__");
7634                             else
7635                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7636                             if (!PL_lex_allbrackets
7637                                 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7638                             {
7639                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7640                             }
7641                             PREBLOCK(LSTOPSUB);
7642                         }
7643                     }
7644                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7645                     PL_expect = XTERM;
7646                     force_next(off ? PRIVATEREF : BAREWORD);
7647                     if (!PL_lex_allbrackets
7648                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7649                     {
7650                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7651                     }
7652                     TOKEN(NOAMP);
7653                 }
7654
7655                 /* Call it a bare word */
7656
7657                 if (PL_hints & HINT_STRICT_SUBS)
7658                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7659                 else {
7660                 bareword:
7661                     /* after "print" and similar functions (corresponding to
7662                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7663                      * a filehandle should be subject to "strict subs".
7664                      * Likewise for the optional indirect-object argument to system
7665                      * or exec, which can't be a bareword */
7666                     if ((PL_last_lop_op == OP_PRINT
7667                             || PL_last_lop_op == OP_PRTF
7668                             || PL_last_lop_op == OP_SAY
7669                             || PL_last_lop_op == OP_SYSTEM
7670                             || PL_last_lop_op == OP_EXEC)
7671                             && (PL_hints & HINT_STRICT_SUBS))
7672                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7673                     if (lastchar != '-') {
7674                         if (ckWARN(WARN_RESERVED)) {
7675                             d = PL_tokenbuf;
7676                             while (isLOWER(*d))
7677                                 d++;
7678                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7679                             {
7680                                 /* PL_warn_reserved is constant */
7681                                 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7682                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7683                                        PL_tokenbuf);
7684                                 GCC_DIAG_RESTORE_STMT;
7685                             }
7686                         }
7687                     }
7688                 }
7689                 op_free(rv2cv_op);
7690
7691             safe_bareword:
7692                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7693                  && saw_infix_sigil) {
7694                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7695                                      "Operator or semicolon missing before %c%" UTF8f,
7696                                      lastchar,
7697                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
7698                                               PL_tokenbuf));
7699                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7700                                      "Ambiguous use of %c resolved as operator %c",
7701                                      lastchar, lastchar);
7702                 }
7703                 TOKEN(BAREWORD);
7704             }
7705
7706         case KEY___FILE__:
7707             FUN0OP(
7708                 newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7709             );
7710
7711         case KEY___LINE__:
7712             FUN0OP(
7713                 newSVOP(OP_CONST, 0,
7714                     Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7715             );
7716
7717         case KEY___PACKAGE__:
7718             FUN0OP(
7719                 newSVOP(OP_CONST, 0,
7720                                         (PL_curstash
7721                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7722                                          : &PL_sv_undef))
7723             );
7724
7725         case KEY___DATA__:
7726         case KEY___END__: {
7727             GV *gv;
7728             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7729                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7730                                         ? PL_curstash
7731                                         : PL_defstash;
7732                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7733                 if (!isGV(gv))
7734                     gv_init(gv,stash,"DATA",4,0);
7735                 GvMULTI_on(gv);
7736                 if (!GvIO(gv))
7737                     GvIOp(gv) = newIO();
7738                 IoIFP(GvIOp(gv)) = PL_rsfp;
7739                 /* Mark this internal pseudo-handle as clean */
7740                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7741                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7742                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7743                 else
7744                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7745 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7746                 /* if the script was opened in binmode, we need to revert
7747                  * it to text mode for compatibility; but only iff it has CRs
7748                  * XXX this is a questionable hack at best. */
7749                 if (PL_bufend-PL_bufptr > 2
7750                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7751                 {
7752                     Off_t loc = 0;
7753                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7754                         loc = PerlIO_tell(PL_rsfp);
7755                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7756                     }
7757 #ifdef NETWARE
7758                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7759 #else
7760                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7761 #endif  /* NETWARE */
7762                         if (loc > 0)
7763                             PerlIO_seek(PL_rsfp, loc, 0);
7764                     }
7765                 }
7766 #endif
7767 #ifdef PERLIO_LAYERS
7768                 if (!IN_BYTES) {
7769                     if (UTF)
7770                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7771                 }
7772 #endif
7773                 PL_rsfp = NULL;
7774             }
7775             goto fake_eof;
7776         }
7777
7778         case KEY___SUB__:
7779             FUN0OP(CvCLONE(PL_compcv)
7780                         ? newOP(OP_RUNCV, 0)
7781                         : newPVOP(OP_RUNCV,0,NULL));
7782
7783         case KEY_AUTOLOAD:
7784         case KEY_DESTROY:
7785         case KEY_BEGIN:
7786         case KEY_UNITCHECK:
7787         case KEY_CHECK:
7788         case KEY_INIT:
7789         case KEY_END:
7790             if (PL_expect == XSTATE) {
7791                 s = PL_bufptr;
7792                 goto really_sub;
7793             }
7794             goto just_a_word;
7795
7796         case_KEY_CORE:
7797             {
7798                 STRLEN olen = len;
7799                 d = s;
7800                 s += 2;
7801                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7802                 if ((*s == ':' && s[1] == ':')
7803                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7804                 {
7805                     s = d;
7806                     len = olen;
7807                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7808                     goto just_a_word;
7809                 }
7810                 if (!tmp)
7811                     Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
7812                                       UTF8fARG(UTF, len, PL_tokenbuf));
7813                 if (tmp < 0)
7814                     tmp = -tmp;
7815                 else if (tmp == KEY_require || tmp == KEY_do
7816                       || tmp == KEY_glob)
7817                     /* that's a way to remember we saw "CORE::" */
7818                     orig_keyword = tmp;
7819                 goto reserved_word;
7820             }
7821
7822         case KEY_abs:
7823             UNI(OP_ABS);
7824
7825         case KEY_alarm:
7826             UNI(OP_ALARM);
7827
7828         case KEY_accept:
7829             LOP(OP_ACCEPT,XTERM);
7830
7831         case KEY_and:
7832             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7833                 return REPORT(0);
7834             OPERATOR(ANDOP);
7835
7836         case KEY_atan2:
7837             LOP(OP_ATAN2,XTERM);
7838
7839         case KEY_bind:
7840             LOP(OP_BIND,XTERM);
7841
7842         case KEY_binmode:
7843             LOP(OP_BINMODE,XTERM);
7844
7845         case KEY_bless:
7846             LOP(OP_BLESS,XTERM);
7847
7848         case KEY_break:
7849             FUN0(OP_BREAK);
7850
7851         case KEY_chop:
7852             UNI(OP_CHOP);
7853
7854         case KEY_continue:
7855                     /* We have to disambiguate the two senses of
7856                       "continue". If the next token is a '{' then
7857                       treat it as the start of a continue block;
7858                       otherwise treat it as a control operator.
7859                      */
7860                     s = skipspace(s);
7861                     if (*s == '{')
7862             PREBLOCK(CONTINUE);
7863                     else
7864                         FUN0(OP_CONTINUE);
7865
7866         case KEY_chdir:
7867             /* may use HOME */
7868             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7869             UNI(OP_CHDIR);
7870
7871         case KEY_close:
7872             UNI(OP_CLOSE);
7873
7874         case KEY_closedir:
7875             UNI(OP_CLOSEDIR);
7876
7877         case KEY_cmp:
7878             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7879                 return REPORT(0);
7880             Eop(OP_SCMP);
7881
7882         case KEY_caller:
7883             UNI(OP_CALLER);
7884
7885         case KEY_crypt:
7886 #ifdef FCRYPT
7887             if (!PL_cryptseen) {
7888                 PL_cryptseen = TRUE;
7889                 init_des();
7890             }
7891 #endif
7892             LOP(OP_CRYPT,XTERM);
7893
7894         case KEY_chmod:
7895             LOP(OP_CHMOD,XTERM);
7896
7897         case KEY_chown:
7898             LOP(OP_CHOWN,XTERM);
7899
7900         case KEY_connect:
7901             LOP(OP_CONNECT,XTERM);
7902
7903         case KEY_chr:
7904             UNI(OP_CHR);
7905
7906         case KEY_cos:
7907             UNI(OP_COS);
7908
7909         case KEY_chroot:
7910             UNI(OP_CHROOT);
7911
7912         case KEY_default:
7913             PREBLOCK(DEFAULT);
7914
7915         case KEY_do:
7916             s = skipspace(s);
7917             if (*s == '{')
7918                 PRETERMBLOCK(DO);
7919             if (*s != '\'') {
7920                 *PL_tokenbuf = '&';
7921                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7922                               1, &len);
7923                 if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7924                  && !keyword(PL_tokenbuf + 1, len, 0)) {
7925                     SSize_t off = s-SvPVX(PL_linestr);
7926                     d = skipspace(d);
7927                     s = SvPVX(PL_linestr)+off;
7928                     if (*d == '(') {
7929                         force_ident_maybe_lex('&');
7930                         s = d;
7931                     }
7932                 }
7933             }
7934             if (orig_keyword == KEY_do) {
7935                 orig_keyword = 0;
7936                 pl_yylval.ival = 1;
7937             }
7938             else
7939                 pl_yylval.ival = 0;
7940             OPERATOR(DO);
7941
7942         case KEY_die:
7943             PL_hints |= HINT_BLOCK_SCOPE;
7944             LOP(OP_DIE,XTERM);
7945
7946         case KEY_defined:
7947             UNI(OP_DEFINED);
7948
7949         case KEY_delete:
7950             UNI(OP_DELETE);
7951
7952         case KEY_dbmopen:
7953             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7954                               STR_WITH_LEN("NDBM_File::"),
7955                               STR_WITH_LEN("DB_File::"),
7956                               STR_WITH_LEN("GDBM_File::"),
7957                               STR_WITH_LEN("SDBM_File::"),
7958                               STR_WITH_LEN("ODBM_File::"),
7959                               NULL);
7960             LOP(OP_DBMOPEN,XTERM);
7961
7962         case KEY_dbmclose:
7963             UNI(OP_DBMCLOSE);
7964
7965         case KEY_dump:
7966             LOOPX(OP_DUMP);
7967
7968         case KEY_else:
7969             PREBLOCK(ELSE);
7970
7971         case KEY_elsif:
7972             pl_yylval.ival = CopLINE(PL_curcop);
7973             OPERATOR(ELSIF);
7974
7975         case KEY_eq:
7976             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7977                 return REPORT(0);
7978             Eop(OP_SEQ);
7979
7980         case KEY_exists:
7981             UNI(OP_EXISTS);
7982
7983         case KEY_exit:
7984             UNI(OP_EXIT);
7985
7986         case KEY_eval:
7987             s = skipspace(s);
7988             if (*s == '{') { /* block eval */
7989                 PL_expect = XTERMBLOCK;
7990                 UNIBRACK(OP_ENTERTRY);
7991             }
7992             else { /* string eval */
7993                 PL_expect = XTERM;
7994                 UNIBRACK(OP_ENTEREVAL);
7995             }
7996
7997         case KEY_evalbytes:
7998             PL_expect = XTERM;
7999             UNIBRACK(-OP_ENTEREVAL);
8000
8001         case KEY_eof:
8002             UNI(OP_EOF);
8003
8004         case KEY_exp:
8005             UNI(OP_EXP);
8006
8007         case KEY_each:
8008             UNI(OP_EACH);
8009
8010         case KEY_exec:
8011             LOP(OP_EXEC,XREF);
8012
8013         case KEY_endhostent:
8014             FUN0(OP_EHOSTENT);
8015
8016         case KEY_endnetent:
8017             FUN0(OP_ENETENT);
8018
8019         case KEY_endservent:
8020             FUN0(OP_ESERVENT);
8021
8022         case KEY_endprotoent:
8023             FUN0(OP_EPROTOENT);
8024
8025         case KEY_endpwent:
8026             FUN0(OP_EPWENT);
8027
8028         case KEY_endgrent:
8029             FUN0(OP_EGRENT);
8030
8031         case KEY_for:
8032         case KEY_foreach:
8033             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8034                 return REPORT(0);
8035             pl_yylval.ival = CopLINE(PL_curcop);
8036             s = skipspace(s);
8037             if (   PL_expect == XSTATE
8038                 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
8039             {
8040                 char *p = s;
8041                 SSize_t s_off = s - SvPVX(PL_linestr);
8042
8043                 if (   memBEGINPs(p, (STRLEN) (PL_bufend - p), "my")
8044                     && isSPACE(*(p + 2)))
8045                 {
8046                     p += 2;
8047                 }
8048                 else if (   memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")
8049                          && isSPACE(*(p + 3)))
8050                 {
8051                     p += 3;
8052                 }
8053
8054                 p = skipspace(p);
8055                 /* skip optional package name, as in "for my abc $x (..)" */
8056                 if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
8057                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8058                     p = skipspace(p);
8059                 }
8060                 if (*p != '$' && *p != '\\')
8061                     Perl_croak(aTHX_ "Missing $ on loop variable");
8062
8063                 /* The buffer may have been reallocated, update s */
8064                 s = SvPVX(PL_linestr) + s_off;
8065             }
8066             OPERATOR(FOR);
8067
8068         case KEY_formline:
8069             LOP(OP_FORMLINE,XTERM);
8070
8071         case KEY_fork:
8072             FUN0(OP_FORK);
8073
8074         case KEY_fc:
8075             UNI(OP_FC);
8076
8077         case KEY_fcntl:
8078             LOP(OP_FCNTL,XTERM);
8079
8080         case KEY_fileno:
8081             UNI(OP_FILENO);
8082
8083         case KEY_flock:
8084             LOP(OP_FLOCK,XTERM);
8085
8086         case KEY_gt:
8087             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8088                 return REPORT(0);
8089             Rop(OP_SGT);
8090
8091         case KEY_ge:
8092             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8093                 return REPORT(0);
8094             Rop(OP_SGE);
8095
8096         case KEY_grep:
8097             LOP(OP_GREPSTART, XREF);
8098
8099         case KEY_goto:
8100             LOOPX(OP_GOTO);
8101
8102         case KEY_gmtime:
8103             UNI(OP_GMTIME);
8104
8105         case KEY_getc:
8106             UNIDOR(OP_GETC);
8107
8108         case KEY_getppid:
8109             FUN0(OP_GETPPID);
8110
8111         case KEY_getpgrp:
8112             UNI(OP_GETPGRP);
8113
8114         case KEY_getpriority:
8115             LOP(OP_GETPRIORITY,XTERM);
8116
8117         case KEY_getprotobyname:
8118             UNI(OP_GPBYNAME);
8119
8120         case KEY_getprotobynumber:
8121             LOP(OP_GPBYNUMBER,XTERM);
8122
8123         case KEY_getprotoent:
8124             FUN0(OP_GPROTOENT);
8125
8126         case KEY_getpwent:
8127             FUN0(OP_GPWENT);
8128
8129         case KEY_getpwnam:
8130             UNI(OP_GPWNAM);
8131
8132         case KEY_getpwuid:
8133             UNI(OP_GPWUID);
8134
8135         case KEY_getpeername:
8136             UNI(OP_GETPEERNAME);
8137
8138         case KEY_gethostbyname:
8139             UNI(OP_GHBYNAME);
8140
8141         case KEY_gethostbyaddr:
8142             LOP(OP_GHBYADDR,XTERM);
8143
8144         case KEY_gethostent:
8145             FUN0(OP_GHOSTENT);
8146
8147         case KEY_getnetbyname:
8148             UNI(OP_GNBYNAME);
8149
8150         case KEY_getnetbyaddr:
8151             LOP(OP_GNBYADDR,XTERM);
8152
8153         case KEY_getnetent:
8154             FUN0(OP_GNETENT);
8155
8156         case KEY_getservbyname:
8157             LOP(OP_GSBYNAME,XTERM);
8158
8159         case KEY_getservbyport:
8160             LOP(OP_GSBYPORT,XTERM);
8161
8162         case KEY_getservent:
8163             FUN0(OP_GSERVENT);
8164
8165         case KEY_getsockname:
8166             UNI(OP_GETSOCKNAME);
8167
8168         case KEY_getsockopt:
8169             LOP(OP_GSOCKOPT,XTERM);
8170
8171         case KEY_getgrent:
8172             FUN0(OP_GGRENT);
8173
8174         case KEY_getgrnam:
8175             UNI(OP_GGRNAM);
8176
8177         case KEY_getgrgid:
8178             UNI(OP_GGRGID);
8179
8180         case KEY_getlogin:
8181             FUN0(OP_GETLOGIN);
8182
8183         case KEY_given:
8184             pl_yylval.ival = CopLINE(PL_curcop);
8185             Perl_ck_warner_d(aTHX_
8186                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8187                 "given is experimental");
8188             OPERATOR(GIVEN);
8189
8190         case KEY_glob:
8191             LOP(
8192              orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
8193              XTERM
8194             );
8195
8196         case KEY_hex:
8197             UNI(OP_HEX);
8198
8199         case KEY_if:
8200             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8201                 return REPORT(0);
8202             pl_yylval.ival = CopLINE(PL_curcop);
8203             OPERATOR(IF);
8204
8205         case KEY_index:
8206             LOP(OP_INDEX,XTERM);
8207
8208         case KEY_int:
8209             UNI(OP_INT);
8210
8211         case KEY_ioctl:
8212             LOP(OP_IOCTL,XTERM);
8213
8214         case KEY_join:
8215             LOP(OP_JOIN,XTERM);
8216
8217         case KEY_keys:
8218             UNI(OP_KEYS);
8219
8220         case KEY_kill:
8221             LOP(OP_KILL,XTERM);
8222
8223         case KEY_last:
8224             LOOPX(OP_LAST);
8225
8226         case KEY_lc:
8227             UNI(OP_LC);
8228
8229         case KEY_lcfirst:
8230             UNI(OP_LCFIRST);
8231
8232         case KEY_local:
8233             OPERATOR(LOCAL);
8234
8235         case KEY_length:
8236             UNI(OP_LENGTH);
8237
8238         case KEY_lt:
8239             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8240                 return REPORT(0);
8241             Rop(OP_SLT);
8242
8243         case KEY_le:
8244             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8245                 return REPORT(0);
8246             Rop(OP_SLE);
8247
8248         case KEY_localtime:
8249             UNI(OP_LOCALTIME);
8250
8251         case KEY_log:
8252             UNI(OP_LOG);
8253
8254         case KEY_link:
8255             LOP(OP_LINK,XTERM);
8256
8257         case KEY_listen:
8258             LOP(OP_LISTEN,XTERM);
8259
8260         case KEY_lock:
8261             UNI(OP_LOCK);
8262
8263         case KEY_lstat:
8264             UNI(OP_LSTAT);
8265
8266         case KEY_m:
8267             s = scan_pat(s,OP_MATCH);
8268             TERM(sublex_start());
8269
8270         case KEY_map:
8271             LOP(OP_MAPSTART, XREF);
8272
8273         case KEY_mkdir:
8274             LOP(OP_MKDIR,XTERM);
8275
8276         case KEY_msgctl:
8277             LOP(OP_MSGCTL,XTERM);
8278
8279         case KEY_msgget:
8280             LOP(OP_MSGGET,XTERM);
8281
8282         case KEY_msgrcv:
8283             LOP(OP_MSGRCV,XTERM);
8284
8285         case KEY_msgsnd:
8286             LOP(OP_MSGSND,XTERM);
8287
8288         case KEY_our:
8289         case KEY_my:
8290         case KEY_state:
8291             if (PL_in_my) {
8292                 PL_bufptr = s;
8293                 yyerror(Perl_form(aTHX_
8294                                   "Can't redeclare \"%s\" in \"%s\"",
8295                                    tmp      == KEY_my    ? "my" :
8296                                    tmp      == KEY_state ? "state" : "our",
8297                                    PL_in_my == KEY_my    ? "my" :
8298                                    PL_in_my == KEY_state ? "state" : "our"));
8299             }
8300             PL_in_my = (U16)tmp;
8301             s = skipspace(s);
8302             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8303                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8304                 if (memEQs(PL_tokenbuf, len, "sub"))
8305                     goto really_sub;
8306                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8307                 if (!PL_in_my_stash) {
8308                     char tmpbuf[1024];
8309                     int len;
8310                     PL_bufptr = s;
8311                     len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8312                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
8313                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8314                 }
8315             }
8316             else if (*s == '\\') {
8317                 if (!FEATURE_MYREF_IS_ENABLED)
8318                     Perl_croak(aTHX_ "The experimental declared_refs "
8319                                      "feature is not enabled");
8320                 Perl_ck_warner_d(aTHX_
8321                      packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
8322                     "Declaring references is experimental");
8323             }
8324             OPERATOR(MY);
8325
8326         case KEY_next:
8327             LOOPX(OP_NEXT);
8328
8329         case KEY_ne:
8330             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8331                 return REPORT(0);
8332             Eop(OP_SNE);
8333
8334         case KEY_no:
8335             s = tokenize_use(0, s);
8336             TOKEN(USE);
8337
8338         case KEY_not:
8339             if (*s == '(' || (s = skipspace(s), *s == '('))
8340                 FUN1(OP_NOT);
8341             else {
8342                 if (!PL_lex_allbrackets
8343                     && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8344                 {
8345                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8346                 }
8347                 OPERATOR(NOTOP);
8348             }
8349
8350         case KEY_open:
8351             s = skipspace(s);
8352             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8353                 const char *t;
8354                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8355                               &len);
8356                 for (t=d; isSPACE(*t);)
8357                     t++;
8358                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8359                     /* [perl #16184] */
8360                     && !(t[0] == '=' && t[1] == '>')
8361                     && !(t[0] == ':' && t[1] == ':')
8362                     && !keyword(s, d-s, 0)
8363                 ) {
8364                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8365                        "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8366                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8367                 }
8368             }
8369             LOP(OP_OPEN,XTERM);
8370
8371         case KEY_or:
8372             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8373                 return REPORT(0);
8374             pl_yylval.ival = OP_OR;
8375             OPERATOR(OROP);
8376
8377         case KEY_ord:
8378             UNI(OP_ORD);
8379
8380         case KEY_oct:
8381             UNI(OP_OCT);
8382
8383         case KEY_opendir:
8384             LOP(OP_OPEN_DIR,XTERM);
8385
8386         case KEY_print:
8387             checkcomma(s,PL_tokenbuf,"filehandle");
8388             LOP(OP_PRINT,XREF);
8389
8390         case KEY_printf:
8391             checkcomma(s,PL_tokenbuf,"filehandle");
8392             LOP(OP_PRTF,XREF);
8393
8394         case KEY_prototype:
8395             UNI(OP_PROTOTYPE);
8396
8397         case KEY_push:
8398             LOP(OP_PUSH,XTERM);
8399
8400         case KEY_pop:
8401             UNIDOR(OP_POP);
8402
8403         case KEY_pos:
8404             UNIDOR(OP_POS);
8405
8406         case KEY_pack:
8407             LOP(OP_PACK,XTERM);
8408
8409         case KEY_package:
8410             s = force_word(s,BAREWORD,FALSE,TRUE);
8411             s = skipspace(s);
8412             s = force_strict_version(s);
8413             PREBLOCK(PACKAGE);
8414
8415         case KEY_pipe:
8416             LOP(OP_PIPE_OP,XTERM);
8417
8418         case KEY_q:
8419             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8420             if (!s)
8421                 missingterm(NULL, 0);
8422             COPLINE_SET_FROM_MULTI_END;
8423             pl_yylval.ival = OP_CONST;
8424             TERM(sublex_start());
8425
8426         case KEY_quotemeta:
8427             UNI(OP_QUOTEMETA);
8428
8429         case KEY_qw: {
8430             OP *words = NULL;
8431             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8432             if (!s)
8433                 missingterm(NULL, 0);
8434             COPLINE_SET_FROM_MULTI_END;
8435             PL_expect = XOPERATOR;
8436             if (SvCUR(PL_lex_stuff)) {
8437                 int warned_comma = !ckWARN(WARN_QW);
8438                 int warned_comment = warned_comma;
8439                 d = SvPV_force(PL_lex_stuff, len);
8440                 while (len) {
8441                     for (; isSPACE(*d) && len; --len, ++d)
8442                         /**/;
8443                     if (len) {
8444                         SV *sv;
8445                         const char *b = d;
8446                         if (!warned_comma || !warned_comment) {
8447                             for (; !isSPACE(*d) && len; --len, ++d) {
8448                                 if (!warned_comma && *d == ',') {
8449                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8450                                         "Possible attempt to separate words with commas");
8451                                     ++warned_comma;
8452                                 }
8453                                 else if (!warned_comment && *d == '#') {
8454                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8455                                         "Possible attempt to put comments in qw() list");
8456                                     ++warned_comment;
8457                                 }
8458                             }
8459                         }
8460                         else {
8461                             for (; !isSPACE(*d) && len; --len, ++d)
8462                                 /**/;
8463                         }
8464                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8465                         words = op_append_elem(OP_LIST, words,
8466                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8467                     }
8468                 }
8469             }
8470             if (!words)
8471                 words = newNULLLIST();
8472             SvREFCNT_dec_NN(PL_lex_stuff);
8473             PL_lex_stuff = NULL;
8474             PL_expect = XOPERATOR;
8475             pl_yylval.opval = sawparens(words);
8476             TOKEN(QWLIST);
8477         }
8478
8479         case KEY_qq:
8480             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8481             if (!s)
8482                 missingterm(NULL, 0);
8483             pl_yylval.ival = OP_STRINGIFY;
8484             if (SvIVX(PL_lex_stuff) == '\'')
8485                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8486             TERM(sublex_start());
8487
8488         case KEY_qr:
8489             s = scan_pat(s,OP_QR);
8490             TERM(sublex_start());
8491
8492         case KEY_qx:
8493             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8494             if (!s)
8495                 missingterm(NULL, 0);
8496             pl_yylval.ival = OP_BACKTICK;
8497             TERM(sublex_start());
8498
8499         case KEY_return:
8500             OLDLOP(OP_RETURN);
8501
8502         case KEY_require:
8503             s = skipspace(s);
8504             if (isDIGIT(*s)) {
8505                 s = force_version(s, FALSE);
8506             }
8507             else if (*s != 'v' || !isDIGIT(s[1])
8508                     || (s = force_version(s, TRUE), *s == 'v'))
8509             {
8510                 *PL_tokenbuf = '\0';
8511                 s = force_word(s,BAREWORD,TRUE,TRUE);
8512                 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
8513                                            PL_tokenbuf + sizeof(PL_tokenbuf),
8514                                            UTF))
8515                 {
8516                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8517                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8518                 }
8519                 else if (*s == '<')
8520                     yyerror("<> at require-statement should be quotes");
8521             }
8522             if (orig_keyword == KEY_require) {
8523                 orig_keyword = 0;
8524                 pl_yylval.ival = 1;
8525             }
8526             else
8527                 pl_yylval.ival = 0;
8528             PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
8529             PL_bufptr = s;
8530             PL_last_uni = PL_oldbufptr;
8531             PL_last_lop_op = OP_REQUIRE;
8532             s = skipspace(s);
8533             return REPORT( (int)REQUIRE );
8534
8535         case KEY_reset:
8536             UNI(OP_RESET);
8537
8538         case KEY_redo:
8539             LOOPX(OP_REDO);
8540
8541         case KEY_rename:
8542             LOP(OP_RENAME,XTERM);
8543
8544         case KEY_rand:
8545             UNI(OP_RAND);
8546
8547         case KEY_rmdir:
8548             UNI(OP_RMDIR);
8549
8550         case KEY_rindex:
8551             LOP(OP_RINDEX,XTERM);
8552
8553         case KEY_read:
8554             LOP(OP_READ,XTERM);
8555
8556         case KEY_readdir:
8557             UNI(OP_READDIR);
8558
8559         case KEY_readline:
8560             UNIDOR(OP_READLINE);
8561
8562         case KEY_readpipe:
8563             UNIDOR(OP_BACKTICK);
8564
8565         case KEY_rewinddir:
8566             UNI(OP_REWINDDIR);
8567
8568         case KEY_recv:
8569             LOP(OP_RECV,XTERM);
8570
8571         case KEY_reverse:
8572             LOP(OP_REVERSE,XTERM);
8573
8574         case KEY_readlink:
8575             UNIDOR(OP_READLINK);
8576
8577         case KEY_ref:
8578             UNI(OP_REF);
8579
8580         case KEY_s:
8581             s = scan_subst(s);
8582             if (pl_yylval.opval)
8583                 TERM(sublex_start());
8584             else
8585                 TOKEN(1);       /* force error */
8586
8587         case KEY_say:
8588             checkcomma(s,PL_tokenbuf,"filehandle");
8589             LOP(OP_SAY,XREF);
8590
8591         case KEY_chomp:
8592             UNI(OP_CHOMP);
8593
8594         case KEY_scalar:
8595             UNI(OP_SCALAR);
8596
8597         case KEY_select:
8598             LOP(OP_SELECT,XTERM);
8599
8600         case KEY_seek:
8601             LOP(OP_SEEK,XTERM);
8602
8603         case KEY_semctl:
8604             LOP(OP_SEMCTL,XTERM);
8605
8606         case KEY_semget:
8607             LOP(OP_SEMGET,XTERM);
8608
8609         case KEY_semop:
8610             LOP(OP_SEMOP,XTERM);
8611
8612         case KEY_send:
8613             LOP(OP_SEND,XTERM);
8614
8615         case KEY_setpgrp:
8616             LOP(OP_SETPGRP,XTERM);
8617
8618         case KEY_setpriority:
8619             LOP(OP_SETPRIORITY,XTERM);
8620
8621         case KEY_sethostent:
8622             UNI(OP_SHOSTENT);
8623
8624         case KEY_setnetent:
8625             UNI(OP_SNETENT);
8626
8627         case KEY_setservent:
8628             UNI(OP_SSERVENT);
8629
8630         case KEY_setprotoent:
8631             UNI(OP_SPROTOENT);
8632
8633         case KEY_setpwent:
8634             FUN0(OP_SPWENT);
8635
8636         case KEY_setgrent:
8637             FUN0(OP_SGRENT);
8638
8639         case KEY_seekdir:
8640             LOP(OP_SEEKDIR,XTERM);
8641
8642         case KEY_setsockopt:
8643             LOP(OP_SSOCKOPT,XTERM);
8644
8645         case KEY_shift:
8646             UNIDOR(OP_SHIFT);
8647
8648         case KEY_shmctl:
8649             LOP(OP_SHMCTL,XTERM);
8650
8651         case KEY_shmget:
8652             LOP(OP_SHMGET,XTERM);
8653
8654         case KEY_shmread:
8655             LOP(OP_SHMREAD,XTERM);
8656
8657         case KEY_shmwrite:
8658             LOP(OP_SHMWRITE,XTERM);
8659
8660         case KEY_shutdown:
8661             LOP(OP_SHUTDOWN,XTERM);
8662
8663         case KEY_sin:
8664             UNI(OP_SIN);
8665
8666         case KEY_sleep:
8667             UNI(OP_SLEEP);
8668
8669         case KEY_socket:
8670             LOP(OP_SOCKET,XTERM);
8671
8672         case KEY_socketpair:
8673             LOP(OP_SOCKPAIR,XTERM);
8674
8675         case KEY_sort:
8676             checkcomma(s,PL_tokenbuf,"subroutine name");
8677             s = skipspace(s);
8678             PL_expect = XTERM;
8679             s = force_word(s,BAREWORD,TRUE,TRUE);
8680             LOP(OP_SORT,XREF);
8681
8682         case KEY_split:
8683             LOP(OP_SPLIT,XTERM);
8684
8685         case KEY_sprintf:
8686             LOP(OP_SPRINTF,XTERM);
8687
8688         case KEY_splice:
8689             LOP(OP_SPLICE,XTERM);
8690
8691         case KEY_sqrt:
8692             UNI(OP_SQRT);
8693
8694         case KEY_srand:
8695             UNI(OP_SRAND);
8696
8697         case KEY_stat:
8698             UNI(OP_STAT);
8699
8700         case KEY_study:
8701             UNI(OP_STUDY);
8702
8703         case KEY_substr:
8704             LOP(OP_SUBSTR,XTERM);
8705
8706         case KEY_format:
8707         case KEY_sub:
8708           really_sub:
8709             {
8710                 char * const tmpbuf = PL_tokenbuf + 1;
8711                 bool have_name, have_proto;
8712                 const int key = tmp;
8713                 SV *format_name = NULL;
8714                 bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
8715
8716                 SSize_t off = s-SvPVX(PL_linestr);
8717                 s = skipspace(s);
8718                 d = SvPVX(PL_linestr)+off;
8719
8720                 SAVEBOOL(PL_parser->sig_seen);
8721                 PL_parser->sig_seen = FALSE;
8722
8723                 if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
8724                     || *s == '\''
8725                     || (*s == ':' && s[1] == ':'))
8726                 {
8727
8728                     PL_expect = XATTRBLOCK;
8729                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8730                                   &len);
8731                     if (key == KEY_format)
8732                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8733                     *PL_tokenbuf = '&';
8734                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8735                      || pad_findmy_pvn(
8736                             PL_tokenbuf, len + 1, 0
8737                         ) != NOT_IN_PAD)
8738                         sv_setpvn(PL_subname, tmpbuf, len);
8739                     else {
8740                         sv_setsv(PL_subname,PL_curstname);
8741                         sv_catpvs(PL_subname,"::");
8742                         sv_catpvn(PL_subname,tmpbuf,len);
8743                     }
8744                     if (SvUTF8(PL_linestr))
8745                         SvUTF8_on(PL_subname);
8746                     have_name = TRUE;
8747
8748
8749                     s = skipspace(d);
8750                 }
8751                 else {
8752                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8753                     {
8754                         *d = '\0';
8755                         /* diag_listed_as: Missing name in "%s sub" */
8756                         Perl_croak(aTHX_
8757                                   "Missing name in \"%s\"", PL_bufptr);
8758                     }
8759                     PL_expect = XATTRTERM;
8760                     sv_setpvs(PL_subname,"?");
8761                     have_name = FALSE;
8762                 }
8763
8764                 if (key == KEY_format) {
8765                     if (format_name) {
8766                         NEXTVAL_NEXTTOKE.opval
8767                             = newSVOP(OP_CONST,0, format_name);
8768                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8769                         force_next(BAREWORD);
8770                     }
8771                     PREBLOCK(FORMAT);
8772                 }
8773
8774                 /* Look for a prototype */
8775                 if (*s == '(' && !is_sigsub) {
8776                     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8777                     if (!s)
8778                         Perl_croak(aTHX_ "Prototype not terminated");
8779                     COPLINE_SET_FROM_MULTI_END;
8780                     (void)validate_proto(PL_subname, PL_lex_stuff,
8781                                          ckWARN(WARN_ILLEGALPROTO), 0);
8782                     have_proto = TRUE;
8783
8784                     s = skipspace(s);
8785                 }
8786                 else
8787                     have_proto = FALSE;
8788
8789                 if (  !(*s == ':' && s[1] != ':')
8790                     && (*s != '{' && *s != '(') && key != KEY_format)
8791                 {
8792                     assert(key == KEY_sub || key == KEY_AUTOLOAD ||
8793                            key == KEY_DESTROY || key == KEY_BEGIN ||
8794                            key == KEY_UNITCHECK || key == KEY_CHECK ||
8795                            key == KEY_INIT || key == KEY_END ||
8796                            key == KEY_my || key == KEY_state ||
8797                            key == KEY_our);
8798                     if (!have_name)
8799                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8800                     else if (*s != ';' && *s != '}')
8801                         Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
8802                 }
8803
8804                 if (have_proto) {
8805                     NEXTVAL_NEXTTOKE.opval =
8806                         newSVOP(OP_CONST, 0, PL_lex_stuff);
8807                     PL_lex_stuff = NULL;
8808                     force_next(THING);
8809                 }
8810                 if (!have_name) {
8811                     if (PL_curstash)
8812                         sv_setpvs(PL_subname, "__ANON__");
8813                     else
8814                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8815                     if (is_sigsub)
8816                         TOKEN(ANON_SIGSUB);
8817                     else
8818                         TOKEN(ANONSUB);
8819                 }
8820                 force_ident_maybe_lex('&');
8821                 if (is_sigsub)
8822                     TOKEN(SIGSUB);
8823                 else
8824                     TOKEN(SUB);
8825             }
8826
8827         case KEY_system:
8828             LOP(OP_SYSTEM,XREF);
8829
8830         case KEY_symlink:
8831             LOP(OP_SYMLINK,XTERM);
8832
8833         case KEY_syscall:
8834             LOP(OP_SYSCALL,XTERM);
8835
8836         case KEY_sysopen:
8837             LOP(OP_SYSOPEN,XTERM);
8838
8839         case KEY_sysseek:
8840             LOP(OP_SYSSEEK,XTERM);
8841
8842         case KEY_sysread:
8843             LOP(OP_SYSREAD,XTERM);
8844
8845         case KEY_syswrite:
8846             LOP(OP_SYSWRITE,XTERM);
8847
8848         case KEY_tr:
8849         case KEY_y:
8850             s = scan_trans(s);
8851             TERM(sublex_start());
8852
8853         case KEY_tell:
8854             UNI(OP_TELL);
8855
8856         case KEY_telldir:
8857             UNI(OP_TELLDIR);
8858
8859         case KEY_tie:
8860             LOP(OP_TIE,XTERM);
8861
8862         case KEY_tied:
8863             UNI(OP_TIED);
8864
8865         case KEY_time:
8866             FUN0(OP_TIME);
8867
8868         case KEY_times:
8869             FUN0(OP_TMS);
8870
8871         case KEY_truncate:
8872             LOP(OP_TRUNCATE,XTERM);
8873
8874         case KEY_uc:
8875             UNI(OP_UC);
8876
8877         case KEY_ucfirst:
8878             UNI(OP_UCFIRST);
8879
8880         case KEY_untie:
8881             UNI(OP_UNTIE);
8882
8883         case KEY_until:
8884             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8885                 return REPORT(0);
8886             pl_yylval.ival = CopLINE(PL_curcop);
8887             OPERATOR(UNTIL);
8888
8889         case KEY_unless:
8890             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8891                 return REPORT(0);
8892             pl_yylval.ival = CopLINE(PL_curcop);
8893             OPERATOR(UNLESS);
8894
8895         case KEY_unlink:
8896             LOP(OP_UNLINK,XTERM);
8897
8898         case KEY_undef:
8899             UNIDOR(OP_UNDEF);
8900
8901         case KEY_unpack:
8902             LOP(OP_UNPACK,XTERM);
8903
8904         case KEY_utime:
8905             LOP(OP_UTIME,XTERM);
8906
8907         case KEY_umask:
8908             UNIDOR(OP_UMASK);
8909
8910         case KEY_unshift:
8911             LOP(OP_UNSHIFT,XTERM);
8912
8913         case KEY_use:
8914             s = tokenize_use(1, s);
8915             TOKEN(USE);
8916
8917         case KEY_values:
8918             UNI(OP_VALUES);
8919
8920         case KEY_vec:
8921             LOP(OP_VEC,XTERM);
8922
8923         case KEY_when:
8924             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8925                 return REPORT(0);
8926             pl_yylval.ival = CopLINE(PL_curcop);
8927             Perl_ck_warner_d(aTHX_
8928                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8929                 "when is experimental");
8930             OPERATOR(WHEN);
8931
8932         case KEY_while:
8933             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8934                 return REPORT(0);
8935             pl_yylval.ival = CopLINE(PL_curcop);
8936             OPERATOR(WHILE);
8937
8938         case KEY_warn:
8939             PL_hints |= HINT_BLOCK_SCOPE;
8940             LOP(OP_WARN,XTERM);
8941
8942         case KEY_wait:
8943             FUN0(OP_WAIT);
8944
8945         case KEY_waitpid:
8946             LOP(OP_WAITPID,XTERM);
8947
8948         case KEY_wantarray:
8949             FUN0(OP_WANTARRAY);
8950
8951         case KEY_write:
8952             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8953              * we use the same number on EBCDIC */
8954             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8955             UNI(OP_ENTERWRITE);
8956
8957         case KEY_x:
8958             if (PL_expect == XOPERATOR) {
8959                 if (*s == '=' && !PL_lex_allbrackets
8960                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8961                 {
8962                     return REPORT(0);
8963                 }
8964                 Mop(OP_REPEAT);
8965             }
8966             check_uni();
8967             goto just_a_word;
8968
8969         case KEY_xor:
8970             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8971                 return REPORT(0);
8972             pl_yylval.ival = OP_XOR;
8973             OPERATOR(OROP);
8974         }
8975     }}
8976 }
8977
8978 /*
8979   S_pending_ident
8980
8981   Looks up an identifier in the pad or in a package
8982
8983   is_sig indicates that this is a subroutine signature variable
8984   rather than a plain pad var.
8985
8986   Returns:
8987     PRIVATEREF if this is a lexical name.
8988     BAREWORD   if this belongs to a package.
8989
8990   Structure:
8991       if we're in a my declaration
8992           croak if they tried to say my($foo::bar)
8993           build the ops for a my() declaration
8994       if it's an access to a my() variable
8995           build ops for access to a my() variable
8996       if in a dq string, and they've said @foo and we can't find @foo
8997           warn
8998       build ops for a bareword
8999 */
9000
9001 static int
9002 S_pending_ident(pTHX)
9003 {
9004     PADOFFSET tmp = 0;
9005     const char pit = (char)pl_yylval.ival;
9006     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9007     /* All routes through this function want to know if there is a colon.  */
9008     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9009
9010     DEBUG_T({ PerlIO_printf(Perl_debug_log,
9011           "### Pending identifier '%s'\n", PL_tokenbuf); });
9012     assert(tokenbuf_len >= 2);
9013
9014     /* if we're in a my(), we can't allow dynamics here.
9015        $foo'bar has already been turned into $foo::bar, so
9016        just check for colons.
9017
9018        if it's a legal name, the OP is a PADANY.
9019     */
9020     if (PL_in_my) {
9021         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
9022             if (has_colon)
9023                 /* diag_listed_as: No package name allowed for variable %s
9024                                    in "our" */
9025                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9026                                   "%se %s in \"our\"",
9027                                   *PL_tokenbuf=='&' ?"subroutin":"variabl",
9028                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9029             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9030         }
9031         else {
9032             OP *o;
9033             if (has_colon) {
9034                 /* "my" variable %s can't be in a package */
9035                 /* PL_no_myglob is constant */
9036                 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9037                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9038                             PL_in_my == KEY_my ? "my" : "state",
9039                             *PL_tokenbuf == '&' ? "subroutin" : "variabl",
9040                             PL_tokenbuf),
9041                             UTF ? SVf_UTF8 : 0);
9042                 GCC_DIAG_RESTORE_STMT;
9043             }
9044
9045             if (PL_in_my == KEY_sigvar) {
9046                 /* A signature 'padop' needs in addition, an op_first to
9047                  * point to a child sigdefelem, and an extra field to hold
9048                  * the signature index. We can achieve both by using an
9049                  * UNOP_AUX and (ab)using the op_aux field to hold the
9050                  * index. If we ever need more fields, use a real malloced
9051                  * aux strut instead.
9052                  */
9053                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9054                                     INT2PTR(UNOP_AUX_item *,
9055                                         (PL_parser->sig_elems)));
9056                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9057                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9058                                   :                         OPpARGELEM_HV);
9059             }
9060             else
9061                 o = newOP(OP_PADANY, 0);
9062             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9063                                                         UTF ? SVf_UTF8 : 0);
9064             if (PL_in_my == KEY_sigvar)
9065                 PL_in_my = 0;
9066
9067             pl_yylval.opval = o;
9068             return PRIVATEREF;
9069         }
9070     }
9071
9072     /*
9073        build the ops for accesses to a my() variable.
9074     */
9075
9076     if (!has_colon) {
9077         if (!PL_in_my)
9078             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9079                                  0);
9080         if (tmp != NOT_IN_PAD) {
9081             /* might be an "our" variable" */
9082             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9083                 /* build ops for a bareword */
9084                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9085                 HEK * const stashname = HvNAME_HEK(stash);
9086                 SV *  const sym = newSVhek(stashname);
9087                 sv_catpvs(sym, "::");
9088                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9089                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9090                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9091                 if (pit != '&')
9092                   gv_fetchsv(sym,
9093                     GV_ADDMULTI,
9094                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9095                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9096                      : SVt_PVHV));
9097                 return BAREWORD;
9098             }
9099
9100             pl_yylval.opval = newOP(OP_PADANY, 0);
9101             pl_yylval.opval->op_targ = tmp;
9102             return PRIVATEREF;
9103         }
9104     }
9105
9106     /*
9107        Whine if they've said @foo or @foo{key} in a doublequoted string,
9108        and @foo (or %foo) isn't a variable we can find in the symbol
9109        table.
9110     */
9111     if (ckWARN(WARN_AMBIGUOUS)
9112         && pit == '@'
9113         && PL_lex_state != LEX_NORMAL
9114         && !PL_lex_brackets)
9115     {
9116         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9117                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9118                                          SVt_PVAV);
9119         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9120            )
9121         {
9122             /* Downgraded from fatal to warning 20000522 mjd */
9123             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9124                         "Possible unintended interpolation of %" UTF8f
9125                         " in string",
9126                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9127         }
9128     }
9129
9130     /* build ops for a bareword */
9131     pl_yylval.opval = newSVOP(OP_CONST, 0,
9132                                    newSVpvn_flags(PL_tokenbuf + 1,
9133                                                       tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9134                                                       UTF ? SVf_UTF8 : 0 ));
9135     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9136     if (pit != '&')
9137         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9138                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9139                      | ( UTF ? SVf_UTF8 : 0 ),
9140                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9141                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9142                       : SVt_PVHV));
9143     return BAREWORD;
9144 }
9145
9146 STATIC void
9147 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9148 {
9149     PERL_ARGS_ASSERT_CHECKCOMMA;
9150
9151     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
9152         if (ckWARN(WARN_SYNTAX)) {
9153             int level = 1;
9154             const char *w;
9155             for (w = s+2; *w && level; w++) {
9156                 if (*w == '(')
9157                     ++level;
9158                 else if (*w == ')')
9159                     --level;
9160             }
9161             while (isSPACE(*w))
9162                 ++w;
9163             /* the list of chars below is for end of statements or
9164              * block / parens, boolean operators (&&, ||, //) and branch
9165              * constructs (or, and, if, until, unless, while, err, for).
9166              * Not a very solid hack... */
9167             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9168                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9169                             "%s (...) interpreted as function",name);
9170         }
9171     }
9172     while (s < PL_bufend && isSPACE(*s))
9173         s++;
9174     if (*s == '(')
9175         s++;
9176     while (s < PL_bufend && isSPACE(*s))
9177         s++;
9178     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9179         const char * const w = s;
9180         s += UTF ? UTF8SKIP(s) : 1;
9181         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9182             s += UTF ? UTF8SKIP(s) : 1;
9183         while (s < PL_bufend && isSPACE(*s))
9184             s++;
9185         if (*s == ',') {
9186             GV* gv;
9187             if (keyword(w, s - w, 0))
9188                 return;
9189
9190             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9191             if (gv && GvCVu(gv))
9192                 return;
9193             if (s - w <= 254) {
9194                 PADOFFSET off;
9195                 char tmpbuf[256];
9196                 Copy(w, tmpbuf+1, s - w, char);
9197                 *tmpbuf = '&';
9198                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9199                 if (off != NOT_IN_PAD) return;
9200             }
9201             Perl_croak(aTHX_ "No comma allowed after %s", what);
9202         }
9203     }
9204 }
9205
9206 /* S_new_constant(): do any overload::constant lookup.
9207
9208    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9209    Best used as sv=new_constant(..., sv, ...).
9210    If s, pv are NULL, calls subroutine with one argument,
9211    and <type> is used with error messages only.
9212    <type> is assumed to be well formed UTF-8.
9213
9214    If error_msg is not NULL, *error_msg will be set to any error encountered.
9215    Otherwise yyerror() will be used to output it */
9216
9217 STATIC SV *
9218 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9219                SV *sv, SV *pv, const char *type, STRLEN typelen,
9220                const char ** error_msg)
9221 {
9222     dSP;
9223     HV * table = GvHV(PL_hintgv);                /* ^H */
9224     SV *res;
9225     SV *errsv = NULL;
9226     SV **cvp;
9227     SV *cv, *typesv;
9228     const char *why1 = "", *why2 = "", *why3 = "";
9229
9230     PERL_ARGS_ASSERT_NEW_CONSTANT;
9231     /* We assume that this is true: */
9232     if (*key == 'c') { assert (strEQ(key, "charnames")); }
9233     assert(type || s);
9234
9235     sv_2mortal(sv);                     /* Parent created it permanently */
9236     if (!table
9237         || ! (PL_hints & HINT_LOCALIZE_HH)
9238         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9239         || ! SvOK(*cvp))
9240     {
9241         char *msg;
9242
9243         /* Here haven't found what we're looking for.  If it is charnames,
9244          * perhaps it needs to be loaded.  Try doing that before giving up */
9245         if (*key == 'c') {
9246             Perl_load_module(aTHX_
9247                             0,
9248                             newSVpvs("_charnames"),
9249                              /* version parameter; no need to specify it, as if
9250                               * we get too early a version, will fail anyway,
9251                               * not being able to find '_charnames' */
9252                             NULL,
9253                             newSVpvs(":full"),
9254                             newSVpvs(":short"),
9255                             NULL);
9256             assert(sp == PL_stack_sp);
9257             table = GvHV(PL_hintgv);
9258             if (table
9259                 && (PL_hints & HINT_LOCALIZE_HH)
9260                 && (cvp = hv_fetch(table, key, keylen, FALSE))
9261                 && SvOK(*cvp))
9262             {
9263                 goto now_ok;
9264             }
9265         }
9266         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9267             msg = Perl_form(aTHX_
9268                                "Constant(%.*s) unknown",
9269                                 (int)(type ? typelen : len),
9270                                 (type ? type: s));
9271         }
9272         else {
9273             why1 = "$^H{";
9274             why2 = key;
9275             why3 = "} is not defined";
9276         report:
9277             if (*key == 'c') {
9278                 msg = Perl_form(aTHX_
9279                             /* The +3 is for '\N{'; -4 for that, plus '}' */
9280                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9281                       );
9282             }
9283             else {
9284                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9285                                     (int)(type ? typelen : len),
9286                                     (type ? type: s), why1, why2, why3);
9287             }
9288         }
9289         if (error_msg) {
9290             *error_msg = msg;
9291         }
9292         else {
9293             yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9294         }
9295         return SvREFCNT_inc_simple_NN(sv);
9296     }
9297   now_ok:
9298     cv = *cvp;
9299     if (!pv && s)
9300         pv = newSVpvn_flags(s, len, SVs_TEMP);
9301     if (type && pv)
9302         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9303     else
9304         typesv = &PL_sv_undef;
9305
9306     PUSHSTACKi(PERLSI_OVERLOAD);
9307     ENTER ;
9308     SAVETMPS;
9309
9310     PUSHMARK(SP) ;
9311     EXTEND(sp, 3);
9312     if (pv)
9313         PUSHs(pv);
9314     PUSHs(sv);
9315     if (pv)
9316         PUSHs(typesv);
9317     PUTBACK;
9318     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9319
9320     SPAGAIN ;
9321
9322     /* Check the eval first */
9323     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9324         STRLEN errlen;
9325         const char * errstr;
9326         sv_catpvs(errsv, "Propagated");
9327         errstr = SvPV_const(errsv, errlen);
9328         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9329         (void)POPs;
9330         res = SvREFCNT_inc_simple_NN(sv);
9331     }
9332     else {
9333         res = POPs;
9334         SvREFCNT_inc_simple_void_NN(res);
9335     }
9336
9337     PUTBACK ;
9338     FREETMPS ;
9339     LEAVE ;
9340     POPSTACK;
9341
9342     if (!SvOK(res)) {
9343         why1 = "Call to &{$^H{";
9344         why2 = key;
9345         why3 = "}} did not return a defined value";
9346         sv = res;
9347         (void)sv_2mortal(sv);
9348         goto report;
9349     }
9350
9351     return res;
9352 }
9353
9354 PERL_STATIC_INLINE void
9355 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9356                     bool is_utf8, bool check_dollar, bool tick_warn)
9357 {
9358     int saw_tick = 0;
9359     const char *olds = *s;
9360     PERL_ARGS_ASSERT_PARSE_IDENT;
9361
9362     while (*s < PL_bufend) {
9363         if (*d >= e)
9364             Perl_croak(aTHX_ "%s", ident_too_long);
9365         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9366              /* The UTF-8 case must come first, otherwise things
9367              * like c\N{COMBINING TILDE} would start failing, as the
9368              * isWORDCHAR_A case below would gobble the 'c' up.
9369              */
9370
9371             char *t = *s + UTF8SKIP(*s);
9372             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9373                 t += UTF8SKIP(t);
9374             }
9375             if (*d + (t - *s) > e)
9376                 Perl_croak(aTHX_ "%s", ident_too_long);
9377             Copy(*s, *d, t - *s, char);
9378             *d += t - *s;
9379             *s = t;
9380         }
9381         else if ( isWORDCHAR_A(**s) ) {
9382             do {
9383                 *(*d)++ = *(*s)++;
9384             } while (isWORDCHAR_A(**s) && *d < e);
9385         }
9386         else if (   allow_package
9387                  && **s == '\''
9388                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9389         {
9390             *(*d)++ = ':';
9391             *(*d)++ = ':';
9392             (*s)++;
9393             saw_tick++;
9394         }
9395         else if (allow_package && **s == ':' && (*s)[1] == ':'
9396            /* Disallow things like Foo::$bar. For the curious, this is
9397             * the code path that triggers the "Bad name after" warning
9398             * when looking for barewords.
9399             */
9400            && !(check_dollar && (*s)[2] == '$')) {
9401             *(*d)++ = *(*s)++;
9402             *(*d)++ = *(*s)++;
9403         }
9404         else
9405             break;
9406     }
9407     if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
9408               && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
9409         char *d;
9410         char *d2;
9411         Newx(d, *s - olds + saw_tick + 2, char); /* +2 for $# */
9412         d2 = d;
9413         SAVEFREEPV(d);
9414         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9415                          "Old package separator used in string");
9416         if (olds[-1] == '#')
9417             *d2++ = olds[-2];
9418         *d2++ = olds[-1];
9419         while (olds < *s) {
9420             if (*olds == '\'') {
9421                 *d2++ = '\\';
9422                 *d2++ = *olds++;
9423             }
9424             else
9425                 *d2++ = *olds++;
9426         }
9427         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9428                          "\t(Did you mean \"%" UTF8f "\" instead?)\n",
9429                           UTF8fARG(is_utf8, d2-d, d));
9430     }
9431     return;
9432 }
9433
9434 /* Returns a NUL terminated string, with the length of the string written to
9435    *slp
9436    */
9437 STATIC char *
9438 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9439 {
9440     char *d = dest;
9441     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9442     bool is_utf8 = cBOOL(UTF);
9443
9444     PERL_ARGS_ASSERT_SCAN_WORD;
9445
9446     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
9447     *d = '\0';
9448     *slp = d - dest;
9449     return s;
9450 }
9451
9452 /* Is the byte 'd' a legal single character identifier name?  'u' is true
9453  * iff Unicode semantics are to be used.  The legal ones are any of:
9454  *  a) all ASCII characters except:
9455  *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9456  *          2) '{'
9457  *     The final case currently doesn't get this far in the program, so we
9458  *     don't test for it.  If that were to change, it would be ok to allow it.
9459  *  b) When not under Unicode rules, any upper Latin1 character
9460  *  c) Otherwise, when unicode rules are used, all XIDS characters.
9461  *
9462  *      Because all ASCII characters have the same representation whether
9463  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9464  *      '{' without knowing if is UTF-8 or not. */
9465 #define VALID_LEN_ONE_IDENT(s, e, is_utf8)                                  \
9466     (isGRAPH_A(*(s)) || ((is_utf8)                                          \
9467                          ? isIDFIRST_utf8_safe(s, e)                        \
9468                          : (isGRAPH_L1(*s)                                  \
9469                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9470
9471 STATIC char *
9472 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9473 {
9474     I32 herelines = PL_parser->herelines;
9475     SSize_t bracket = -1;
9476     char funny = *s++;
9477     char *d = dest;
9478     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9479     bool is_utf8 = cBOOL(UTF);
9480     I32 orig_copline = 0, tmp_copline = 0;
9481
9482     PERL_ARGS_ASSERT_SCAN_IDENT;
9483
9484     if (isSPACE(*s) || !*s)
9485         s = skipspace(s);
9486     if (isDIGIT(*s)) {
9487         while (isDIGIT(*s)) {
9488             if (d >= e)
9489                 Perl_croak(aTHX_ "%s", ident_too_long);
9490             *d++ = *s++;
9491         }
9492     }
9493     else {  /* See if it is a "normal" identifier */
9494         parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
9495     }
9496     *d = '\0';
9497     d = dest;
9498     if (*d) {
9499         /* Either a digit variable, or parse_ident() found an identifier
9500            (anything valid as a bareword), so job done and return.  */
9501         if (PL_lex_state != LEX_NORMAL)
9502             PL_lex_state = LEX_INTERPENDMAYBE;
9503         return s;
9504     }
9505
9506     /* Here, it is not a run-of-the-mill identifier name */
9507
9508     if (*s == '$' && s[1]
9509         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9510             || isDIGIT_A((U8)s[1])
9511             || s[1] == '$'
9512             || s[1] == '{'
9513             || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
9514     {
9515         /* Dereferencing a value in a scalar variable.
9516            The alternatives are different syntaxes for a scalar variable.
9517            Using ' as a leading package separator isn't allowed. :: is.   */
9518         return s;
9519     }
9520     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9521     if (*s == '{') {
9522         bracket = s - SvPVX(PL_linestr);
9523         s++;
9524         orig_copline = CopLINE(PL_curcop);
9525         if (s < PL_bufend && isSPACE(*s)) {
9526             s = skipspace(s);
9527         }
9528     }
9529     if ((s <= PL_bufend - (is_utf8)
9530                           ? UTF8SKIP(s)
9531                           : 1)
9532         && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
9533     {
9534         if (is_utf8) {
9535             const STRLEN skip = UTF8SKIP(s);
9536             STRLEN i;
9537             d[skip] = '\0';
9538             for ( i = 0; i < skip; i++ )
9539                 d[i] = *s++;
9540         }
9541         else {
9542             *d = *s++;
9543             d[1] = '\0';
9544         }
9545     }
9546     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9547     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9548         *d = toCTRL(*s);
9549         s++;
9550     }
9551     /* Warn about ambiguous code after unary operators if {...} notation isn't
9552        used.  There's no difference in ambiguity; it's merely a heuristic
9553        about when not to warn.  */
9554     else if (ck_uni && bracket == -1)
9555         check_uni();
9556     if (bracket != -1) {
9557         bool skip;
9558         char *s2;
9559         /* If we were processing {...} notation then...  */
9560         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
9561             || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9562                  && isWORDCHAR(*s))
9563         ) {
9564             /* note we have to check for a normal identifier first,
9565              * as it handles utf8 symbols, and only after that has
9566              * been ruled out can we look at the caret words */
9567             if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
9568                 /* if it starts as a valid identifier, assume that it is one.
9569                    (the later check for } being at the expected point will trap
9570                    cases where this doesn't pan out.)  */
9571                 d += is_utf8 ? UTF8SKIP(d) : 1;
9572                 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
9573                 *d = '\0';
9574             }
9575             else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
9576                 d++;
9577                 while (isWORDCHAR(*s) && d < e) {
9578                     *d++ = *s++;
9579                 }
9580                 if (d >= e)
9581                     Perl_croak(aTHX_ "%s", ident_too_long);
9582                 *d = '\0';
9583             }
9584             tmp_copline = CopLINE(PL_curcop);
9585             if (s < PL_bufend && isSPACE(*s)) {
9586                 s = skipspace(s);
9587             }
9588             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9589                 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
9590                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9591                     const char * const brack =
9592                         (const char *)
9593                         ((*s == '[') ? "[...]" : "{...}");
9594                     orig_copline = CopLINE(PL_curcop);
9595                     CopLINE_set(PL_curcop, tmp_copline);
9596    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9597                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9598                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9599                         funny, dest, brack, funny, dest, brack);
9600                     CopLINE_set(PL_curcop, orig_copline);
9601                 }
9602                 bracket++;
9603                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9604                 PL_lex_allbrackets++;
9605                 return s;
9606             }
9607         }
9608
9609         if ( !tmp_copline )
9610             tmp_copline = CopLINE(PL_curcop);
9611         if ((skip = s < PL_bufend && isSPACE(*s))) {
9612             /* Avoid incrementing line numbers or resetting PL_linestart,
9613                in case we have to back up.  */
9614             STRLEN s_off = s - SvPVX(PL_linestr);
9615             s2 = peekspace(s);
9616             s = SvPVX(PL_linestr) + s_off;
9617         }
9618         else
9619             s2 = s;
9620
9621         /* Expect to find a closing } after consuming any trailing whitespace.
9622          */
9623         if (*s2 == '}') {
9624             /* Now increment line numbers if applicable.  */
9625             if (skip)
9626                 s = skipspace(s);
9627             s++;
9628             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9629                 PL_lex_state = LEX_INTERPEND;
9630                 PL_expect = XREF;
9631             }
9632             if (PL_lex_state == LEX_NORMAL) {
9633                 if (ckWARN(WARN_AMBIGUOUS)
9634                     && (keyword(dest, d - dest, 0)
9635                         || get_cvn_flags(dest, d - dest, is_utf8
9636                            ? SVf_UTF8
9637                            : 0)))
9638                 {
9639                     SV *tmp = newSVpvn_flags( dest, d - dest,
9640                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9641                     if (funny == '#')
9642                         funny = '@';
9643                     orig_copline = CopLINE(PL_curcop);
9644                     CopLINE_set(PL_curcop, tmp_copline);
9645                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9646                         "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
9647                         funny, SVfARG(tmp), funny, SVfARG(tmp));
9648                     CopLINE_set(PL_curcop, orig_copline);
9649                 }
9650             }
9651         }
9652         else {
9653             /* Didn't find the closing } at the point we expected, so restore
9654                state such that the next thing to process is the opening { and */
9655             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9656             CopLINE_set(PL_curcop, orig_copline);
9657             PL_parser->herelines = herelines;
9658             *dest = '\0';
9659             PL_parser->sub_no_recover = TRUE;
9660         }
9661     }
9662     else if (   PL_lex_state == LEX_INTERPNORMAL
9663              && !PL_lex_brackets
9664              && !intuit_more(s, PL_bufend))
9665         PL_lex_state = LEX_INTERPEND;
9666     return s;
9667 }
9668
9669 static bool
9670 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9671
9672     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9673      * found in the parse starting at 's', based on the subset that are valid
9674      * in this context input to this routine in 'valid_flags'. Advances s.
9675      * Returns TRUE if the input should be treated as a valid flag, so the next
9676      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9677      * upon first call on the current regex.  This routine will set it to any
9678      * charset modifier found.  The caller shouldn't change it.  This way,
9679      * another charset modifier encountered in the parse can be detected as an
9680      * error, as we have decided to allow only one */
9681
9682     const char c = **s;
9683     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9684
9685     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9686         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
9687             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9688                        UTF ? SVf_UTF8 : 0);
9689             (*s) += charlen;
9690             /* Pretend that it worked, so will continue processing before
9691              * dieing */
9692             return TRUE;
9693         }
9694         return FALSE;
9695     }
9696
9697     switch (c) {
9698
9699         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
9700         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9701         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9702         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9703         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9704         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9705         case LOCALE_PAT_MOD:
9706             if (*charset) {
9707                 goto multiple_charsets;
9708             }
9709             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9710             *charset = c;
9711             break;
9712         case UNICODE_PAT_MOD:
9713             if (*charset) {
9714                 goto multiple_charsets;
9715             }
9716             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9717             *charset = c;
9718             break;
9719         case ASCII_RESTRICT_PAT_MOD:
9720             if (! *charset) {
9721                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9722             }
9723             else {
9724
9725                 /* Error if previous modifier wasn't an 'a', but if it was, see
9726                  * if, and accept, a second occurrence (only) */
9727                 if (*charset != 'a'
9728                     || get_regex_charset(*pmfl)
9729                         != REGEX_ASCII_RESTRICTED_CHARSET)
9730                 {
9731                         goto multiple_charsets;
9732                 }
9733                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9734             }
9735             *charset = c;
9736             break;
9737         case DEPENDS_PAT_MOD:
9738             if (*charset) {
9739                 goto multiple_charsets;
9740             }
9741             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9742             *charset = c;
9743             break;
9744     }
9745
9746     (*s)++;
9747     return TRUE;
9748
9749     multiple_charsets:
9750         if (*charset != c) {
9751             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9752         }
9753         else if (c == 'a') {
9754   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9755             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9756         }
9757         else {
9758             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9759         }
9760
9761         /* Pretend that it worked, so will continue processing before dieing */
9762         (*s)++;
9763         return TRUE;
9764 }
9765
9766 STATIC char *
9767 S_scan_pat(pTHX_ char *start, I32 type)
9768 {
9769     PMOP *pm;
9770     char *s;
9771     const char * const valid_flags =
9772         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9773     char charset = '\0';    /* character set modifier */
9774     unsigned int x_mod_count = 0;
9775
9776     PERL_ARGS_ASSERT_SCAN_PAT;
9777
9778     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
9779     if (!s)
9780         Perl_croak(aTHX_ "Search pattern not terminated");
9781
9782     pm = (PMOP*)newPMOP(type, 0);
9783     if (PL_multi_open == '?') {
9784         /* This is the only point in the code that sets PMf_ONCE:  */
9785         pm->op_pmflags |= PMf_ONCE;
9786
9787         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9788            allows us to restrict the list needed by reset to just the ??
9789            matches.  */
9790         assert(type != OP_TRANS);
9791         if (PL_curstash) {
9792             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9793             U32 elements;
9794             if (!mg) {
9795                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9796                                  0);
9797             }
9798             elements = mg->mg_len / sizeof(PMOP**);
9799             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9800             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9801             mg->mg_len = elements * sizeof(PMOP**);
9802             PmopSTASH_set(pm,PL_curstash);
9803         }
9804     }
9805
9806     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9807      * anon CV. False positives like qr/[(?{]/ are harmless */
9808
9809     if (type == OP_QR) {
9810         STRLEN len;
9811         char *e, *p = SvPV(PL_lex_stuff, len);
9812         e = p + len;
9813         for (; p < e; p++) {
9814             if (p[0] == '(' && p[1] == '?'
9815                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9816             {
9817                 pm->op_pmflags |= PMf_HAS_CV;
9818                 break;
9819             }
9820         }
9821         pm->op_pmflags |= PMf_IS_QR;
9822     }
9823
9824     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
9825                                 &s, &charset, &x_mod_count))
9826     {};
9827     /* issue a warning if /c is specified,but /g is not */
9828     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9829     {
9830         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9831                        "Use of /c modifier is meaningless without /g" );
9832     }
9833
9834     PL_lex_op = (OP*)pm;
9835     pl_yylval.ival = OP_MATCH;
9836     return s;
9837 }
9838
9839 STATIC char *
9840 S_scan_subst(pTHX_ char *start)
9841 {
9842     char *s;
9843     PMOP *pm;
9844     I32 first_start;
9845     line_t first_line;
9846     line_t linediff = 0;
9847     I32 es = 0;
9848     char charset = '\0';    /* character set modifier */
9849     unsigned int x_mod_count = 0;
9850     char *t;
9851
9852     PERL_ARGS_ASSERT_SCAN_SUBST;
9853
9854     pl_yylval.ival = OP_NULL;
9855
9856     s = scan_str(start, TRUE, FALSE, FALSE, &t);
9857
9858     if (!s)
9859         Perl_croak(aTHX_ "Substitution pattern not terminated");
9860
9861     s = t;
9862
9863     first_start = PL_multi_start;
9864     first_line = CopLINE(PL_curcop);
9865     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9866     if (!s) {
9867         SvREFCNT_dec_NN(PL_lex_stuff);
9868         PL_lex_stuff = NULL;
9869         Perl_croak(aTHX_ "Substitution replacement not terminated");
9870     }
9871     PL_multi_start = first_start;       /* so whole substitution is taken together */
9872
9873     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9874
9875
9876     while (*s) {
9877         if (*s == EXEC_PAT_MOD) {
9878             s++;
9879             es++;
9880         }
9881         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9882                                   &s, &charset, &x_mod_count))
9883         {
9884             break;
9885         }
9886     }
9887
9888     if ((pm->op_pmflags & PMf_CONTINUE)) {
9889         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9890     }
9891
9892     if (es) {
9893         SV * const repl = newSVpvs("");
9894
9895         PL_multi_end = 0;
9896         pm->op_pmflags |= PMf_EVAL;
9897         for (; es > 1; es--) {
9898             sv_catpvs(repl, "eval ");
9899         }
9900         sv_catpvs(repl, "do {");
9901         sv_catsv(repl, PL_parser->lex_sub_repl);
9902         sv_catpvs(repl, "}");
9903         SvREFCNT_dec(PL_parser->lex_sub_repl);
9904         PL_parser->lex_sub_repl = repl;
9905     }
9906
9907
9908     linediff = CopLINE(PL_curcop) - first_line;
9909     if (linediff)
9910         CopLINE_set(PL_curcop, first_line);
9911
9912     if (linediff || es) {
9913         /* the IVX field indicates that the replacement string is a s///e;
9914          * the NVX field indicates how many src code lines the replacement
9915          * spreads over */
9916         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
9917         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
9918         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
9919                                                                     cBOOL(es);
9920     }
9921
9922     PL_lex_op = (OP*)pm;
9923     pl_yylval.ival = OP_SUBST;
9924     return s;
9925 }
9926
9927 STATIC char *
9928 S_scan_trans(pTHX_ char *start)
9929 {
9930     char* s;
9931     OP *o;
9932     U8 squash;
9933     U8 del;
9934     U8 complement;
9935     bool nondestruct = 0;
9936     char *t;
9937
9938     PERL_ARGS_ASSERT_SCAN_TRANS;
9939
9940     pl_yylval.ival = OP_NULL;
9941
9942     s = scan_str(start,FALSE,FALSE,FALSE,&t);
9943     if (!s)
9944         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9945
9946     s = t;
9947
9948     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9949     if (!s) {
9950         SvREFCNT_dec_NN(PL_lex_stuff);
9951         PL_lex_stuff = NULL;
9952         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9953     }
9954
9955     complement = del = squash = 0;
9956     while (1) {
9957         switch (*s) {
9958         case 'c':
9959             complement = OPpTRANS_COMPLEMENT;
9960             break;
9961         case 'd':
9962             del = OPpTRANS_DELETE;
9963             break;
9964         case 's':
9965             squash = OPpTRANS_SQUASH;
9966             break;
9967         case 'r':
9968             nondestruct = 1;
9969             break;
9970         default:
9971             goto no_more;
9972         }
9973         s++;
9974     }
9975   no_more:
9976
9977     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9978     o->op_private &= ~OPpTRANS_ALL;
9979     o->op_private |= del|squash|complement|
9980       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9981       (DO_UTF8(PL_parser->lex_sub_repl) ? OPpTRANS_TO_UTF   : 0);
9982
9983     PL_lex_op = o;
9984     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9985
9986
9987     return s;
9988 }
9989
9990 /* scan_heredoc
9991    Takes a pointer to the first < in <<FOO.
9992    Returns a pointer to the byte following <<FOO.
9993
9994    This function scans a heredoc, which involves different methods
9995    depending on whether we are in a string eval, quoted construct, etc.
9996    This is because PL_linestr could containing a single line of input, or
9997    a whole string being evalled, or the contents of the current quote-
9998    like operator.
9999
10000    The two basic methods are:
10001     - Steal lines from the input stream
10002     - Scan the heredoc in PL_linestr and remove it therefrom
10003
10004    In a file scope or filtered eval, the first method is used; in a
10005    string eval, the second.
10006
10007    In a quote-like operator, we have to choose between the two,
10008    depending on where we can find a newline.  We peek into outer lex-
10009    ing scopes until we find one with a newline in it.  If we reach the
10010    outermost lexing scope and it is a file, we use the stream method.
10011    Otherwise it is treated as an eval.
10012 */
10013
10014 STATIC char *
10015 S_scan_heredoc(pTHX_ char *s)
10016 {
10017     I32 op_type = OP_SCALAR;
10018     I32 len;
10019     SV *tmpstr;
10020     char term;
10021     char *d;
10022     char *e;
10023     char *peek;
10024     char *indent = 0;
10025     I32 indent_len = 0;
10026     bool indented = FALSE;
10027     const bool infile = PL_rsfp || PL_parser->filtered;
10028     const line_t origline = CopLINE(PL_curcop);
10029     LEXSHARED *shared = PL_parser->lex_shared;
10030
10031     PERL_ARGS_ASSERT_SCAN_HEREDOC;
10032
10033     s += 2;
10034     d = PL_tokenbuf + 1;
10035     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10036     *PL_tokenbuf = '\n';
10037     peek = s;
10038
10039     if (*peek == '~') {
10040         indented = TRUE;
10041         peek++; s++;
10042     }
10043
10044     while (SPACE_OR_TAB(*peek))
10045         peek++;
10046
10047     if (*peek == '`' || *peek == '\'' || *peek =='"') {
10048         s = peek;
10049         term = *s++;
10050         s = delimcpy(d, e, s, PL_bufend, term, &len);
10051         if (s == PL_bufend)
10052             Perl_croak(aTHX_ "Unterminated delimiter for here document");
10053         d += len;
10054         s++;
10055     }
10056     else {
10057         if (*s == '\\')
10058             /* <<\FOO is equivalent to <<'FOO' */
10059             s++, term = '\'';
10060         else
10061             term = '"';
10062
10063         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10064             Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10065
10066         peek = s;
10067
10068         while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10069             peek += UTF ? UTF8SKIP(peek) : 1;
10070         }
10071
10072         len = (peek - s >= e - d) ? (e - d) : (peek - s);
10073         Copy(s, d, len, char);
10074         s += len;
10075         d += len;
10076     }
10077
10078     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10079         Perl_croak(aTHX_ "Delimiter for here document is too long");
10080
10081     *d++ = '\n';
10082     *d = '\0';
10083     len = d - PL_tokenbuf;
10084
10085 #ifndef PERL_STRICT_CR
10086     d = (char *) memchr(s, '\r', PL_bufend - s);
10087     if (d) {
10088         char * const olds = s;
10089         s = d;
10090         while (s < PL_bufend) {
10091             if (*s == '\r') {
10092                 *d++ = '\n';
10093                 if (*++s == '\n')
10094                     s++;
10095             }
10096             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
10097                 *d++ = *s++;
10098                 s++;
10099             }
10100             else
10101                 *d++ = *s++;
10102         }
10103         *d = '\0';
10104         PL_bufend = d;
10105         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10106         s = olds;
10107     }
10108 #endif
10109
10110     tmpstr = newSV_type(SVt_PVIV);
10111     SvGROW(tmpstr, 80);
10112     if (term == '\'') {
10113         op_type = OP_CONST;
10114         SvIV_set(tmpstr, -1);
10115     }
10116     else if (term == '`') {
10117         op_type = OP_BACKTICK;
10118         SvIV_set(tmpstr, '\\');
10119     }
10120
10121     PL_multi_start = origline + 1 + PL_parser->herelines;
10122     PL_multi_open = PL_multi_close = '<';
10123
10124     /* inside a string eval or quote-like operator */
10125     if (!infile || PL_lex_inwhat) {
10126         SV *linestr;
10127         char *bufend;
10128         char * const olds = s;
10129         PERL_CONTEXT * const cx = CX_CUR();
10130         /* These two fields are not set until an inner lexing scope is
10131            entered.  But we need them set here. */
10132         shared->ls_bufptr  = s;
10133         shared->ls_linestr = PL_linestr;
10134
10135         if (PL_lex_inwhat) {
10136             /* Look for a newline.  If the current buffer does not have one,
10137              peek into the line buffer of the parent lexing scope, going
10138              up as many levels as necessary to find one with a newline
10139              after bufptr.
10140             */
10141             while (!(s = (char *)memchr(
10142                                 (void *)shared->ls_bufptr, '\n',
10143                                 SvEND(shared->ls_linestr)-shared->ls_bufptr
10144                 )))
10145             {
10146                 shared = shared->ls_prev;
10147                 /* shared is only null if we have gone beyond the outermost
10148                    lexing scope.  In a file, we will have broken out of the
10149                    loop in the previous iteration.  In an eval, the string buf-
10150                    fer ends with "\n;", so the while condition above will have
10151                    evaluated to false.  So shared can never be null.  Or so you
10152                    might think.  Odd syntax errors like s;@{<<; can gobble up
10153                    the implicit semicolon at the end of a flie, causing the
10154                    file handle to be closed even when we are not in a string
10155                    eval.  So shared may be null in that case.
10156                    (Closing '>>}' here to balance the earlier open brace for
10157                    editors that look for matched pairs.) */
10158                 if (UNLIKELY(!shared))
10159                     goto interminable;
10160                 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10161                    most lexing scope.  In a file, shared->ls_linestr at that
10162                    level is just one line, so there is no body to steal. */
10163                 if (infile && !shared->ls_prev) {
10164                     s = olds;
10165                     goto streaming;
10166                 }
10167             }
10168         }
10169         else {  /* eval or we've already hit EOF */
10170             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10171             if (!s)
10172                 goto interminable;
10173         }
10174
10175         linestr = shared->ls_linestr;
10176         bufend = SvEND(linestr);
10177         d = s;
10178         if (indented) {
10179             char *myolds = s;
10180
10181             while (s < bufend - len + 1) {
10182                 if (*s++ == '\n')
10183                     ++PL_parser->herelines;
10184
10185                 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10186                     char *backup = s;
10187                     indent_len = 0;
10188
10189                     /* Only valid if it's preceded by whitespace only */
10190                     while (backup != myolds && --backup >= myolds) {
10191                         if (! SPACE_OR_TAB(*backup)) {
10192                             break;
10193                         }
10194                         indent_len++;
10195                     }
10196
10197                     /* No whitespace or all! */
10198                     if (backup == s || *backup == '\n') {
10199                         Newx(indent, indent_len + 1, char);
10200                         memcpy(indent, backup + 1, indent_len);
10201                         indent[indent_len] = 0;
10202                         s--; /* before our delimiter */
10203                         PL_parser->herelines--; /* this line doesn't count */
10204                         break;
10205                     }
10206                 }
10207             }
10208         }
10209         else {
10210             while (s < bufend - len + 1
10211                    && memNE(s,PL_tokenbuf,len) )
10212             {
10213                 if (*s++ == '\n')
10214                     ++PL_parser->herelines;
10215             }
10216         }
10217
10218         if (s >= bufend - len + 1) {
10219             goto interminable;
10220         }
10221
10222         sv_setpvn(tmpstr,d+1,s-d);
10223         s += len - 1;
10224         /* the preceding stmt passes a newline */
10225         PL_parser->herelines++;
10226
10227         /* s now points to the newline after the heredoc terminator.
10228            d points to the newline before the body of the heredoc.
10229          */
10230
10231         /* We are going to modify linestr in place here, so set
10232            aside copies of the string if necessary for re-evals or
10233            (caller $n)[6]. */
10234         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10235            check shared->re_eval_str. */
10236         if (shared->re_eval_start || shared->re_eval_str) {
10237             /* Set aside the rest of the regexp */
10238             if (!shared->re_eval_str)
10239                 shared->re_eval_str =
10240                        newSVpvn(shared->re_eval_start,
10241                                 bufend - shared->re_eval_start);
10242             shared->re_eval_start -= s-d;
10243         }
10244
10245         if (cxstack_ix >= 0
10246             && CxTYPE(cx) == CXt_EVAL
10247             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10248             && cx->blk_eval.cur_text == linestr)
10249         {
10250             cx->blk_eval.cur_text = newSVsv(linestr);
10251             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10252         }
10253
10254         /* Copy everything from s onwards back to d. */
10255         Move(s,d,bufend-s + 1,char);
10256         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10257         /* Setting PL_bufend only applies when we have not dug deeper
10258            into other scopes, because sublex_done sets PL_bufend to
10259            SvEND(PL_linestr). */
10260         if (shared == PL_parser->lex_shared)
10261             PL_bufend = SvEND(linestr);
10262         s = olds;
10263     }
10264     else {
10265         SV *linestr_save;
10266         char *oldbufptr_save;
10267         char *oldoldbufptr_save;
10268       streaming:
10269         SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
10270         term = PL_tokenbuf[1];
10271         len--;
10272         linestr_save = PL_linestr; /* must restore this afterwards */
10273         d = s;                   /* and this */
10274         oldbufptr_save = PL_oldbufptr;
10275         oldoldbufptr_save = PL_oldoldbufptr;
10276         PL_linestr = newSVpvs("");
10277         PL_bufend = SvPVX(PL_linestr);
10278
10279         while (1) {
10280             PL_bufptr = PL_bufend;
10281             CopLINE_set(PL_curcop,
10282                         origline + 1 + PL_parser->herelines);
10283
10284             if (   !lex_next_chunk(LEX_NO_TERM)
10285                 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10286             {
10287                 /* Simply freeing linestr_save might seem simpler here, as it
10288                    does not matter what PL_linestr points to, since we are
10289                    about to croak; but in a quote-like op, linestr_save
10290                    will have been prospectively freed already, via
10291                    SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10292                    restore PL_linestr. */
10293                 SvREFCNT_dec_NN(PL_linestr);
10294                 PL_linestr = linestr_save;
10295                 PL_oldbufptr = oldbufptr_save;
10296                 PL_oldoldbufptr = oldoldbufptr_save;
10297                 goto interminable;
10298             }
10299
10300             CopLINE_set(PL_curcop, origline);
10301
10302             if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10303                 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10304                 /* ^That should be enough to avoid this needing to grow:  */
10305                 sv_catpvs(PL_linestr, "\n\0");
10306                 assert(s == SvPVX(PL_linestr));
10307                 PL_bufend = SvEND(PL_linestr);
10308             }
10309
10310             s = PL_bufptr;
10311             PL_parser->herelines++;
10312             PL_last_lop = PL_last_uni = NULL;
10313
10314 #ifndef PERL_STRICT_CR
10315             if (PL_bufend - PL_linestart >= 2) {
10316                 if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10317                     || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10318                 {
10319                     PL_bufend[-2] = '\n';
10320                     PL_bufend--;
10321                     SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10322                 }
10323                 else if (PL_bufend[-1] == '\r')
10324                     PL_bufend[-1] = '\n';
10325             }
10326             else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10327                 PL_bufend[-1] = '\n';
10328 #endif
10329
10330             if (indented && (PL_bufend-s) >= len) {
10331                 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10332
10333                 if (found) {
10334                     char *backup = found;
10335                     indent_len = 0;
10336
10337                     /* Only valid if it's preceded by whitespace only */
10338                     while (backup != s && --backup >= s) {
10339                         if (! SPACE_OR_TAB(*backup)) {
10340                             break;
10341                         }
10342                         indent_len++;
10343                     }
10344
10345                     /* All whitespace or none! */
10346                     if (backup == found || SPACE_OR_TAB(*backup)) {
10347                         Newx(indent, indent_len + 1, char);
10348                         memcpy(indent, backup, indent_len);
10349                         indent[indent_len] = 0;
10350                         SvREFCNT_dec(PL_linestr);
10351                         PL_linestr = linestr_save;
10352                         PL_linestart = SvPVX(linestr_save);
10353                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10354                         PL_oldbufptr = oldbufptr_save;
10355                         PL_oldoldbufptr = oldoldbufptr_save;
10356                         s = d;
10357                         break;
10358                     }
10359                 }
10360
10361                 /* Didn't find it */
10362                 sv_catsv(tmpstr,PL_linestr);
10363             }
10364             else {
10365                 if (*s == term && PL_bufend-s >= len
10366                     && memEQ(s,PL_tokenbuf + 1,len))
10367                 {
10368                     SvREFCNT_dec(PL_linestr);
10369                     PL_linestr = linestr_save;
10370                     PL_linestart = SvPVX(linestr_save);
10371                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10372                     PL_oldbufptr = oldbufptr_save;
10373                     PL_oldoldbufptr = oldoldbufptr_save;
10374                     s = d;
10375                     break;
10376                 }
10377                 else {
10378                     sv_catsv(tmpstr,PL_linestr);
10379                 }
10380             }
10381         } /* while (1) */
10382     }
10383
10384     PL_multi_end = origline + PL_parser->herelines;
10385
10386     if (indented && indent) {
10387         STRLEN linecount = 1;
10388         STRLEN herelen = SvCUR(tmpstr);
10389         char *ss = SvPVX(tmpstr);
10390         char *se = ss + herelen;
10391         SV *newstr = newSV(herelen+1);
10392         SvPOK_on(newstr);
10393
10394         /* Trim leading whitespace */
10395         while (ss < se) {
10396             /* newline only? Copy and move on */
10397             if (*ss == '\n') {
10398                 sv_catpvs(newstr,"\n");
10399                 ss++;
10400                 linecount++;
10401
10402             /* Found our indentation? Strip it */
10403             }
10404             else if (se - ss >= indent_len
10405                        && memEQ(ss, indent, indent_len))
10406             {
10407                 STRLEN le = 0;
10408                 ss += indent_len;
10409
10410                 while ((ss + le) < se && *(ss + le) != '\n')
10411                     le++;
10412
10413                 sv_catpvn(newstr, ss, le);
10414                 ss += le;
10415
10416             /* Line doesn't begin with our indentation? Croak */
10417             }
10418             else {
10419                 Safefree(indent);
10420                 Perl_croak(aTHX_
10421                     "Indentation on line %d of here-doc doesn't match delimiter",
10422                     (int)linecount
10423                 );
10424             }
10425         } /* while */
10426
10427         /* avoid sv_setsv() as we dont wan't to COW here */
10428         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10429         Safefree(indent);
10430         SvREFCNT_dec_NN(newstr);
10431     }
10432
10433     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10434         SvPV_shrink_to_cur(tmpstr);
10435     }
10436
10437     if (!IN_BYTES) {
10438         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10439             SvUTF8_on(tmpstr);
10440     }
10441
10442     PL_lex_stuff = tmpstr;
10443     pl_yylval.ival = op_type;
10444     return s;
10445
10446   interminable:
10447     if (indent)
10448         Safefree(indent);
10449     SvREFCNT_dec(tmpstr);
10450     CopLINE_set(PL_curcop, origline);
10451     missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
10452 }
10453
10454
10455 /* scan_inputsymbol
10456    takes: position of first '<' in input buffer
10457    returns: position of first char following the matching '>' in
10458             input buffer
10459    side-effects: pl_yylval and lex_op are set.
10460
10461    This code handles:
10462
10463    <>           read from ARGV
10464    <<>>         read from ARGV without magic open
10465    <FH>         read from filehandle
10466    <pkg::FH>    read from package qualified filehandle
10467    <pkg'FH>     read from package qualified filehandle
10468    <$fh>        read from filehandle in $fh
10469    <*.h>        filename glob
10470
10471 */
10472
10473 STATIC char *
10474 S_scan_inputsymbol(pTHX_ char *start)
10475 {
10476     char *s = start;            /* current position in buffer */
10477     char *end;
10478     I32 len;
10479     bool nomagicopen = FALSE;
10480     char *d = PL_tokenbuf;                                      /* start of temp holding space */
10481     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
10482
10483     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10484
10485     end = (char *) memchr(s, '\n', PL_bufend - s);
10486     if (!end)
10487         end = PL_bufend;
10488     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10489         nomagicopen = TRUE;
10490         *d = '\0';
10491         len = 0;
10492         s += 3;
10493     }
10494     else
10495         s = delimcpy(d, e, s + 1, end, '>', &len);      /* extract until > */
10496
10497     /* die if we didn't have space for the contents of the <>,
10498        or if it didn't end, or if we see a newline
10499     */
10500
10501     if (len >= (I32)sizeof PL_tokenbuf)
10502         Perl_croak(aTHX_ "Excessively long <> operator");
10503     if (s >= end)
10504         Perl_croak(aTHX_ "Unterminated <> operator");
10505
10506     s++;
10507
10508     /* check for <$fh>
10509        Remember, only scalar variables are interpreted as filehandles by
10510        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10511        treated as a glob() call.
10512        This code makes use of the fact that except for the $ at the front,
10513        a scalar variable and a filehandle look the same.
10514     */
10515     if (*d == '$' && d[1]) d++;
10516
10517     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10518     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
10519         d += UTF ? UTF8SKIP(d) : 1;
10520     }
10521
10522     /* If we've tried to read what we allow filehandles to look like, and
10523        there's still text left, then it must be a glob() and not a getline.
10524        Use scan_str to pull out the stuff between the <> and treat it
10525        as nothing more than a string.
10526     */
10527
10528     if (d - PL_tokenbuf != len) {
10529         pl_yylval.ival = OP_GLOB;
10530         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10531         if (!s)
10532            Perl_croak(aTHX_ "Glob not terminated");
10533         return s;
10534     }
10535     else {
10536         bool readline_overriden = FALSE;
10537         GV *gv_readline;
10538         /* we're in a filehandle read situation */
10539         d = PL_tokenbuf;
10540
10541         /* turn <> into <ARGV> */
10542         if (!len)
10543             Copy("ARGV",d,5,char);
10544
10545         /* Check whether readline() is overriden */
10546         if ((gv_readline = gv_override("readline",8)))
10547             readline_overriden = TRUE;
10548
10549         /* if <$fh>, create the ops to turn the variable into a
10550            filehandle
10551         */
10552         if (*d == '$') {
10553             /* try to find it in the pad for this block, otherwise find
10554                add symbol table ops
10555             */
10556             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
10557             if (tmp != NOT_IN_PAD) {
10558                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10559                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10560                     HEK * const stashname = HvNAME_HEK(stash);
10561                     SV * const sym = sv_2mortal(newSVhek(stashname));
10562                     sv_catpvs(sym, "::");
10563                     sv_catpv(sym, d+1);
10564                     d = SvPVX(sym);
10565                     goto intro_sym;
10566                 }
10567                 else {
10568                     OP * const o = newOP(OP_PADSV, 0);
10569                     o->op_targ = tmp;
10570                     PL_lex_op = readline_overriden
10571                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10572                                 op_append_elem(OP_LIST, o,
10573                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10574                         : newUNOP(OP_READLINE, 0, o);
10575                 }
10576             }
10577             else {
10578                 GV *gv;
10579                 ++d;
10580               intro_sym:
10581                 gv = gv_fetchpv(d,
10582                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
10583                                 SVt_PV);
10584                 PL_lex_op = readline_overriden
10585                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10586                             op_append_elem(OP_LIST,
10587                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10588                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10589                     : newUNOP(OP_READLINE, 0,
10590                             newUNOP(OP_RV2SV, 0,
10591                                 newGVOP(OP_GV, 0, gv)));
10592             }
10593             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10594             pl_yylval.ival = OP_NULL;
10595         }
10596
10597         /* If it's none of the above, it must be a literal filehandle
10598            (<Foo::BAR> or <FOO>) so build a simple readline OP */
10599         else {
10600             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10601             PL_lex_op = readline_overriden
10602                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10603                         op_append_elem(OP_LIST,
10604                             newGVOP(OP_GV, 0, gv),
10605                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10606                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
10607             pl_yylval.ival = OP_NULL;
10608         }
10609     }
10610
10611     return s;
10612 }
10613
10614
10615 /* scan_str
10616    takes:
10617         start                   position in buffer
10618         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
10619                                 only if they are of the open/close form
10620         keep_delims             preserve the delimiters around the string
10621         re_reparse              compiling a run-time /(?{})/:
10622                                    collapse // to /,  and skip encoding src
10623         delimp                  if non-null, this is set to the position of
10624                                 the closing delimiter, or just after it if
10625                                 the closing and opening delimiters differ
10626                                 (i.e., the opening delimiter of a substitu-
10627                                 tion replacement)
10628    returns: position to continue reading from buffer
10629    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10630         updates the read buffer.
10631
10632    This subroutine pulls a string out of the input.  It is called for:
10633         q               single quotes           q(literal text)
10634         '               single quotes           'literal text'
10635         qq              double quotes           qq(interpolate $here please)
10636         "               double quotes           "interpolate $here please"
10637         qx              backticks               qx(/bin/ls -l)
10638         `               backticks               `/bin/ls -l`
10639         qw              quote words             @EXPORT_OK = qw( func() $spam )
10640         m//             regexp match            m/this/
10641         s///            regexp substitute       s/this/that/
10642         tr///           string transliterate    tr/this/that/
10643         y///            string transliterate    y/this/that/
10644         ($*@)           sub prototypes          sub foo ($)
10645         (stuff)         sub attr parameters     sub foo : attr(stuff)
10646         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10647
10648    In most of these cases (all but <>, patterns and transliterate)
10649    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10650    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10651    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10652    calls scan_str().
10653
10654    It skips whitespace before the string starts, and treats the first
10655    character as the delimiter.  If the delimiter is one of ([{< then
10656    the corresponding "close" character )]}> is used as the closing
10657    delimiter.  It allows quoting of delimiters, and if the string has
10658    balanced delimiters ([{<>}]) it allows nesting.
10659
10660    On success, the SV with the resulting string is put into lex_stuff or,
10661    if that is already non-NULL, into lex_repl. The second case occurs only
10662    when parsing the RHS of the special constructs s/// and tr/// (y///).
10663    For convenience, the terminating delimiter character is stuffed into
10664    SvIVX of the SV.
10665 */
10666
10667 STATIC char *
10668 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
10669                  char **delimp
10670     )
10671 {
10672     SV *sv;                     /* scalar value: string */
10673     const char *tmps;           /* temp string, used for delimiter matching */
10674     char *s = start;            /* current position in the buffer */
10675     char term;                  /* terminating character */
10676     char *to;                   /* current position in the sv's data */
10677     I32 brackets = 1;           /* bracket nesting level */
10678     bool d_is_utf8 = FALSE;     /* is there any utf8 content? */
10679     IV termcode;                /* terminating char. code */
10680     U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
10681     STRLEN termlen;             /* length of terminating string */
10682     line_t herelines;
10683
10684     /* The delimiters that have a mirror-image closing one */
10685     const char * opening_delims = "([{<";
10686     const char * closing_delims = ")]}>";
10687
10688     /* The only non-UTF character that isn't a stand alone grapheme is
10689      * white-space, hence can't be a delimiter. */
10690     const char * non_grapheme_msg = "Use of unassigned code point or"
10691                                     " non-standalone grapheme for a delimiter"
10692                                     " is not allowed";
10693     PERL_ARGS_ASSERT_SCAN_STR;
10694
10695     /* skip space before the delimiter */
10696     if (isSPACE(*s)) {
10697         s = skipspace(s);
10698     }
10699
10700     /* mark where we are, in case we need to report errors */
10701     CLINE;
10702
10703     /* after skipping whitespace, the next character is the terminator */
10704     term = *s;
10705     if (!UTF || UTF8_IS_INVARIANT(term)) {
10706         termcode = termstr[0] = term;
10707         termlen = 1;
10708     }
10709     else {
10710         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10711         if (UTF && UNLIKELY(! _is_grapheme((U8 *) start,
10712                                            (U8 *) s,
10713                                            (U8 *) PL_bufend,
10714                                                   termcode)))
10715         {
10716             yyerror(non_grapheme_msg);
10717         }
10718
10719         Copy(s, termstr, termlen, U8);
10720     }
10721
10722     /* mark where we are */
10723     PL_multi_start = CopLINE(PL_curcop);
10724     PL_multi_open = termcode;
10725     herelines = PL_parser->herelines;
10726
10727     /* If the delimiter has a mirror-image closing one, get it */
10728     if (term && (tmps = strchr(opening_delims, term))) {
10729         termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
10730     }
10731
10732     PL_multi_close = termcode;
10733
10734     if (PL_multi_open == PL_multi_close) {
10735         keep_bracketed_quoted = FALSE;
10736     }
10737
10738     /* create a new SV to hold the contents.  79 is the SV's initial length.
10739        What a random number. */
10740     sv = newSV_type(SVt_PVIV);
10741     SvGROW(sv, 80);
10742     SvIV_set(sv, termcode);
10743     (void)SvPOK_only(sv);               /* validate pointer */
10744
10745     /* move past delimiter and try to read a complete string */
10746     if (keep_delims)
10747         sv_catpvn(sv, s, termlen);
10748     s += termlen;
10749     for (;;) {
10750         /* extend sv if need be */
10751         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10752         /* set 'to' to the next character in the sv's string */
10753         to = SvPVX(sv)+SvCUR(sv);
10754
10755         /* if open delimiter is the close delimiter read unbridle */
10756         if (PL_multi_open == PL_multi_close) {
10757             for (; s < PL_bufend; s++,to++) {
10758                 /* embedded newlines increment the current line number */
10759                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10760                     COPLINE_INC_WITH_HERELINES;
10761                 /* handle quoted delimiters */
10762                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10763                     if (!keep_bracketed_quoted
10764                         && (s[1] == term
10765                             || (re_reparse && s[1] == '\\'))
10766                     )
10767                         s++;
10768                     else /* any other quotes are simply copied straight through */
10769                         *to++ = *s++;
10770                 }
10771                 /* terminate when run out of buffer (the for() condition), or
10772                    have found the terminator */
10773                 else if (*s == term) {  /* First byte of terminator matches */
10774                     if (termlen == 1)   /* If is the only byte, are done */
10775                         break;
10776
10777                     /* If the remainder of the terminator matches, also are
10778                      * done, after checking that is a separate grapheme */
10779                     if (   s + termlen <= PL_bufend
10780                         && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
10781                     {
10782                         if (   UTF
10783                             && UNLIKELY(! _is_grapheme((U8 *) start,
10784                                                        (U8 *) s,
10785                                                        (U8 *) PL_bufend,
10786                                                               termcode)))
10787                         {
10788                             yyerror(non_grapheme_msg);
10789                         }
10790                         break;
10791                     }
10792                 }
10793                 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
10794                     d_is_utf8 = TRUE;
10795                 }
10796
10797                 *to = *s;
10798             }
10799         }
10800
10801         /* if the terminator isn't the same as the start character (e.g.,
10802            matched brackets), we have to allow more in the quoting, and
10803            be prepared for nested brackets.
10804         */
10805         else {
10806             /* read until we run out of string, or we find the terminator */
10807             for (; s < PL_bufend; s++,to++) {
10808                 /* embedded newlines increment the line count */
10809                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10810                     COPLINE_INC_WITH_HERELINES;
10811                 /* backslashes can escape the open or closing characters */
10812                 if (*s == '\\' && s+1 < PL_bufend) {
10813                     if (!keep_bracketed_quoted
10814                        && ( ((UV)s[1] == PL_multi_open)
10815                          || ((UV)s[1] == PL_multi_close) ))
10816                     {
10817                         s++;
10818                     }
10819                     else
10820                         *to++ = *s++;
10821                 }
10822                 /* allow nested opens and closes */
10823                 else if ((UV)*s == PL_multi_close && --brackets <= 0)
10824                     break;
10825                 else if ((UV)*s == PL_multi_open)
10826                     brackets++;
10827                 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10828                     d_is_utf8 = TRUE;
10829                 *to = *s;
10830             }
10831         }
10832         /* terminate the copied string and update the sv's end-of-string */
10833         *to = '\0';
10834         SvCUR_set(sv, to - SvPVX_const(sv));
10835
10836         /*
10837          * this next chunk reads more into the buffer if we're not done yet
10838          */
10839
10840         if (s < PL_bufend)
10841             break;              /* handle case where we are done yet :-) */
10842
10843 #ifndef PERL_STRICT_CR
10844         if (to - SvPVX_const(sv) >= 2) {
10845             if (   (to[-2] == '\r' && to[-1] == '\n')
10846                 || (to[-2] == '\n' && to[-1] == '\r'))
10847             {
10848                 to[-2] = '\n';
10849                 to--;
10850                 SvCUR_set(sv, to - SvPVX_const(sv));
10851             }
10852             else if (to[-1] == '\r')
10853                 to[-1] = '\n';
10854         }
10855         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10856             to[-1] = '\n';
10857 #endif
10858
10859         /* if we're out of file, or a read fails, bail and reset the current
10860            line marker so we can report where the unterminated string began
10861         */
10862         COPLINE_INC_WITH_HERELINES;
10863         PL_bufptr = PL_bufend;
10864         if (!lex_next_chunk(0)) {
10865             sv_free(sv);
10866             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10867             return NULL;
10868         }
10869         s = start = PL_bufptr;
10870     }
10871
10872     /* at this point, we have successfully read the delimited string */
10873
10874     if (keep_delims)
10875             sv_catpvn(sv, s, termlen);
10876     s += termlen;
10877
10878     if (d_is_utf8)
10879         SvUTF8_on(sv);
10880
10881     PL_multi_end = CopLINE(PL_curcop);
10882     CopLINE_set(PL_curcop, PL_multi_start);
10883     PL_parser->herelines = herelines;
10884
10885     /* if we allocated too much space, give some back */
10886     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10887         SvLEN_set(sv, SvCUR(sv) + 1);
10888         SvPV_renew(sv, SvLEN(sv));
10889     }
10890
10891     /* decide whether this is the first or second quoted string we've read
10892        for this op
10893     */
10894
10895     if (PL_lex_stuff)
10896         PL_parser->lex_sub_repl = sv;
10897     else
10898         PL_lex_stuff = sv;
10899     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10900     return s;
10901 }
10902
10903 /*
10904   scan_num
10905   takes: pointer to position in buffer
10906   returns: pointer to new position in buffer
10907   side-effects: builds ops for the constant in pl_yylval.op
10908
10909   Read a number in any of the formats that Perl accepts:
10910
10911   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10912   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10913   0b[01](_?[01])*                                       binary integers
10914   0[0-7](_?[0-7])*                                      octal integers
10915   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
10916   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
10917
10918   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10919   thing it reads.
10920
10921   If it reads a number without a decimal point or an exponent, it will
10922   try converting the number to an integer and see if it can do so
10923   without loss of precision.
10924 */
10925
10926 char *
10927 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10928 {
10929     const char *s = start;      /* current position in buffer */
10930     char *d;                    /* destination in temp buffer */
10931     char *e;                    /* end of temp buffer */
10932     NV nv;                              /* number read, as a double */
10933     SV *sv = NULL;                      /* place to put the converted number */
10934     bool floatit;                       /* boolean: int or float? */
10935     const char *lastub = NULL;          /* position of last underbar */
10936     static const char* const number_too_long = "Number too long";
10937     bool warned_about_underscore = 0;
10938 #define WARN_ABOUT_UNDERSCORE() \
10939         do { \
10940             if (!warned_about_underscore) { \
10941                 warned_about_underscore = 1; \
10942                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
10943                                "Misplaced _ in number"); \
10944             } \
10945         } while(0)
10946     /* Hexadecimal floating point.
10947      *
10948      * In many places (where we have quads and NV is IEEE 754 double)
10949      * we can fit the mantissa bits of a NV into an unsigned quad.
10950      * (Note that UVs might not be quads even when we have quads.)
10951      * This will not work everywhere, though (either no quads, or
10952      * using long doubles), in which case we have to resort to NV,
10953      * which will probably mean horrible loss of precision due to
10954      * multiple fp operations. */
10955     bool hexfp = FALSE;
10956     int total_bits = 0;
10957     int significant_bits = 0;
10958 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10959 #  define HEXFP_UQUAD
10960     Uquad_t hexfp_uquad = 0;
10961     int hexfp_frac_bits = 0;
10962 #else
10963 #  define HEXFP_NV
10964     NV hexfp_nv = 0.0;
10965 #endif
10966     NV hexfp_mult = 1.0;
10967     UV high_non_zero = 0; /* highest digit */
10968     int non_zero_integer_digits = 0;
10969
10970     PERL_ARGS_ASSERT_SCAN_NUM;
10971
10972     /* We use the first character to decide what type of number this is */
10973
10974     switch (*s) {
10975     default:
10976         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10977
10978     /* if it starts with a 0, it could be an octal number, a decimal in
10979        0.13 disguise, or a hexadecimal number, or a binary number. */
10980     case '0':
10981         {
10982           /* variables:
10983              u          holds the "number so far"
10984              shift      the power of 2 of the base
10985                         (hex == 4, octal == 3, binary == 1)
10986              overflowed was the number more than we can hold?
10987
10988              Shift is used when we add a digit.  It also serves as an "are
10989              we in octal/hex/binary?" indicator to disallow hex characters
10990              when in octal mode.
10991            */
10992             NV n = 0.0;
10993             UV u = 0;
10994             I32 shift;
10995             bool overflowed = FALSE;
10996             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10997             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10998             static const char* const bases[5] =
10999               { "", "binary", "", "octal", "hexadecimal" };
11000             static const char* const Bases[5] =
11001               { "", "Binary", "", "Octal", "Hexadecimal" };
11002             static const char* const maxima[5] =
11003               { "",
11004                 "0b11111111111111111111111111111111",
11005                 "",
11006                 "037777777777",
11007                 "0xffffffff" };
11008             const char *base, *Base, *max;
11009
11010             /* check for hex */
11011             if (isALPHA_FOLD_EQ(s[1], 'x')) {
11012                 shift = 4;
11013                 s += 2;
11014                 just_zero = FALSE;
11015             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11016                 shift = 1;
11017                 s += 2;
11018                 just_zero = FALSE;
11019             }
11020             /* check for a decimal in disguise */
11021             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11022                 goto decimal;
11023             /* so it must be octal */
11024             else {
11025                 shift = 3;
11026                 s++;
11027             }
11028
11029             if (*s == '_') {
11030                 WARN_ABOUT_UNDERSCORE();
11031                lastub = s++;
11032             }
11033
11034             base = bases[shift];
11035             Base = Bases[shift];
11036             max  = maxima[shift];
11037
11038             /* read the rest of the number */
11039             for (;;) {
11040                 /* x is used in the overflow test,
11041                    b is the digit we're adding on. */
11042                 UV x, b;
11043
11044                 switch (*s) {
11045
11046                 /* if we don't mention it, we're done */
11047                 default:
11048                     goto out;
11049
11050                 /* _ are ignored -- but warned about if consecutive */
11051                 case '_':
11052                     if (lastub && s == lastub + 1)
11053                         WARN_ABOUT_UNDERSCORE();
11054                     lastub = s++;
11055                     break;
11056
11057                 /* 8 and 9 are not octal */
11058                 case '8': case '9':
11059                     if (shift == 3)
11060                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11061                     /* FALLTHROUGH */
11062
11063                 /* octal digits */
11064                 case '2': case '3': case '4':
11065                 case '5': case '6': case '7':
11066                     if (shift == 1)
11067                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11068                     /* FALLTHROUGH */
11069
11070                 case '0': case '1':
11071                     b = *s++ & 15;              /* ASCII digit -> value of digit */
11072                     goto digit;
11073
11074                 /* hex digits */
11075                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11076                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11077                     /* make sure they said 0x */
11078                     if (shift != 4)
11079                         goto out;
11080                     b = (*s++ & 7) + 9;
11081
11082                     /* Prepare to put the digit we have onto the end
11083                        of the number so far.  We check for overflows.
11084                     */
11085
11086                   digit:
11087                     just_zero = FALSE;
11088                     if (!overflowed) {
11089                         assert(shift >= 0);
11090                         x = u << shift; /* make room for the digit */
11091
11092                         total_bits += shift;
11093
11094                         if ((x >> shift) != u
11095                             && !(PL_hints & HINT_NEW_BINARY)) {
11096                             overflowed = TRUE;
11097                             n = (NV) u;
11098                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11099                                              "Integer overflow in %s number",
11100                                              base);
11101                         } else
11102                             u = x | b;          /* add the digit to the end */
11103                     }
11104                     if (overflowed) {
11105                         n *= nvshift[shift];
11106                         /* If an NV has not enough bits in its
11107                          * mantissa to represent an UV this summing of
11108                          * small low-order numbers is a waste of time
11109                          * (because the NV cannot preserve the
11110                          * low-order bits anyway): we could just
11111                          * remember when did we overflow and in the
11112                          * end just multiply n by the right
11113                          * amount. */
11114                         n += (NV) b;
11115                     }
11116
11117                     if (high_non_zero == 0 && b > 0)
11118                         high_non_zero = b;
11119
11120                     if (high_non_zero)
11121                         non_zero_integer_digits++;
11122
11123                     /* this could be hexfp, but peek ahead
11124                      * to avoid matching ".." */
11125                     if (UNLIKELY(HEXFP_PEEK(s))) {
11126                         goto out;
11127                     }
11128
11129                     break;
11130                 }
11131             }
11132
11133           /* if we get here, we had success: make a scalar value from
11134              the number.
11135           */
11136           out:
11137
11138             /* final misplaced underbar check */
11139             if (s[-1] == '_')
11140                 WARN_ABOUT_UNDERSCORE();
11141
11142             if (UNLIKELY(HEXFP_PEEK(s))) {
11143                 /* Do sloppy (on the underbars) but quick detection
11144                  * (and value construction) for hexfp, the decimal
11145                  * detection will shortly be more thorough with the
11146                  * underbar checks. */
11147                 const char* h = s;
11148                 significant_bits = non_zero_integer_digits * shift;
11149 #ifdef HEXFP_UQUAD
11150                 hexfp_uquad = u;
11151 #else /* HEXFP_NV */
11152                 hexfp_nv = u;
11153 #endif
11154                 /* Ignore the leading zero bits of
11155                  * the high (first) non-zero digit. */
11156                 if (high_non_zero) {
11157                     if (high_non_zero < 0x8)
11158                         significant_bits--;
11159                     if (high_non_zero < 0x4)
11160                         significant_bits--;
11161                     if (high_non_zero < 0x2)
11162                         significant_bits--;
11163                 }
11164
11165                 if (*h == '.') {
11166 #ifdef HEXFP_NV
11167                     NV nv_mult = 1.0;
11168 #endif
11169                     bool accumulate = TRUE;
11170                     U8 b;
11171                     int lim = 1 << shift;
11172                     for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11173                                *h == '_'); h++) {
11174                         if (isXDIGIT(*h)) {
11175                             significant_bits += shift;
11176 #ifdef HEXFP_UQUAD
11177                             if (accumulate) {
11178                                 if (significant_bits < NV_MANT_DIG) {
11179                                     /* We are in the long "run" of xdigits,
11180                                      * accumulate the full four bits. */
11181                                     assert(shift >= 0);
11182                                     hexfp_uquad <<= shift;
11183                                     hexfp_uquad |= b;
11184                                     hexfp_frac_bits += shift;
11185                                 } else if (significant_bits - shift < NV_MANT_DIG) {
11186                                     /* We are at a hexdigit either at,
11187                                      * or straddling, the edge of mantissa.
11188                                      * We will try grabbing as many as
11189                                      * possible bits. */
11190                                     int tail =
11191                                       significant_bits - NV_MANT_DIG;
11192                                     if (tail <= 0)
11193                                        tail += shift;
11194                                     assert(tail >= 0);
11195                                     hexfp_uquad <<= tail;
11196                                     assert((shift - tail) >= 0);
11197                                     hexfp_uquad |= b >> (shift - tail);
11198                                     hexfp_frac_bits += tail;
11199
11200                                     /* Ignore the trailing zero bits
11201                                      * of the last non-zero xdigit.
11202                                      *
11203                                      * The assumption here is that if
11204                                      * one has input of e.g. the xdigit
11205                                      * eight (0x8), there is only one
11206                                      * bit being input, not the full
11207                                      * four bits.  Conversely, if one
11208                                      * specifies a zero xdigit, the
11209                                      * assumption is that one really
11210                                      * wants all those bits to be zero. */
11211                                     if (b) {
11212                                         if ((b & 0x1) == 0x0) {
11213                                             significant_bits--;
11214                                             if ((b & 0x2) == 0x0) {
11215                                                 significant_bits--;
11216                                                 if ((b & 0x4) == 0x0) {
11217                                                     significant_bits--;
11218                                                 }
11219                                             }
11220                                         }
11221                                     }
11222
11223                                     accumulate = FALSE;
11224                                 }
11225                             } else {
11226                                 /* Keep skipping the xdigits, and
11227                                  * accumulating the significant bits,
11228                                  * but do not shift the uquad
11229                                  * (which would catastrophically drop
11230                                  * high-order bits) or accumulate the
11231                                  * xdigits anymore. */
11232                             }
11233 #else /* HEXFP_NV */
11234                             if (accumulate) {
11235                                 nv_mult /= nvshift[shift];
11236                                 if (nv_mult > 0.0)
11237                                     hexfp_nv += b * nv_mult;
11238                                 else
11239                                     accumulate = FALSE;
11240                             }
11241 #endif
11242                         }
11243                         if (significant_bits >= NV_MANT_DIG)
11244                             accumulate = FALSE;
11245                     }
11246                 }
11247
11248                 if ((total_bits > 0 || significant_bits > 0) &&
11249                     isALPHA_FOLD_EQ(*h, 'p')) {
11250                     bool negexp = FALSE;
11251                     h++;
11252                     if (*h == '+')
11253                         h++;
11254                     else if (*h == '-') {
11255                         negexp = TRUE;
11256                         h++;
11257                     }
11258                     if (isDIGIT(*h)) {
11259                         I32 hexfp_exp = 0;
11260                         while (isDIGIT(*h) || *h == '_') {
11261                             if (isDIGIT(*h)) {
11262                                 hexfp_exp *= 10;
11263                                 hexfp_exp += *h - '0';
11264 #ifdef NV_MIN_EXP
11265                                 if (negexp
11266                                     && -hexfp_exp < NV_MIN_EXP - 1) {
11267                                     /* NOTE: this means that the exponent
11268                                      * underflow warning happens for
11269                                      * the IEEE 754 subnormals (denormals),
11270                                      * because DBL_MIN_EXP etc are the lowest
11271                                      * possible binary (or, rather, DBL_RADIX-base)
11272                                      * exponent for normals, not subnormals.
11273                                      *
11274                                      * This may or may not be a good thing. */
11275                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11276                                                    "Hexadecimal float: exponent underflow");
11277                                     break;
11278                                 }
11279 #endif
11280 #ifdef NV_MAX_EXP
11281                                 if (!negexp
11282                                     && hexfp_exp > NV_MAX_EXP - 1) {
11283                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11284                                                    "Hexadecimal float: exponent overflow");
11285                                     break;
11286                                 }
11287 #endif
11288                             }
11289                             h++;
11290                         }
11291                         if (negexp)
11292                             hexfp_exp = -hexfp_exp;
11293 #ifdef HEXFP_UQUAD
11294                         hexfp_exp -= hexfp_frac_bits;
11295 #endif
11296                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
11297                         hexfp = TRUE;
11298                         goto decimal;
11299                     }
11300                 }
11301             }
11302
11303             if (overflowed) {
11304                 if (n > 4294967295.0)
11305                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11306                                    "%s number > %s non-portable",
11307                                    Base, max);
11308                 sv = newSVnv(n);
11309             }
11310             else {
11311 #if UVSIZE > 4
11312                 if (u > 0xffffffff)
11313                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11314                                    "%s number > %s non-portable",
11315                                    Base, max);
11316 #endif
11317                 sv = newSVuv(u);
11318             }
11319             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11320                 sv = new_constant(start, s - start, "integer",
11321                                   sv, NULL, NULL, 0, NULL);
11322             else if (PL_hints & HINT_NEW_BINARY)
11323                 sv = new_constant(start, s - start, "binary",
11324                                   sv, NULL, NULL, 0, NULL);
11325         }
11326         break;
11327
11328     /*
11329       handle decimal numbers.
11330       we're also sent here when we read a 0 as the first digit
11331     */
11332     case '1': case '2': case '3': case '4': case '5':
11333     case '6': case '7': case '8': case '9': case '.':
11334       decimal:
11335         d = PL_tokenbuf;
11336         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11337         floatit = FALSE;
11338         if (hexfp) {
11339             floatit = TRUE;
11340             *d++ = '0';
11341             *d++ = 'x';
11342             s = start + 2;
11343         }
11344
11345         /* read next group of digits and _ and copy into d */
11346         while (isDIGIT(*s)
11347                || *s == '_'
11348                || UNLIKELY(hexfp && isXDIGIT(*s)))
11349         {
11350             /* skip underscores, checking for misplaced ones
11351                if -w is on
11352             */
11353             if (*s == '_') {
11354                 if (lastub && s == lastub + 1)
11355                     WARN_ABOUT_UNDERSCORE();
11356                 lastub = s++;
11357             }
11358             else {
11359                 /* check for end of fixed-length buffer */
11360                 if (d >= e)
11361                     Perl_croak(aTHX_ "%s", number_too_long);
11362                 /* if we're ok, copy the character */
11363                 *d++ = *s++;
11364             }
11365         }
11366
11367         /* final misplaced underbar check */
11368         if (lastub && s == lastub + 1)
11369             WARN_ABOUT_UNDERSCORE();
11370
11371         /* read a decimal portion if there is one.  avoid
11372            3..5 being interpreted as the number 3. followed
11373            by .5
11374         */
11375         if (*s == '.' && s[1] != '.') {
11376             floatit = TRUE;
11377             *d++ = *s++;
11378
11379             if (*s == '_') {
11380                 WARN_ABOUT_UNDERSCORE();
11381                 lastub = s;
11382             }
11383
11384             /* copy, ignoring underbars, until we run out of digits.
11385             */
11386             for (; isDIGIT(*s)
11387                    || *s == '_'
11388                    || UNLIKELY(hexfp && isXDIGIT(*s));
11389                  s++)
11390             {
11391                 /* fixed length buffer check */
11392                 if (d >= e)
11393                     Perl_croak(aTHX_ "%s", number_too_long);
11394                 if (*s == '_') {
11395                    if (lastub && s == lastub + 1)
11396                         WARN_ABOUT_UNDERSCORE();
11397                    lastub = s;
11398                 }
11399                 else
11400                     *d++ = *s;
11401             }
11402             /* fractional part ending in underbar? */
11403             if (s[-1] == '_')
11404                 WARN_ABOUT_UNDERSCORE();
11405             if (*s == '.' && isDIGIT(s[1])) {
11406                 /* oops, it's really a v-string, but without the "v" */
11407                 s = start;
11408                 goto vstring;
11409             }
11410         }
11411
11412         /* read exponent part, if present */
11413         if ((isALPHA_FOLD_EQ(*s, 'e')
11414               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11415             && strchr("+-0123456789_", s[1]))
11416         {
11417             int exp_digits = 0;
11418             const char *save_s = s;
11419             char * save_d = d;
11420
11421             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11422                ditto for p (hexfloats) */
11423             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11424                 /* At least some Mach atof()s don't grok 'E' */
11425                 *d++ = 'e';
11426             }
11427             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11428                 *d++ = 'p';
11429             }
11430
11431             s++;
11432
11433
11434             /* stray preinitial _ */
11435             if (*s == '_') {
11436                 WARN_ABOUT_UNDERSCORE();
11437                 lastub = s++;
11438             }
11439
11440             /* allow positive or negative exponent */
11441             if (*s == '+' || *s == '-')
11442                 *d++ = *s++;
11443
11444             /* stray initial _ */
11445             if (*s == '_') {
11446                 WARN_ABOUT_UNDERSCORE();
11447                 lastub = s++;
11448             }
11449
11450             /* read digits of exponent */
11451             while (isDIGIT(*s) || *s == '_') {
11452                 if (isDIGIT(*s)) {
11453                     ++exp_digits;
11454                     if (d >= e)
11455                         Perl_croak(aTHX_ "%s", number_too_long);
11456                     *d++ = *s++;
11457                 }
11458                 else {
11459                    if (((lastub && s == lastub + 1)
11460                         || (!isDIGIT(s[1]) && s[1] != '_')))
11461                         WARN_ABOUT_UNDERSCORE();
11462                    lastub = s++;
11463                 }
11464             }
11465
11466             if (!exp_digits) {
11467                 /* no exponent digits, the [eEpP] could be for something else,
11468                  * though in practice we don't get here for p since that's preparsed
11469                  * earlier, and results in only the 0xX being consumed, so behave similarly
11470                  * for decimal floats and consume only the D.DD, leaving the [eE] to the
11471                  * next token.
11472                  */
11473                 s = save_s;
11474                 d = save_d;
11475             }
11476             else {
11477                 floatit = TRUE;
11478             }
11479         }
11480
11481
11482         /*
11483            We try to do an integer conversion first if no characters
11484            indicating "float" have been found.
11485          */
11486
11487         if (!floatit) {
11488             UV uv;
11489             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11490
11491             if (flags == IS_NUMBER_IN_UV) {
11492               if (uv <= IV_MAX)
11493                 sv = newSViv(uv); /* Prefer IVs over UVs. */
11494               else
11495                 sv = newSVuv(uv);
11496             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11497               if (uv <= (UV) IV_MIN)
11498                 sv = newSViv(-(IV)uv);
11499               else
11500                 floatit = TRUE;
11501             } else
11502               floatit = TRUE;
11503         }
11504         if (floatit) {
11505             /* terminate the string */
11506             *d = '\0';
11507             if (UNLIKELY(hexfp)) {
11508 #  ifdef NV_MANT_DIG
11509                 if (significant_bits > NV_MANT_DIG)
11510                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11511                                    "Hexadecimal float: mantissa overflow");
11512 #  endif
11513 #ifdef HEXFP_UQUAD
11514                 nv = hexfp_uquad * hexfp_mult;
11515 #else /* HEXFP_NV */
11516                 nv = hexfp_nv * hexfp_mult;
11517 #endif
11518             } else {
11519                 nv = Atof(PL_tokenbuf);
11520             }
11521             sv = newSVnv(nv);
11522         }
11523
11524         if ( floatit
11525              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11526             const char *const key = floatit ? "float" : "integer";
11527             const STRLEN keylen = floatit ? 5 : 7;
11528             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11529                                 key, keylen, sv, NULL, NULL, 0, NULL);
11530         }
11531         break;
11532
11533     /* if it starts with a v, it could be a v-string */
11534     case 'v':
11535     vstring:
11536                 sv = newSV(5); /* preallocate storage space */
11537                 ENTER_with_name("scan_vstring");
11538                 SAVEFREESV(sv);
11539                 s = scan_vstring(s, PL_bufend, sv);
11540                 SvREFCNT_inc_simple_void_NN(sv);
11541                 LEAVE_with_name("scan_vstring");
11542         break;
11543     }
11544
11545     /* make the op for the constant and return */
11546
11547     if (sv)
11548         lvalp->opval = newSVOP(OP_CONST, 0, sv);
11549     else
11550         lvalp->opval = NULL;
11551
11552     return (char *)s;
11553 }
11554
11555 STATIC char *
11556 S_scan_formline(pTHX_ char *s)
11557 {
11558     SV * const stuff = newSVpvs("");
11559     bool needargs = FALSE;
11560     bool eofmt = FALSE;
11561
11562     PERL_ARGS_ASSERT_SCAN_FORMLINE;
11563
11564     while (!needargs) {
11565         char *eol;
11566         if (*s == '.') {
11567             char *t = s+1;
11568 #ifdef PERL_STRICT_CR
11569             while (SPACE_OR_TAB(*t))
11570                 t++;
11571 #else
11572             while (SPACE_OR_TAB(*t) || *t == '\r')
11573                 t++;
11574 #endif
11575             if (*t == '\n' || t == PL_bufend) {
11576                 eofmt = TRUE;
11577                 break;
11578             }
11579         }
11580         eol = (char *) memchr(s,'\n',PL_bufend-s);
11581         if (!eol++)
11582                 eol = PL_bufend;
11583         if (*s != '#') {
11584             char *t;
11585             for (t = s; t < eol; t++) {
11586                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11587                     needargs = FALSE;
11588                     goto enough;        /* ~~ must be first line in formline */
11589                 }
11590                 if (*t == '@' || *t == '^')
11591                     needargs = TRUE;
11592             }
11593             if (eol > s) {
11594                 sv_catpvn(stuff, s, eol-s);
11595 #ifndef PERL_STRICT_CR
11596                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11597                     char *end = SvPVX(stuff) + SvCUR(stuff);
11598                     end[-2] = '\n';
11599                     end[-1] = '\0';
11600                     SvCUR_set(stuff, SvCUR(stuff) - 1);
11601                 }
11602 #endif
11603             }
11604             else
11605               break;
11606         }
11607         s = (char*)eol;
11608         if ((PL_rsfp || PL_parser->filtered)
11609          && PL_parser->form_lex_state == LEX_NORMAL) {
11610             bool got_some;
11611             PL_bufptr = PL_bufend;
11612             COPLINE_INC_WITH_HERELINES;
11613             got_some = lex_next_chunk(0);
11614             CopLINE_dec(PL_curcop);
11615             s = PL_bufptr;
11616             if (!got_some)
11617                 break;
11618         }
11619         incline(s, PL_bufend);
11620     }
11621   enough:
11622     if (!SvCUR(stuff) || needargs)
11623         PL_lex_state = PL_parser->form_lex_state;
11624     if (SvCUR(stuff)) {
11625         PL_expect = XSTATE;
11626         if (needargs) {
11627             const char *s2 = s;
11628             while (isSPACE(*s2) && *s2 != '\n')
11629                 s2++;
11630             if (*s2 == '{') {
11631                 PL_expect = XTERMBLOCK;
11632                 NEXTVAL_NEXTTOKE.ival = 0;
11633                 force_next(DO);
11634             }
11635             NEXTVAL_NEXTTOKE.ival = 0;
11636             force_next(FORMLBRACK);
11637         }
11638         if (!IN_BYTES) {
11639             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11640                 SvUTF8_on(stuff);
11641         }
11642         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
11643         force_next(THING);
11644     }
11645     else {
11646         SvREFCNT_dec(stuff);
11647         if (eofmt)
11648             PL_lex_formbrack = 0;
11649     }
11650     return s;
11651 }
11652
11653 I32
11654 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11655 {
11656     const I32 oldsavestack_ix = PL_savestack_ix;
11657     CV* const outsidecv = PL_compcv;
11658
11659     SAVEI32(PL_subline);
11660     save_item(PL_subname);
11661     SAVESPTR(PL_compcv);
11662
11663     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11664     CvFLAGS(PL_compcv) |= flags;
11665
11666     PL_subline = CopLINE(PL_curcop);
11667     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11668     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11669     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11670     if (outsidecv && CvPADLIST(outsidecv))
11671         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
11672
11673     return oldsavestack_ix;
11674 }
11675
11676
11677 /* Do extra initialisation of a CV (typically one just created by
11678  * start_subparse()) if that CV is for a named sub
11679  */
11680
11681 void
11682 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
11683 {
11684     PERL_ARGS_ASSERT_INIT_NAMED_CV;
11685
11686     if (nameop->op_type == OP_CONST) {
11687         const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
11688         if (   strEQ(name, "BEGIN")
11689             || strEQ(name, "END")
11690             || strEQ(name, "INIT")
11691             || strEQ(name, "CHECK")
11692             || strEQ(name, "UNITCHECK")
11693         )
11694           CvSPECIAL_on(cv);
11695     }
11696     else
11697     /* State subs inside anonymous subs need to be
11698      clonable themselves. */
11699     if (   CvANON(CvOUTSIDE(cv))
11700         || CvCLONE(CvOUTSIDE(cv))
11701         || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
11702                         CvOUTSIDE(cv)
11703                      ))[nameop->op_targ])
11704     )
11705       CvCLONE_on(cv);
11706 }
11707
11708
11709 static int
11710 S_yywarn(pTHX_ const char *const s, U32 flags)
11711 {
11712     PERL_ARGS_ASSERT_YYWARN;
11713
11714     PL_in_eval |= EVAL_WARNONLY;
11715     yyerror_pv(s, flags);
11716     return 0;
11717 }
11718
11719 void
11720 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
11721 {
11722     PERL_ARGS_ASSERT_ABORT_EXECUTION;
11723
11724     if (PL_minus_c)
11725         Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
11726     else {
11727         Perl_croak(aTHX_
11728                 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
11729     }
11730     NOT_REACHED; /* NOTREACHED */
11731 }
11732
11733 void
11734 Perl_yyquit(pTHX)
11735 {
11736     /* Called, after at least one error has been found, to abort the parse now,
11737      * instead of trying to forge ahead */
11738
11739     yyerror_pvn(NULL, 0, 0);
11740 }
11741
11742 int
11743 Perl_yyerror(pTHX_ const char *const s)
11744 {
11745     PERL_ARGS_ASSERT_YYERROR;
11746     return yyerror_pvn(s, strlen(s), 0);
11747 }
11748
11749 int
11750 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11751 {
11752     PERL_ARGS_ASSERT_YYERROR_PV;
11753     return yyerror_pvn(s, strlen(s), flags);
11754 }
11755
11756 int
11757 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11758 {
11759     const char *context = NULL;
11760     int contlen = -1;
11761     SV *msg;
11762     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11763     int yychar  = PL_parser->yychar;
11764
11765     /* Output error message 's' with length 'len'.  'flags' are SV flags that
11766      * apply.  If the number of errors found is large enough, it abandons
11767      * parsing.  If 's' is NULL, there is no message, and it abandons
11768      * processing unconditionally */
11769
11770     if (s != NULL) {
11771         if (!yychar || (yychar == ';' && !PL_rsfp))
11772             sv_catpvs(where_sv, "at EOF");
11773         else if (   PL_oldoldbufptr
11774                  && PL_bufptr > PL_oldoldbufptr
11775                  && PL_bufptr - PL_oldoldbufptr < 200
11776                  && PL_oldoldbufptr != PL_oldbufptr
11777                  && PL_oldbufptr != PL_bufptr)
11778         {
11779             /*
11780                     Only for NetWare:
11781                     The code below is removed for NetWare because it
11782                     abends/crashes on NetWare when the script has error such as
11783                     not having the closing quotes like:
11784                         if ($var eq "value)
11785                     Checking of white spaces is anyway done in NetWare code.
11786             */
11787 #ifndef NETWARE
11788             while (isSPACE(*PL_oldoldbufptr))
11789                 PL_oldoldbufptr++;
11790 #endif
11791             context = PL_oldoldbufptr;
11792             contlen = PL_bufptr - PL_oldoldbufptr;
11793         }
11794         else if (  PL_oldbufptr
11795                 && PL_bufptr > PL_oldbufptr
11796                 && PL_bufptr - PL_oldbufptr < 200
11797                 && PL_oldbufptr != PL_bufptr) {
11798             /*
11799                     Only for NetWare:
11800                     The code below is removed for NetWare because it
11801                     abends/crashes on NetWare when the script has error such as
11802                     not having the closing quotes like:
11803                         if ($var eq "value)
11804                     Checking of white spaces is anyway done in NetWare code.
11805             */
11806 #ifndef NETWARE
11807             while (isSPACE(*PL_oldbufptr))
11808                 PL_oldbufptr++;
11809 #endif
11810             context = PL_oldbufptr;
11811             contlen = PL_bufptr - PL_oldbufptr;
11812         }
11813         else if (yychar > 255)
11814             sv_catpvs(where_sv, "next token ???");
11815         else if (yychar == YYEMPTY) {
11816             if (PL_lex_state == LEX_NORMAL)
11817                 sv_catpvs(where_sv, "at end of line");
11818             else if (PL_lex_inpat)
11819                 sv_catpvs(where_sv, "within pattern");
11820             else
11821                 sv_catpvs(where_sv, "within string");
11822         }
11823         else {
11824             sv_catpvs(where_sv, "next char ");
11825             if (yychar < 32)
11826                 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11827             else if (isPRINT_LC(yychar)) {
11828                 const char string = yychar;
11829                 sv_catpvn(where_sv, &string, 1);
11830             }
11831             else
11832                 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11833         }
11834         msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11835         Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
11836             OutCopFILE(PL_curcop),
11837             (IV)(PL_parser->preambling == NOLINE
11838                    ? CopLINE(PL_curcop)
11839                    : PL_parser->preambling));
11840         if (context)
11841             Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
11842                                  UTF8fARG(UTF, contlen, context));
11843         else
11844             Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
11845         if (   PL_multi_start < PL_multi_end
11846             && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
11847         {
11848             Perl_sv_catpvf(aTHX_ msg,
11849             "  (Might be a runaway multi-line %c%c string starting on"
11850             " line %" IVdf ")\n",
11851                     (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11852             PL_multi_end = 0;
11853         }
11854         if (PL_in_eval & EVAL_WARNONLY) {
11855             PL_in_eval &= ~EVAL_WARNONLY;
11856             Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
11857         }
11858         else {
11859             qerror(msg);
11860         }
11861     }
11862     if (s == NULL || PL_error_count >= 10) {
11863         const char * msg = "";
11864         const char * const name = OutCopFILE(PL_curcop);
11865
11866         if (PL_in_eval) {
11867             SV * errsv = ERRSV;
11868             if (SvCUR(errsv)) {
11869                 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
11870             }
11871         }
11872
11873         if (s == NULL) {
11874             abort_execution(msg, name);
11875         }
11876         else {
11877             Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
11878         }
11879     }
11880     PL_in_my = 0;
11881     PL_in_my_stash = NULL;
11882     return 0;
11883 }
11884
11885 STATIC char*
11886 S_swallow_bom(pTHX_ U8 *s)
11887 {
11888     const STRLEN slen = SvCUR(PL_linestr);
11889
11890     PERL_ARGS_ASSERT_SWALLOW_BOM;
11891
11892     switch (s[0]) {
11893     case 0xFF:
11894         if (s[1] == 0xFE) {
11895             /* UTF-16 little-endian? (or UTF-32LE?) */
11896             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11897                 /* diag_listed_as: Unsupported script encoding %s */
11898                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11899 #ifndef PERL_NO_UTF16_FILTER
11900 #ifdef DEBUGGING
11901             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11902 #endif
11903             s += 2;
11904             if (PL_bufend > (char*)s) {
11905                 s = add_utf16_textfilter(s, TRUE);
11906             }
11907 #else
11908             /* diag_listed_as: Unsupported script encoding %s */
11909             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11910 #endif
11911         }
11912         break;
11913     case 0xFE:
11914         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11915 #ifndef PERL_NO_UTF16_FILTER
11916 #ifdef DEBUGGING
11917             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11918 #endif
11919             s += 2;
11920             if (PL_bufend > (char *)s) {
11921                 s = add_utf16_textfilter(s, FALSE);
11922             }
11923 #else
11924             /* diag_listed_as: Unsupported script encoding %s */
11925             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11926 #endif
11927         }
11928         break;
11929     case BOM_UTF8_FIRST_BYTE: {
11930         if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
11931 #ifdef DEBUGGING
11932             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11933 #endif
11934             s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
11935         }
11936         break;
11937     }
11938     case 0:
11939         if (slen > 3) {
11940              if (s[1] == 0) {
11941                   if (s[2] == 0xFE && s[3] == 0xFF) {
11942                        /* UTF-32 big-endian */
11943                        /* diag_listed_as: Unsupported script encoding %s */
11944                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11945                   }
11946              }
11947              else if (s[2] == 0 && s[3] != 0) {
11948                   /* Leading bytes
11949                    * 00 xx 00 xx
11950                    * are a good indicator of UTF-16BE. */
11951 #ifndef PERL_NO_UTF16_FILTER
11952 #ifdef DEBUGGING
11953                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11954 #endif
11955                   s = add_utf16_textfilter(s, FALSE);
11956 #else
11957                   /* diag_listed_as: Unsupported script encoding %s */
11958                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11959 #endif
11960              }
11961         }
11962         break;
11963
11964     default:
11965          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11966                   /* Leading bytes
11967                    * xx 00 xx 00
11968                    * are a good indicator of UTF-16LE. */
11969 #ifndef PERL_NO_UTF16_FILTER
11970 #ifdef DEBUGGING
11971               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11972 #endif
11973               s = add_utf16_textfilter(s, TRUE);
11974 #else
11975               /* diag_listed_as: Unsupported script encoding %s */
11976               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11977 #endif
11978          }
11979     }
11980     return (char*)s;
11981 }
11982
11983
11984 #ifndef PERL_NO_UTF16_FILTER
11985 static I32
11986 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11987 {
11988     SV *const filter = FILTER_DATA(idx);
11989     /* We re-use this each time round, throwing the contents away before we
11990        return.  */
11991     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11992     SV *const utf8_buffer = filter;
11993     IV status = IoPAGE(filter);
11994     const bool reverse = cBOOL(IoLINES(filter));
11995     I32 retval;
11996
11997     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11998
11999     /* As we're automatically added, at the lowest level, and hence only called
12000        from this file, we can be sure that we're not called in block mode. Hence
12001        don't bother writing code to deal with block mode.  */
12002     if (maxlen) {
12003         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12004     }
12005     if (status < 0) {
12006         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12007     }
12008     DEBUG_P(PerlIO_printf(Perl_debug_log,
12009                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12010                           FPTR2DPTR(void *, S_utf16_textfilter),
12011                           reverse ? 'l' : 'b', idx, maxlen, status,
12012                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12013
12014     while (1) {
12015         STRLEN chars;
12016         STRLEN have;
12017         I32 newlen;
12018         U8 *end;
12019         /* First, look in our buffer of existing UTF-8 data:  */
12020         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12021
12022         if (nl) {
12023             ++nl;
12024         } else if (status == 0) {
12025             /* EOF */
12026             IoPAGE(filter) = 0;
12027             nl = SvEND(utf8_buffer);
12028         }
12029         if (nl) {
12030             STRLEN got = nl - SvPVX(utf8_buffer);
12031             /* Did we have anything to append?  */
12032             retval = got != 0;
12033             sv_catpvn(sv, SvPVX(utf8_buffer), got);
12034             /* Everything else in this code works just fine if SVp_POK isn't
12035                set.  This, however, needs it, and we need it to work, else
12036                we loop infinitely because the buffer is never consumed.  */
12037             sv_chop(utf8_buffer, nl);
12038             break;
12039         }
12040
12041         /* OK, not a complete line there, so need to read some more UTF-16.
12042            Read an extra octect if the buffer currently has an odd number. */
12043         while (1) {
12044             if (status <= 0)
12045                 break;
12046             if (SvCUR(utf16_buffer) >= 2) {
12047                 /* Location of the high octet of the last complete code point.
12048                    Gosh, UTF-16 is a pain. All the benefits of variable length,
12049                    *coupled* with all the benefits of partial reads and
12050                    endianness.  */
12051                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12052                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12053
12054                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12055                     break;
12056                 }
12057
12058                 /* We have the first half of a surrogate. Read more.  */
12059                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12060             }
12061
12062             status = FILTER_READ(idx + 1, utf16_buffer,
12063                                  160 + (SvCUR(utf16_buffer) & 1));
12064             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12065             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12066             if (status < 0) {
12067                 /* Error */
12068                 IoPAGE(filter) = status;
12069                 return status;
12070             }
12071         }
12072
12073         /* 'chars' isn't quite the right name, as code points above 0xFFFF
12074          * require 4 bytes per char */
12075         chars = SvCUR(utf16_buffer) >> 1;
12076         have = SvCUR(utf8_buffer);
12077
12078         /* Assume the worst case size as noted by the functions: twice the
12079          * number of input bytes */
12080         SvGROW(utf8_buffer, have + chars * 4 + 1);
12081
12082         if (reverse) {
12083             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12084                                          (U8*)SvPVX_const(utf8_buffer) + have,
12085                                          chars * 2, &newlen);
12086         } else {
12087             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12088                                 (U8*)SvPVX_const(utf8_buffer) + have,
12089                                 chars * 2, &newlen);
12090         }
12091         SvCUR_set(utf8_buffer, have + newlen);
12092         *end = '\0';
12093
12094         /* No need to keep this SV "well-formed" with a '\0' after the end, as
12095            it's private to us, and utf16_to_utf8{,reversed} take a
12096            (pointer,length) pair, rather than a NUL-terminated string.  */
12097         if(SvCUR(utf16_buffer) & 1) {
12098             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12099             SvCUR_set(utf16_buffer, 1);
12100         } else {
12101             SvCUR_set(utf16_buffer, 0);
12102         }
12103     }
12104     DEBUG_P(PerlIO_printf(Perl_debug_log,
12105                           "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12106                           status,
12107                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12108     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12109     return retval;
12110 }
12111
12112 static U8 *
12113 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12114 {
12115     SV *filter = filter_add(S_utf16_textfilter, NULL);
12116
12117     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12118
12119     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12120     SvPVCLEAR(filter);
12121     IoLINES(filter) = reversed;
12122     IoPAGE(filter) = 1; /* Not EOF */
12123
12124     /* Sadly, we have to return a valid pointer, come what may, so we have to
12125        ignore any error return from this.  */
12126     SvCUR_set(PL_linestr, 0);
12127     if (FILTER_READ(0, PL_linestr, 0)) {
12128         SvUTF8_on(PL_linestr);
12129     } else {
12130         SvUTF8_on(PL_linestr);
12131     }
12132     PL_bufend = SvEND(PL_linestr);
12133     return (U8*)SvPVX(PL_linestr);
12134 }
12135 #endif
12136
12137 /*
12138 Returns a pointer to the next character after the parsed
12139 vstring, as well as updating the passed in sv.
12140
12141 Function must be called like
12142
12143         sv = sv_2mortal(newSV(5));
12144         s = scan_vstring(s,e,sv);
12145
12146 where s and e are the start and end of the string.
12147 The sv should already be large enough to store the vstring
12148 passed in, for performance reasons.
12149
12150 This function may croak if fatal warnings are enabled in the
12151 calling scope, hence the sv_2mortal in the example (to prevent
12152 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
12153 sv_2mortal.
12154
12155 */
12156
12157 char *
12158 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12159 {
12160     const char *pos = s;
12161     const char *start = s;
12162
12163     PERL_ARGS_ASSERT_SCAN_VSTRING;
12164
12165     if (*pos == 'v') pos++;  /* get past 'v' */
12166     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12167         pos++;
12168     if ( *pos != '.') {
12169         /* this may not be a v-string if followed by => */
12170         const char *next = pos;
12171         while (next < e && isSPACE(*next))
12172             ++next;
12173         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12174             /* return string not v-string */
12175             sv_setpvn(sv,(char *)s,pos-s);
12176             return (char *)pos;
12177         }
12178     }
12179
12180     if (!isALPHA(*pos)) {
12181         U8 tmpbuf[UTF8_MAXBYTES+1];
12182
12183         if (*s == 'v')
12184             s++;  /* get past 'v' */
12185
12186         SvPVCLEAR(sv);
12187
12188         for (;;) {
12189             /* this is atoi() that tolerates underscores */
12190             U8 *tmpend;
12191             UV rev = 0;
12192             const char *end = pos;
12193             UV mult = 1;
12194             while (--end >= s) {
12195                 if (*end != '_') {
12196                     const UV orev = rev;
12197                     rev += (*end - '0') * mult;
12198                     mult *= 10;
12199                     if (orev > rev)
12200                         /* diag_listed_as: Integer overflow in %s number */
12201                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12202                                          "Integer overflow in decimal number");
12203                 }
12204             }
12205
12206             /* Append native character for the rev point */
12207             tmpend = uvchr_to_utf8(tmpbuf, rev);
12208             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12209             if (!UVCHR_IS_INVARIANT(rev))
12210                  SvUTF8_on(sv);
12211             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12212                  s = ++pos;
12213             else {
12214                  s = pos;
12215                  break;
12216             }
12217             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12218                  pos++;
12219         }
12220         SvPOK_on(sv);
12221         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12222         SvRMAGICAL_on(sv);
12223     }
12224     return (char *)s;
12225 }
12226
12227 int
12228 Perl_keyword_plugin_standard(pTHX_
12229         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12230 {
12231     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12232     PERL_UNUSED_CONTEXT;
12233     PERL_UNUSED_ARG(keyword_ptr);
12234     PERL_UNUSED_ARG(keyword_len);
12235     PERL_UNUSED_ARG(op_ptr);
12236     return KEYWORD_PLUGIN_DECLINE;
12237 }
12238
12239 /*
12240 =for apidoc Amx|void|wrap_keyword_plugin|Perl_keyword_plugin_t new_plugin|Perl_keyword_plugin_t *old_plugin_p
12241
12242 Puts a C function into the chain of keyword plugins.  This is the
12243 preferred way to manipulate the L</PL_keyword_plugin> variable.
12244 C<new_plugin> is a pointer to the C function that is to be added to the
12245 keyword plugin chain, and C<old_plugin_p> points to the storage location
12246 where a pointer to the next function in the chain will be stored.  The
12247 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12248 while the value previously stored there is written to C<*old_plugin_p>.
12249
12250 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12251 to hook keyword parsing may find itself invoked more than once per
12252 process, typically in different threads.  To handle that situation, this
12253 function is idempotent.  The location C<*old_plugin_p> must initially
12254 (once per process) contain a null pointer.  A C variable of static
12255 duration (declared at file scope, typically also marked C<static> to give
12256 it internal linkage) will be implicitly initialised appropriately, if it
12257 does not have an explicit initialiser.  This function will only actually
12258 modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
12259 function is also thread safe on the small scale.  It uses appropriate
12260 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12261
12262 When this function is called, the function referenced by C<new_plugin>
12263 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12264 In a threading situation, C<new_plugin> may be called immediately, even
12265 before this function has returned.  C<*old_plugin_p> will always be
12266 appropriately set before C<new_plugin> is called.  If C<new_plugin>
12267 decides not to do anything special with the identifier that it is given
12268 (which is the usual case for most calls to a keyword plugin), it must
12269 chain the plugin function referenced by C<*old_plugin_p>.
12270
12271 Taken all together, XS code to install a keyword plugin should typically
12272 look something like this:
12273
12274     static Perl_keyword_plugin_t next_keyword_plugin;
12275     static OP *my_keyword_plugin(pTHX_
12276         char *keyword_plugin, STRLEN keyword_len, OP **op_ptr)
12277     {
12278         if (memEQs(keyword_ptr, keyword_len,
12279                    "my_new_keyword")) {
12280             ...
12281         } else {
12282             return next_keyword_plugin(aTHX_
12283                 keyword_ptr, keyword_len, op_ptr);
12284         }
12285     }
12286     BOOT:
12287         wrap_keyword_plugin(my_keyword_plugin,
12288                             &next_keyword_plugin);
12289
12290 Direct access to L</PL_keyword_plugin> should be avoided.
12291
12292 =cut
12293 */
12294
12295 void
12296 Perl_wrap_keyword_plugin(pTHX_
12297     Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12298 {
12299     dVAR;
12300
12301     PERL_UNUSED_CONTEXT;
12302     PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12303     if (*old_plugin_p) return;
12304     KEYWORD_PLUGIN_MUTEX_LOCK;
12305     if (!*old_plugin_p) {
12306         *old_plugin_p = PL_keyword_plugin;
12307         PL_keyword_plugin = new_plugin;
12308     }
12309     KEYWORD_PLUGIN_MUTEX_UNLOCK;
12310 }
12311
12312 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12313 static void
12314 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12315 {
12316     SAVEI32(PL_lex_brackets);
12317     if (PL_lex_brackets > 100)
12318         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12319     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12320     SAVEI32(PL_lex_allbrackets);
12321     PL_lex_allbrackets = 0;
12322     SAVEI8(PL_lex_fakeeof);
12323     PL_lex_fakeeof = (U8)fakeeof;
12324     if(yyparse(gramtype) && !PL_parser->error_count)
12325         qerror(Perl_mess(aTHX_ "Parse error"));
12326 }
12327
12328 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12329 static OP *
12330 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12331 {
12332     OP *o;
12333     ENTER;
12334     SAVEVPTR(PL_eval_root);
12335     PL_eval_root = NULL;
12336     parse_recdescent(gramtype, fakeeof);
12337     o = PL_eval_root;
12338     LEAVE;
12339     return o;
12340 }
12341
12342 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12343 static OP *
12344 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12345 {
12346     OP *exprop;
12347     if (flags & ~PARSE_OPTIONAL)
12348         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12349     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12350     if (!exprop && !(flags & PARSE_OPTIONAL)) {
12351         if (!PL_parser->error_count)
12352             qerror(Perl_mess(aTHX_ "Parse error"));
12353         exprop = newOP(OP_NULL, 0);
12354     }
12355     return exprop;
12356 }
12357
12358 /*
12359 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
12360
12361 Parse a Perl arithmetic expression.  This may contain operators of precedence
12362 down to the bit shift operators.  The expression must be followed (and thus
12363 terminated) either by a comparison or lower-precedence operator or by
12364 something that would normally terminate an expression such as semicolon.
12365 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12366 otherwise it is mandatory.  It is up to the caller to ensure that the
12367 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12368 the source of the code to be parsed and the lexical context for the
12369 expression.
12370
12371 The op tree representing the expression is returned.  If an optional
12372 expression is absent, a null pointer is returned, otherwise the pointer
12373 will be non-null.
12374
12375 If an error occurs in parsing or compilation, in most cases a valid op
12376 tree is returned anyway.  The error is reflected in the parser state,
12377 normally resulting in a single exception at the top level of parsing
12378 which covers all the compilation errors that occurred.  Some compilation
12379 errors, however, will throw an exception immediately.
12380
12381 =cut
12382 */
12383
12384 OP *
12385 Perl_parse_arithexpr(pTHX_ U32 flags)
12386 {
12387     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12388 }
12389
12390 /*
12391 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12392
12393 Parse a Perl term expression.  This may contain operators of precedence
12394 down to the assignment operators.  The expression must be followed (and thus
12395 terminated) either by a comma or lower-precedence operator or by
12396 something that would normally terminate an expression such as semicolon.
12397 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12398 otherwise it is mandatory.  It is up to the caller to ensure that the
12399 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12400 the source of the code to be parsed and the lexical context for the
12401 expression.
12402
12403 The op tree representing the expression is returned.  If an optional
12404 expression is absent, a null pointer is returned, otherwise the pointer
12405 will be non-null.
12406
12407 If an error occurs in parsing or compilation, in most cases a valid op
12408 tree is returned anyway.  The error is reflected in the parser state,
12409 normally resulting in a single exception at the top level of parsing
12410 which covers all the compilation errors that occurred.  Some compilation
12411 errors, however, will throw an exception immediately.
12412
12413 =cut
12414 */
12415
12416 OP *
12417 Perl_parse_termexpr(pTHX_ U32 flags)
12418 {
12419     return parse_expr(LEX_FAKEEOF_COMMA, flags);
12420 }
12421
12422 /*
12423 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12424
12425 Parse a Perl list expression.  This may contain operators of precedence
12426 down to the comma operator.  The expression must be followed (and thus
12427 terminated) either by a low-precedence logic operator such as C<or> or by
12428 something that would normally terminate an expression such as semicolon.
12429 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12430 otherwise it is mandatory.  It is up to the caller to ensure that the
12431 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12432 the source of the code to be parsed and the lexical context for the
12433 expression.
12434
12435 The op tree representing the expression is returned.  If an optional
12436 expression is absent, a null pointer is returned, otherwise the pointer
12437 will be non-null.
12438
12439 If an error occurs in parsing or compilation, in most cases a valid op
12440 tree is returned anyway.  The error is reflected in the parser state,
12441 normally resulting in a single exception at the top level of parsing
12442 which covers all the compilation errors that occurred.  Some compilation
12443 errors, however, will throw an exception immediately.
12444
12445 =cut
12446 */
12447
12448 OP *
12449 Perl_parse_listexpr(pTHX_ U32 flags)
12450 {
12451     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12452 }
12453
12454 /*
12455 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12456
12457 Parse a single complete Perl expression.  This allows the full
12458 expression grammar, including the lowest-precedence operators such
12459 as C<or>.  The expression must be followed (and thus terminated) by a
12460 token that an expression would normally be terminated by: end-of-file,
12461 closing bracketing punctuation, semicolon, or one of the keywords that
12462 signals a postfix expression-statement modifier.  If C<flags> has the
12463 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
12464 mandatory.  It is up to the caller to ensure that the dynamic parser
12465 state (L</PL_parser> et al) is correctly set to reflect the source of
12466 the code to be parsed and the lexical context for the expression.
12467
12468 The op tree representing the expression is returned.  If an optional
12469 expression is absent, a null pointer is returned, otherwise the pointer
12470 will be non-null.
12471
12472 If an error occurs in parsing or compilation, in most cases a valid op
12473 tree is returned anyway.  The error is reflected in the parser state,
12474 normally resulting in a single exception at the top level of parsing
12475 which covers all the compilation errors that occurred.  Some compilation
12476 errors, however, will throw an exception immediately.
12477
12478 =cut
12479 */
12480
12481 OP *
12482 Perl_parse_fullexpr(pTHX_ U32 flags)
12483 {
12484     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12485 }
12486
12487 /*
12488 =for apidoc Amx|OP *|parse_block|U32 flags
12489
12490 Parse a single complete Perl code block.  This consists of an opening
12491 brace, a sequence of statements, and a closing brace.  The block
12492 constitutes a lexical scope, so C<my> variables and various compile-time
12493 effects can be contained within it.  It is up to the caller to ensure
12494 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12495 reflect the source of the code to be parsed and the lexical context for
12496 the statement.
12497
12498 The op tree representing the code block is returned.  This is always a
12499 real op, never a null pointer.  It will normally be a C<lineseq> list,
12500 including C<nextstate> or equivalent ops.  No ops to construct any kind
12501 of runtime scope are included by virtue of it being a block.
12502
12503 If an error occurs in parsing or compilation, in most cases a valid op
12504 tree (most likely null) is returned anyway.  The error is reflected in
12505 the parser state, normally resulting in a single exception at the top
12506 level of parsing which covers all the compilation errors that occurred.
12507 Some compilation errors, however, will throw an exception immediately.
12508
12509 The C<flags> parameter is reserved for future use, and must always
12510 be zero.
12511
12512 =cut
12513 */
12514
12515 OP *
12516 Perl_parse_block(pTHX_ U32 flags)
12517 {
12518     if (flags)
12519         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12520     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12521 }
12522
12523 /*
12524 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12525
12526 Parse a single unadorned Perl statement.  This may be a normal imperative
12527 statement or a declaration that has compile-time effect.  It does not
12528 include any label or other affixture.  It is up to the caller to ensure
12529 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12530 reflect the source of the code to be parsed and the lexical context for
12531 the statement.
12532
12533 The op tree representing the statement is returned.  This may be a
12534 null pointer if the statement is null, for example if it was actually
12535 a subroutine definition (which has compile-time side effects).  If not
12536 null, it will be ops directly implementing the statement, suitable to
12537 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
12538 equivalent op (except for those embedded in a scope contained entirely
12539 within the statement).
12540
12541 If an error occurs in parsing or compilation, in most cases a valid op
12542 tree (most likely null) is returned anyway.  The error is reflected in
12543 the parser state, normally resulting in a single exception at the top
12544 level of parsing which covers all the compilation errors that occurred.
12545 Some compilation errors, however, will throw an exception immediately.
12546
12547 The C<flags> parameter is reserved for future use, and must always
12548 be zero.
12549
12550 =cut
12551 */
12552
12553 OP *
12554 Perl_parse_barestmt(pTHX_ U32 flags)
12555 {
12556     if (flags)
12557         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12558     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12559 }
12560
12561 /*
12562 =for apidoc Amx|SV *|parse_label|U32 flags
12563
12564 Parse a single label, possibly optional, of the type that may prefix a
12565 Perl statement.  It is up to the caller to ensure that the dynamic parser
12566 state (L</PL_parser> et al) is correctly set to reflect the source of
12567 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
12568 label is optional, otherwise it is mandatory.
12569
12570 The name of the label is returned in the form of a fresh scalar.  If an
12571 optional label is absent, a null pointer is returned.
12572
12573 If an error occurs in parsing, which can only occur if the label is
12574 mandatory, a valid label is returned anyway.  The error is reflected in
12575 the parser state, normally resulting in a single exception at the top
12576 level of parsing which covers all the compilation errors that occurred.
12577
12578 =cut
12579 */
12580
12581 SV *
12582 Perl_parse_label(pTHX_ U32 flags)
12583 {
12584     if (flags & ~PARSE_OPTIONAL)
12585         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12586     if (PL_nexttoke) {
12587         PL_parser->yychar = yylex();
12588         if (PL_parser->yychar == LABEL) {
12589             SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
12590             PL_parser->yychar = YYEMPTY;
12591             cSVOPx(pl_yylval.opval)->op_sv = NULL;
12592             op_free(pl_yylval.opval);
12593             return labelsv;
12594         } else {
12595             yyunlex();
12596             goto no_label;
12597         }
12598     } else {
12599         char *s, *t;
12600         STRLEN wlen, bufptr_pos;
12601         lex_read_space(0);
12602         t = s = PL_bufptr;
12603         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
12604             goto no_label;
12605         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12606         if (word_takes_any_delimiter(s, wlen))
12607             goto no_label;
12608         bufptr_pos = s - SvPVX(PL_linestr);
12609         PL_bufptr = t;
12610         lex_read_space(LEX_KEEP_PREVIOUS);
12611         t = PL_bufptr;
12612         s = SvPVX(PL_linestr) + bufptr_pos;
12613         if (t[0] == ':' && t[1] != ':') {
12614             PL_oldoldbufptr = PL_oldbufptr;
12615             PL_oldbufptr = s;
12616             PL_bufptr = t+1;
12617             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12618         } else {
12619             PL_bufptr = s;
12620             no_label:
12621             if (flags & PARSE_OPTIONAL) {
12622                 return NULL;
12623             } else {
12624                 qerror(Perl_mess(aTHX_ "Parse error"));
12625                 return newSVpvs("x");
12626             }
12627         }
12628     }
12629 }
12630
12631 /*
12632 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12633
12634 Parse a single complete Perl statement.  This may be a normal imperative
12635 statement or a declaration that has compile-time effect, and may include
12636 optional labels.  It is up to the caller to ensure that the dynamic
12637 parser state (L</PL_parser> et al) is correctly set to reflect the source
12638 of the code to be parsed and the lexical context for the statement.
12639
12640 The op tree representing the statement is returned.  This may be a
12641 null pointer if the statement is null, for example if it was actually
12642 a subroutine definition (which has compile-time side effects).  If not
12643 null, it will be the result of a L</newSTATEOP> call, normally including
12644 a C<nextstate> or equivalent op.
12645
12646 If an error occurs in parsing or compilation, in most cases a valid op
12647 tree (most likely null) is returned anyway.  The error is reflected in
12648 the parser state, normally resulting in a single exception at the top
12649 level of parsing which covers all the compilation errors that occurred.
12650 Some compilation errors, however, will throw an exception immediately.
12651
12652 The C<flags> parameter is reserved for future use, and must always
12653 be zero.
12654
12655 =cut
12656 */
12657
12658 OP *
12659 Perl_parse_fullstmt(pTHX_ U32 flags)
12660 {
12661     if (flags)
12662         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12663     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12664 }
12665
12666 /*
12667 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12668
12669 Parse a sequence of zero or more Perl statements.  These may be normal
12670 imperative statements, including optional labels, or declarations
12671 that have compile-time effect, or any mixture thereof.  The statement
12672 sequence ends when a closing brace or end-of-file is encountered in a
12673 place where a new statement could have validly started.  It is up to
12674 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12675 is correctly set to reflect the source of the code to be parsed and the
12676 lexical context for the statements.
12677
12678 The op tree representing the statement sequence is returned.  This may
12679 be a null pointer if the statements were all null, for example if there
12680 were no statements or if there were only subroutine definitions (which
12681 have compile-time side effects).  If not null, it will be a C<lineseq>
12682 list, normally including C<nextstate> or equivalent ops.
12683
12684 If an error occurs in parsing or compilation, in most cases a valid op
12685 tree is returned anyway.  The error is reflected in the parser state,
12686 normally resulting in a single exception at the top level of parsing
12687 which covers all the compilation errors that occurred.  Some compilation
12688 errors, however, will throw an exception immediately.
12689
12690 The C<flags> parameter is reserved for future use, and must always
12691 be zero.
12692
12693 =cut
12694 */
12695
12696 OP *
12697 Perl_parse_stmtseq(pTHX_ U32 flags)
12698 {
12699     OP *stmtseqop;
12700     I32 c;
12701     if (flags)
12702         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12703     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12704     c = lex_peek_unichar(0);
12705     if (c != -1 && c != /*{*/'}')
12706         qerror(Perl_mess(aTHX_ "Parse error"));
12707     return stmtseqop;
12708 }
12709
12710 /*
12711  * ex: set ts=8 sts=4 sw=4 et:
12712  */