toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "dquote_inline.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* XXX temporary backwards compatibility */
  49 #define PL_lex_brackets         (PL_parser->lex_brackets)
  50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  53 #define PL_lex_casemods         (PL_parser->lex_casemods)
  54 #define PL_lex_casestack        (PL_parser->lex_casestack)
  55 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  56 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  57 #define PL_lex_inpat            (PL_parser->lex_inpat)
  58 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  59 #define PL_lex_op               (PL_parser->lex_op)
  60 #define PL_lex_repl             (PL_parser->lex_repl)
  61 #define PL_lex_starts           (PL_parser->lex_starts)
  62 #define PL_lex_stuff            (PL_parser->lex_stuff)
  63 #define PL_multi_start          (PL_parser->multi_start)
  64 #define PL_multi_open           (PL_parser->multi_open)
  65 #define PL_multi_close          (PL_parser->multi_close)
  66 #define PL_preambled            (PL_parser->preambled)
  67 #define PL_linestr              (PL_parser->linestr)
  68 #define PL_expect               (PL_parser->expect)
  69 #define PL_copline              (PL_parser->copline)
  70 #define PL_bufptr               (PL_parser->bufptr)
  71 #define PL_oldbufptr            (PL_parser->oldbufptr)
  72 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  73 #define PL_linestart            (PL_parser->linestart)
  74 #define PL_bufend               (PL_parser->bufend)
  75 #define PL_last_uni             (PL_parser->last_uni)
  76 #define PL_last_lop             (PL_parser->last_lop)
  77 #define PL_last_lop_op          (PL_parser->last_lop_op)
  78 #define PL_lex_state            (PL_parser->lex_state)
  79 #define PL_rsfp                 (PL_parser->rsfp)
  80 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  81 #define PL_in_my                (PL_parser->in_my)
  82 #define PL_in_my_stash          (PL_parser->in_my_stash)
  83 #define PL_tokenbuf             (PL_parser->tokenbuf)
  84 #define PL_multi_end            (PL_parser->multi_end)
  85 #define PL_error_count          (PL_parser->error_count)
  86
  87 #  define PL_nexttoke           (PL_parser->nexttoke)
  88 #  define PL_nexttype           (PL_parser->nexttype)
  89 #  define PL_nextval            (PL_parser->nextval)
  90
  91
  92 #define SvEVALED(sv) \
  93     (SvTYPE(sv) >= SVt_PVNV \
  94     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
  95
  96 static const char* const ident_too_long = "Identifier too long";
  97
  98 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
  99
 100 #define XENUMMASK  0x3f
 101 #define XFAKEEOF   0x40
 102 #define XFAKEBRACK 0x80
 103
 104 #ifdef USE_UTF8_SCRIPTS
 105 #   define UTF cBOOL(!IN_BYTES)
 106 #else
 107 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 108 #endif
 109
 110 /* The maximum number of characters preceding the unrecognized one to display */
 111 #define UNRECOGNIZED_PRECEDE_COUNT 10
 112
 113 /* In variables named $^X, these are the legal values for X.
 114  * 1999-02-27 mjd-perl-patch@plover.com */
 115 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 116
 117 #define SPACE_OR_TAB(c) isBLANK_A(c)
 118
 119 #define HEXFP_PEEK(s)     \
 120     (((s[0] == '.') && \
 121       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
 122      isALPHA_FOLD_EQ(s[0], 'p'))
 123
 124 /* LEX_* are values for PL_lex_state, the state of the lexer.
 125  * They are arranged oddly so that the guard on the switch statement
 126  * can get by with a single comparison (if the compiler is smart enough).
 127  *
 128  * These values refer to the various states within a sublex parse,
 129  * i.e. within a double quotish string
 130  */
 131
 132 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 133
 134 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 135 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 136 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 137 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 138 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 139
 140                                    /* at end of code, eg "$x" followed by:  */
 141 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 142 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 143
 144 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 145                                         string or after \E, $foo, etc       */
 146 #define LEX_INTERPCONST          2 /* NOT USED */
 147 #define LEX_FORMLINE             1 /* expecting a format line               */
 148
 149
 150 #ifdef DEBUGGING
 151 static const char* const lex_state_names[] = {
 152     "KNOWNEXT",
 153     "FORMLINE",
 154     "INTERPCONST",
 155     "INTERPCONCAT",
 156     "INTERPENDMAYBE",
 157     "INTERPEND",
 158     "INTERPSTART",
 159     "INTERPPUSH",
 160     "INTERPCASEMOD",
 161     "INTERPNORMAL",
 162     "NORMAL"
 163 };
 164 #endif
 165
 166 #include "keywords.h"
 167
 168 /* CLINE is a macro that ensures PL_copline has a sane value */
 169
 170 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 171
 172 /*
 173  * Convenience functions to return different tokens and prime the
 174  * lexer for the next token.  They all take an argument.
 175  *
 176  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 177  * OPERATOR     : generic operator
 178  * AOPERATOR    : assignment operator
 179  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 180  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 181  * PREREF       : *EXPR where EXPR is not a simple identifier
 182  * TERM         : expression term
 183  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 184  * LOOPX        : loop exiting command (goto, last, dump, etc)
 185  * FTST         : file test operator
 186  * FUN0         : zero-argument function
 187  * FUN0OP       : zero-argument function, with its op created in this file
 188  * FUN1         : not used, except for not, which isn't a UNIOP
 189  * BOop         : bitwise or or xor
 190  * BAop         : bitwise and
 191  * BCop         : bitwise complement
 192  * SHop         : shift operator
 193  * PWop         : power operator
 194  * PMop         : pattern-matching operator
 195  * Aop          : addition-level operator
 196  * AopNOASSIGN  : addition-level operator that is never part of .=
 197  * Mop          : multiplication-level operator
 198  * Eop          : equality-testing operator
 199  * Rop          : relational operator <= != gt
 200  *
 201  * Also see LOP and lop() below.
 202  */
 203
 204 #ifdef DEBUGGING /* Serve -DT. */
 205 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 206 #else
 207 #   define REPORT(retval) (retval)
 208 #endif
 209
 210 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 211 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 212 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 213 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 214 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 215 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 216 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 217 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 218 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
 219                          pl_yylval.ival=f, \
 220                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
 221                          REPORT((int)LOOPEX))
 222 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 223 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 224 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 225 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 226 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
 227 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
 228 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
 229                        REPORT('~')
 230 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
 231 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 232 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 233 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 234 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
 235 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 236 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 237 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 238
 239 /* This bit of chicanery makes a unary function followed by
 240  * a parenthesis into a function with one argument, highest precedence.
 241  * The UNIDOR macro is for unary functions that can be followed by the //
 242  * operator (such as C<shift // 0>).
 243  */
 244 #define UNI3(f,x,have_x) { \
 245         pl_yylval.ival = f; \
 246         if (have_x) PL_expect = x; \
 247         PL_bufptr = s; \
 248         PL_last_uni = PL_oldbufptr; \
 249         PL_last_lop_op = (f) < 0 ? -(f) : (f); \
 250         if (*s == '(') \
 251             return REPORT( (int)FUNC1 ); \
 252         s = skipspace(s); \
 253         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 254         }
 255 #define UNI(f)    UNI3(f,XTERM,1)
 256 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 257 #define UNIPROTO(f,optional) { \
 258         if (optional) PL_last_uni = PL_oldbufptr; \
 259         OPERATOR(f); \
 260         }
 261
 262 #define UNIBRACK(f) UNI3(f,0,0)
 263
 264 /* grandfather return to old style */
 265 #define OLDLOP(f) \
 266         do { \
 267             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 268                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 269             pl_yylval.ival = (f); \
 270             PL_expect = XTERM; \
 271             PL_bufptr = s; \
 272             return (int)LSTOP; \
 273         } while(0)
 274
 275 #define COPLINE_INC_WITH_HERELINES                  \
 276     STMT_START {                                     \
 277         CopLINE_inc(PL_curcop);                       \
 278         if (PL_parser->herelines)                      \
 279             CopLINE(PL_curcop) += PL_parser->herelines, \
 280             PL_parser->herelines = 0;                    \
 281     } STMT_END
 282 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 283  * is no sublex_push to follow. */
 284 #define COPLINE_SET_FROM_MULTI_END            \
 285     STMT_START {                               \
 286         CopLINE_set(PL_curcop, PL_multi_end);   \
 287         if (PL_multi_end != PL_multi_start)      \
 288             PL_parser->herelines = 0;             \
 289     } STMT_END
 290
 291
 292 #ifdef DEBUGGING
 293
 294 /* how to interpret the pl_yylval associated with the token */
 295 enum token_type {
 296     TOKENTYPE_NONE,
 297     TOKENTYPE_IVAL,
 298     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 299     TOKENTYPE_PVAL,
 300     TOKENTYPE_OPVAL
 301 };
 302
 303 static struct debug_tokens {
 304     const int token;
 305     enum token_type type;
 306     const char *name;
 307 } const debug_tokens[] =
 308 {
 309     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 310     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 311     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 312     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 313     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 314     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 315     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 316     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 317     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 318     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 319     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 320     { DO,               TOKENTYPE_NONE,         "DO" },
 321     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 322     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 323     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 324     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 325     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 326     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 327     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 328     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 329     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 330     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 331     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 332     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 333     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 334     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 335     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 336     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 337     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 338     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 339     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 340     { IF,               TOKENTYPE_IVAL,         "IF" },
 341     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 342     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 343     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 344     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 345     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 346     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 347     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 348     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 349     { MY,               TOKENTYPE_IVAL,         "MY" },
 350     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 351     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 352     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 353     { OROR,             TOKENTYPE_NONE,         "OROR" },
 354     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 355     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 356     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 357     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 358     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 359     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 360     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 361     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 362     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 363     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 364     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 365     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 366     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 367     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 368     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 369     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 370     { SUB,              TOKENTYPE_NONE,         "SUB" },
 371     { THING,            TOKENTYPE_OPVAL,        "THING" },
 372     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 373     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 374     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 375     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 376     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 377     { USE,              TOKENTYPE_IVAL,         "USE" },
 378     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 379     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 380     { BAREWORD,         TOKENTYPE_OPVAL,        "BAREWORD" },
 381     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 382     { 0,                TOKENTYPE_NONE,         NULL }
 383 };
 384
 385 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 386
 387 STATIC int
 388 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 389 {
 390     PERL_ARGS_ASSERT_TOKEREPORT;
 391
 392     if (DEBUG_T_TEST) {
 393         const char *name = NULL;
 394         enum token_type type = TOKENTYPE_NONE;
 395         const struct debug_tokens *p;
 396         SV* const report = newSVpvs("<== ");
 397
 398         for (p = debug_tokens; p->token; p++) {
 399             if (p->token == (int)rv) {
 400                 name = p->name;
 401                 type = p->type;
 402                 break;
 403             }
 404         }
 405         if (name)
 406             Perl_sv_catpv(aTHX_ report, name);
 407         else if (isGRAPH(rv))
 408         {
 409             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 410             if ((char)rv == 'p')
 411                 sv_catpvs(report, " (pending identifier)");
 412         }
 413         else if (!rv)
 414             sv_catpvs(report, "EOF");
 415         else
 416             Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
 417         switch (type) {
 418         case TOKENTYPE_NONE:
 419             break;
 420         case TOKENTYPE_IVAL:
 421             Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
 422             break;
 423         case TOKENTYPE_OPNUM:
 424             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 425                                     PL_op_name[lvalp->ival]);
 426             break;
 427         case TOKENTYPE_PVAL:
 428             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 429             break;
 430         case TOKENTYPE_OPVAL:
 431             if (lvalp->opval) {
 432                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 433                                     PL_op_name[lvalp->opval->op_type]);
 434                 if (lvalp->opval->op_type == OP_CONST) {
 435                     Perl_sv_catpvf(aTHX_ report, " %s",
 436                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 437                 }
 438
 439             }
 440             else
 441                 sv_catpvs(report, "(opval=null)");
 442             break;
 443         }
 444         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 445     };
 446     return (int)rv;
 447 }
 448
 449
 450 /* print the buffer with suitable escapes */
 451
 452 STATIC void
 453 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 454 {
 455     SV* const tmp = newSVpvs("");
 456
 457     PERL_ARGS_ASSERT_PRINTBUF;
 458
 459     GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
 460     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 461     GCC_DIAG_RESTORE;
 462     SvREFCNT_dec(tmp);
 463 }
 464
 465 #endif
 466
 467 /*
 468  * S_ao
 469  *
 470  * This subroutine looks for an '=' next to the operator that has just been
 471  * parsed and turns it into an ASSIGNOP if it finds one.
 472  */
 473
 474 STATIC int
 475 S_ao(pTHX_ int toketype)
 476 {
 477     if (*PL_bufptr == '=') {
 478         PL_bufptr++;
 479         if (toketype == ANDAND)
 480             pl_yylval.ival = OP_ANDASSIGN;
 481         else if (toketype == OROR)
 482             pl_yylval.ival = OP_ORASSIGN;
 483         else if (toketype == DORDOR)
 484             pl_yylval.ival = OP_DORASSIGN;
 485         toketype = ASSIGNOP;
 486     }
 487     return REPORT(toketype);
 488 }
 489
 490 /*
 491  * S_no_op
 492  * When Perl expects an operator and finds something else, no_op
 493  * prints the warning.  It always prints "<something> found where
 494  * operator expected.  It prints "Missing semicolon on previous line?"
 495  * if the surprise occurs at the start of the line.  "do you need to
 496  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 497  * where the compiler doesn't know if foo is a method call or a function.
 498  * It prints "Missing operator before end of line" if there's nothing
 499  * after the missing operator, or "... before <...>" if there is something
 500  * after the missing operator.
 501  *
 502  * PL_bufptr is expected to point to the start of the thing that was found,
 503  * and s after the next token or partial token.
 504  */
 505
 506 STATIC void
 507 S_no_op(pTHX_ const char *const what, char *s)
 508 {
 509     char * const oldbp = PL_bufptr;
 510     const bool is_first = (PL_oldbufptr == PL_linestart);
 511
 512     PERL_ARGS_ASSERT_NO_OP;
 513
 514     if (!s)
 515         s = oldbp;
 516     else
 517         PL_bufptr = s;
 518     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 519     if (ckWARN_d(WARN_SYNTAX)) {
 520         if (is_first)
 521             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 522                     "\t(Missing semicolon on previous line?)\n");
 523         else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
 524                                                            PL_bufend,
 525                                                            UTF))
 526         {
 527             const char *t;
 528             for (t = PL_oldoldbufptr;
 529                  (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
 530                  t += UTF ? UTF8SKIP(t) : 1)
 531             {
 532                 NOOP;
 533             }
 534             if (t < PL_bufptr && isSPACE(*t))
 535                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 536                         "\t(Do you need to predeclare %" UTF8f "?)\n",
 537                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 538         }
 539         else {
 540             assert(s >= oldbp);
 541             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 542                     "\t(Missing operator before %" UTF8f "?)\n",
 543                      UTF8fARG(UTF, s - oldbp, oldbp));
 544         }
 545     }
 546     PL_bufptr = oldbp;
 547 }
 548
 549 /*
 550  * S_missingterm
 551  * Complain about missing quote/regexp/heredoc terminator.
 552  * If it's called with NULL then it cauterizes the line buffer.
 553  * If we're in a delimited string and the delimiter is a control
 554  * character, it's reformatted into a two-char sequence like ^C.
 555  * This is fatal.
 556  */
 557
 558 STATIC void
 559 S_missingterm(pTHX_ char *s)
 560 {
 561     char tmpbuf[UTF8_MAXBYTES + 1];
 562     char q;
 563     bool uni = FALSE;
 564     SV *sv;
 565     if (s) {
 566         char * const nl = strrchr(s,'\n');
 567         if (nl)
 568             *nl = '\0';
 569         uni = UTF;
 570     }
 571     else if (PL_multi_close < 32) {
 572         *tmpbuf = '^';
 573         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 574         tmpbuf[2] = '\0';
 575         s = tmpbuf;
 576     }
 577     else {
 578         if (LIKELY(PL_multi_close < 256)) {
 579             *tmpbuf = (char)PL_multi_close;
 580             tmpbuf[1] = '\0';
 581         }
 582         else {
 583             uni = TRUE;
 584             *uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close) = 0;
 585         }
 586         s = tmpbuf;
 587     }
 588     q = strchr(s,'"') ? '\'' : '"';
 589     sv = sv_2mortal(newSVpv(s,0));
 590     if (uni)
 591         SvUTF8_on(sv);
 592     Perl_croak(aTHX_ "Can't find string terminator %c%" SVf
 593                      "%c anywhere before EOF",q,SVfARG(sv),q);
 594 }
 595
 596 #include "feature.h"
 597
 598 /*
 599  * Check whether the named feature is enabled.
 600  */
 601 bool
 602 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 603 {
 604     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 605
 606     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 607
 608     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 609
 610     if (namelen > MAX_FEATURE_LEN)
 611         return FALSE;
 612     memcpy(&he_name[8], name, namelen);
 613
 614     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 615                                      REFCOUNTED_HE_EXISTS));
 616 }
 617
 618 /*
 619  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 620  * utf16-to-utf8-reversed.
 621  */
 622
 623 #ifdef PERL_CR_FILTER
 624 static void
 625 strip_return(SV *sv)
 626 {
 627     const char *s = SvPVX_const(sv);
 628     const char * const e = s + SvCUR(sv);
 629
 630     PERL_ARGS_ASSERT_STRIP_RETURN;
 631
 632     /* outer loop optimized to do nothing if there are no CR-LFs */
 633     while (s < e) {
 634         if (*s++ == '\r' && *s == '\n') {
 635             /* hit a CR-LF, need to copy the rest */
 636             char *d = s - 1;
 637             *d++ = *s++;
 638             while (s < e) {
 639                 if (*s == '\r' && s[1] == '\n')
 640                     s++;
 641                 *d++ = *s++;
 642             }
 643             SvCUR(sv) -= s - d;
 644             return;
 645         }
 646     }
 647 }
 648
 649 STATIC I32
 650 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 651 {
 652     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 653     if (count > 0 && !maxlen)
 654         strip_return(sv);
 655     return count;
 656 }
 657 #endif
 658
 659 /*
 660 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 661
 662 Creates and initialises a new lexer/parser state object, supplying
 663 a context in which to lex and parse from a new source of Perl code.
 664 A pointer to the new state object is placed in L</PL_parser>.  An entry
 665 is made on the save stack so that upon unwinding, the new state object
 666 will be destroyed and the former value of L</PL_parser> will be restored.
 667 Nothing else need be done to clean up the parsing context.
 668
 669 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
 670 non-null, provides a string (in SV form) containing code to be parsed.
 671 A copy of the string is made, so subsequent modification of C<line>
 672 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
 673 from which code will be read to be parsed.  If both are non-null, the
 674 code in C<line> comes first and must consist of complete lines of input,
 675 and C<rsfp> supplies the remainder of the source.
 676
 677 The C<flags> parameter is reserved for future use.  Currently it is only
 678 used by perl internally, so extensions should always pass zero.
 679
 680 =cut
 681 */
 682
 683 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 684    can share filters with the current parser.
 685    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 686    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 687    destruction. This is used to handle the case of defaulting to reading the
 688    script from the standard input because no filename was given on the command
 689    line (without getting confused by situation where STDIN has been closed, so
 690    the script handle is opened on fd 0)  */
 691
 692 void
 693 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 694 {
 695     const char *s = NULL;
 696     yy_parser *parser, *oparser;
 697
 698     if (flags && flags & ~LEX_START_FLAGS)
 699         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 700
 701     /* create and initialise a parser */
 702
 703     Newxz(parser, 1, yy_parser);
 704     parser->old_parser = oparser = PL_parser;
 705     PL_parser = parser;
 706
 707     parser->stack = NULL;
 708     parser->stack_max1 = NULL;
 709     parser->ps = NULL;
 710
 711     /* on scope exit, free this parser and restore any outer one */
 712     SAVEPARSER(parser);
 713     parser->saved_curcop = PL_curcop;
 714
 715     /* initialise lexer state */
 716
 717     parser->nexttoke = 0;
 718     parser->error_count = oparser ? oparser->error_count : 0;
 719     parser->copline = parser->preambling = NOLINE;
 720     parser->lex_state = LEX_NORMAL;
 721     parser->expect = XSTATE;
 722     parser->rsfp = rsfp;
 723     parser->recheck_utf8_validity = FALSE;
 724     parser->rsfp_filters =
 725       !(flags & LEX_START_SAME_FILTER) || !oparser
 726         ? NULL
 727         : MUTABLE_AV(SvREFCNT_inc(
 728             oparser->rsfp_filters
 729              ? oparser->rsfp_filters
 730              : (oparser->rsfp_filters = newAV())
 731           ));
 732
 733     Newx(parser->lex_brackstack, 120, char);
 734     Newx(parser->lex_casestack, 12, char);
 735     *parser->lex_casestack = '\0';
 736     Newxz(parser->lex_shared, 1, LEXSHARED);
 737
 738     if (line) {
 739         STRLEN len;
 740         const U8* first_bad_char_loc;
 741
 742         s = SvPV_const(line, len);
 743
 744         if (   SvUTF8(line)
 745             && UNLIKELY(! is_utf8_string_loc((U8 *) s,
 746                                              SvCUR(line),
 747                                              &first_bad_char_loc)))
 748         {
 749             _force_out_malformed_utf8_message(first_bad_char_loc,
 750                                               (U8 *) s + SvCUR(line),
 751                                               0,
 752                                               1 /* 1 means die */ );
 753             NOT_REACHED; /* NOTREACHED */
 754         }
 755
 756         parser->linestr = flags & LEX_START_COPIED
 757                             ? SvREFCNT_inc_simple_NN(line)
 758                             : newSVpvn_flags(s, len, SvUTF8(line));
 759         if (!rsfp)
 760             sv_catpvs(parser->linestr, "\n;");
 761     } else {
 762         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 763     }
 764
 765     parser->oldoldbufptr =
 766         parser->oldbufptr =
 767         parser->bufptr =
 768         parser->linestart = SvPVX(parser->linestr);
 769     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 770     parser->last_lop = parser->last_uni = NULL;
 771
 772     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 773                                                         |LEX_DONT_CLOSE_RSFP));
 774     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 775                                                         |LEX_DONT_CLOSE_RSFP));
 776
 777     parser->in_pod = parser->filtered = 0;
 778 }
 779
 780
 781 /* delete a parser object */
 782
 783 void
 784 Perl_parser_free(pTHX_  const yy_parser *parser)
 785 {
 786     PERL_ARGS_ASSERT_PARSER_FREE;
 787
 788     PL_curcop = parser->saved_curcop;
 789     SvREFCNT_dec(parser->linestr);
 790
 791     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 792         PerlIO_clearerr(parser->rsfp);
 793     else if (parser->rsfp && (!parser->old_parser
 794           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 795         PerlIO_close(parser->rsfp);
 796     SvREFCNT_dec(parser->rsfp_filters);
 797     SvREFCNT_dec(parser->lex_stuff);
 798     SvREFCNT_dec(parser->lex_sub_repl);
 799
 800     Safefree(parser->lex_brackstack);
 801     Safefree(parser->lex_casestack);
 802     Safefree(parser->lex_shared);
 803     PL_parser = parser->old_parser;
 804     Safefree(parser);
 805 }
 806
 807 void
 808 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 809 {
 810     I32 nexttoke = parser->nexttoke;
 811     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 812     while (nexttoke--) {
 813         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 814          && parser->nextval[nexttoke].opval
 815          && parser->nextval[nexttoke].opval->op_slabbed
 816          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 817             op_free(parser->nextval[nexttoke].opval);
 818             parser->nextval[nexttoke].opval = NULL;
 819         }
 820     }
 821 }
 822
 823
 824 /*
 825 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 826
 827 Buffer scalar containing the chunk currently under consideration of the
 828 text currently being lexed.  This is always a plain string scalar (for
 829 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 830 normal scalar means; instead refer to the buffer directly by the pointer
 831 variables described below.
 832
 833 The lexer maintains various C<char*> pointers to things in the
 834 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 835 reallocated, all of these pointers must be updated.  Don't attempt to
 836 do this manually, but rather use L</lex_grow_linestr> if you need to
 837 reallocate the buffer.
 838
 839 The content of the text chunk in the buffer is commonly exactly one
 840 complete line of input, up to and including a newline terminator,
 841 but there are situations where it is otherwise.  The octets of the
 842 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 843 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 844 flag on this scalar, which may disagree with it.
 845
 846 For direct examination of the buffer, the variable
 847 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 848 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 849 of these pointers is usually preferable to examination of the scalar
 850 through normal scalar means.
 851
 852 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 853
 854 Direct pointer to the end of the chunk of text currently being lexed, the
 855 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 856 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 857 always located at the end of the buffer, and does not count as part of
 858 the buffer's contents.
 859
 860 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 861
 862 Points to the current position of lexing inside the lexer buffer.
 863 Characters around this point may be freely examined, within
 864 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 865 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 866 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 867
 868 Lexing code (whether in the Perl core or not) moves this pointer past
 869 the characters that it consumes.  It is also expected to perform some
 870 bookkeeping whenever a newline character is consumed.  This movement
 871 can be more conveniently performed by the function L</lex_read_to>,
 872 which handles newlines appropriately.
 873
 874 Interpretation of the buffer's octets can be abstracted out by
 875 using the slightly higher-level functions L</lex_peek_unichar> and
 876 L</lex_read_unichar>.
 877
 878 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 879
 880 Points to the start of the current line inside the lexer buffer.
 881 This is useful for indicating at which column an error occurred, and
 882 not much else.  This must be updated by any lexing code that consumes
 883 a newline; the function L</lex_read_to> handles this detail.
 884
 885 =cut
 886 */
 887
 888 /*
 889 =for apidoc Amx|bool|lex_bufutf8
 890
 891 Indicates whether the octets in the lexer buffer
 892 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 893 of Unicode characters.  If not, they should be interpreted as Latin-1
 894 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 895
 896 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 897 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 898 encoding.
 899
 900 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 901 is significant, but not the whole story regarding the input character
 902 encoding.  Normally, when a file is being read, the scalar contains octets
 903 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 904 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 905 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 906 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 907 is in effect.  This logic may change in the future; use this function
 908 instead of implementing the logic yourself.
 909
 910 =cut
 911 */
 912
 913 bool
 914 Perl_lex_bufutf8(pTHX)
 915 {
 916     return UTF;
 917 }
 918
 919 /*
 920 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 921
 922 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 923 at least C<len> octets (including terminating C<NUL>).  Returns a
 924 pointer to the reallocated buffer.  This is necessary before making
 925 any direct modification of the buffer that would increase its length.
 926 L</lex_stuff_pvn> provides a more convenient way to insert text into
 927 the buffer.
 928
 929 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 930 this function updates all of the lexer's variables that point directly
 931 into the buffer.
 932
 933 =cut
 934 */
 935
 936 char *
 937 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 938 {
 939     SV *linestr;
 940     char *buf;
 941     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 942     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 943     bool current;
 944
 945     linestr = PL_parser->linestr;
 946     buf = SvPVX(linestr);
 947     if (len <= SvLEN(linestr))
 948         return buf;
 949
 950     /* Is the lex_shared linestr SV the same as the current linestr SV?
 951      * Only in this case does re_eval_start need adjusting, since it
 952      * points within lex_shared->ls_linestr's buffer */
 953     current = (   !PL_parser->lex_shared->ls_linestr
 954                || linestr == PL_parser->lex_shared->ls_linestr);
 955
 956     bufend_pos = PL_parser->bufend - buf;
 957     bufptr_pos = PL_parser->bufptr - buf;
 958     oldbufptr_pos = PL_parser->oldbufptr - buf;
 959     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 960     linestart_pos = PL_parser->linestart - buf;
 961     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 962     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 963     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
 964                             PL_parser->lex_shared->re_eval_start - buf : 0;
 965
 966     buf = sv_grow(linestr, len);
 967
 968     PL_parser->bufend = buf + bufend_pos;
 969     PL_parser->bufptr = buf + bufptr_pos;
 970     PL_parser->oldbufptr = buf + oldbufptr_pos;
 971     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 972     PL_parser->linestart = buf + linestart_pos;
 973     if (PL_parser->last_uni)
 974         PL_parser->last_uni = buf + last_uni_pos;
 975     if (PL_parser->last_lop)
 976         PL_parser->last_lop = buf + last_lop_pos;
 977     if (current && PL_parser->lex_shared->re_eval_start)
 978         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 979     return buf;
 980 }
 981
 982 /*
 983 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 984
 985 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 986 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 987 reallocating the buffer if necessary.  This means that lexing code that
 988 runs later will see the characters as if they had appeared in the input.
 989 It is not recommended to do this as part of normal parsing, and most
 990 uses of this facility run the risk of the inserted characters being
 991 interpreted in an unintended manner.
 992
 993 The string to be inserted is represented by C<len> octets starting
 994 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 995 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
 996 The characters are recoded for the lexer buffer, according to how the
 997 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 998 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 999 function is more convenient.
1000
1001 =cut
1002 */
1003
1004 void
1005 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1006 {
1007     dVAR;
1008     char *bufptr;
1009     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1010     if (flags & ~(LEX_STUFF_UTF8))
1011         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1012     if (UTF) {
1013         if (flags & LEX_STUFF_UTF8) {
1014             goto plain_copy;
1015         } else {
1016             STRLEN highhalf = 0;    /* Count of variants */
1017             const char *p, *e = pv+len;
1018             for (p = pv; p != e; p++) {
1019                 if (! UTF8_IS_INVARIANT(*p)) {
1020                     highhalf++;
1021                 }
1022             }
1023             if (!highhalf)
1024                 goto plain_copy;
1025             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1026             bufptr = PL_parser->bufptr;
1027             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1028             SvCUR_set(PL_parser->linestr,
1029                 SvCUR(PL_parser->linestr) + len+highhalf);
1030             PL_parser->bufend += len+highhalf;
1031             for (p = pv; p != e; p++) {
1032                 U8 c = (U8)*p;
1033                 if (! UTF8_IS_INVARIANT(c)) {
1034                     *bufptr++ = UTF8_TWO_BYTE_HI(c);
1035                     *bufptr++ = UTF8_TWO_BYTE_LO(c);
1036                 } else {
1037                     *bufptr++ = (char)c;
1038                 }
1039             }
1040         }
1041     } else {
1042         if (flags & LEX_STUFF_UTF8) {
1043             STRLEN highhalf = 0;
1044             const char *p, *e = pv+len;
1045             for (p = pv; p != e; p++) {
1046                 U8 c = (U8)*p;
1047                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1048                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1049                                 "non-Latin-1 character into Latin-1 input");
1050                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1051                     p++;
1052                     highhalf++;
1053                 } else assert(UTF8_IS_INVARIANT(c));
1054             }
1055             if (!highhalf)
1056                 goto plain_copy;
1057             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1058             bufptr = PL_parser->bufptr;
1059             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1060             SvCUR_set(PL_parser->linestr,
1061                 SvCUR(PL_parser->linestr) + len-highhalf);
1062             PL_parser->bufend += len-highhalf;
1063             p = pv;
1064             while (p < e) {
1065                 if (UTF8_IS_INVARIANT(*p)) {
1066                     *bufptr++ = *p;
1067                     p++;
1068                 }
1069                 else {
1070                     assert(p < e -1 );
1071                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1072                     p += 2;
1073                 }
1074             }
1075         } else {
1076           plain_copy:
1077             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1078             bufptr = PL_parser->bufptr;
1079             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1080             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1081             PL_parser->bufend += len;
1082             Copy(pv, bufptr, len, char);
1083         }
1084     }
1085 }
1086
1087 /*
1088 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1089
1090 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1091 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1092 reallocating the buffer if necessary.  This means that lexing code that
1093 runs later will see the characters as if they had appeared in the input.
1094 It is not recommended to do this as part of normal parsing, and most
1095 uses of this facility run the risk of the inserted characters being
1096 interpreted in an unintended manner.
1097
1098 The string to be inserted is represented by octets starting at C<pv>
1099 and continuing to the first nul.  These octets are interpreted as either
1100 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1101 in C<flags>.  The characters are recoded for the lexer buffer, according
1102 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1103 If it is not convenient to nul-terminate a string to be inserted, the
1104 L</lex_stuff_pvn> function is more appropriate.
1105
1106 =cut
1107 */
1108
1109 void
1110 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1111 {
1112     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1113     lex_stuff_pvn(pv, strlen(pv), flags);
1114 }
1115
1116 /*
1117 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1118
1119 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1120 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1121 reallocating the buffer if necessary.  This means that lexing code that
1122 runs later will see the characters as if they had appeared in the input.
1123 It is not recommended to do this as part of normal parsing, and most
1124 uses of this facility run the risk of the inserted characters being
1125 interpreted in an unintended manner.
1126
1127 The string to be inserted is the string value of C<sv>.  The characters
1128 are recoded for the lexer buffer, according to how the buffer is currently
1129 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1130 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1131 need to construct a scalar.
1132
1133 =cut
1134 */
1135
1136 void
1137 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1138 {
1139     char *pv;
1140     STRLEN len;
1141     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1142     if (flags)
1143         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1144     pv = SvPV(sv, len);
1145     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1146 }
1147
1148 /*
1149 =for apidoc Amx|void|lex_unstuff|char *ptr
1150
1151 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1152 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1153 This hides the discarded text from any lexing code that runs later,
1154 as if the text had never appeared.
1155
1156 This is not the normal way to consume lexed text.  For that, use
1157 L</lex_read_to>.
1158
1159 =cut
1160 */
1161
1162 void
1163 Perl_lex_unstuff(pTHX_ char *ptr)
1164 {
1165     char *buf, *bufend;
1166     STRLEN unstuff_len;
1167     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1168     buf = PL_parser->bufptr;
1169     if (ptr < buf)
1170         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1171     if (ptr == buf)
1172         return;
1173     bufend = PL_parser->bufend;
1174     if (ptr > bufend)
1175         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1176     unstuff_len = ptr - buf;
1177     Move(ptr, buf, bufend+1-ptr, char);
1178     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1179     PL_parser->bufend = bufend - unstuff_len;
1180 }
1181
1182 /*
1183 =for apidoc Amx|void|lex_read_to|char *ptr
1184
1185 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1186 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1187 performing the correct bookkeeping whenever a newline character is passed.
1188 This is the normal way to consume lexed text.
1189
1190 Interpretation of the buffer's octets can be abstracted out by
1191 using the slightly higher-level functions L</lex_peek_unichar> and
1192 L</lex_read_unichar>.
1193
1194 =cut
1195 */
1196
1197 void
1198 Perl_lex_read_to(pTHX_ char *ptr)
1199 {
1200     char *s;
1201     PERL_ARGS_ASSERT_LEX_READ_TO;
1202     s = PL_parser->bufptr;
1203     if (ptr < s || ptr > PL_parser->bufend)
1204         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1205     for (; s != ptr; s++)
1206         if (*s == '\n') {
1207             COPLINE_INC_WITH_HERELINES;
1208             PL_parser->linestart = s+1;
1209         }
1210     PL_parser->bufptr = ptr;
1211 }
1212
1213 /*
1214 =for apidoc Amx|void|lex_discard_to|char *ptr
1215
1216 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1217 up to C<ptr>.  The remaining content of the buffer will be moved, and
1218 all pointers into the buffer updated appropriately.  C<ptr> must not
1219 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1220 it is not permitted to discard text that has yet to be lexed.
1221
1222 Normally it is not necessarily to do this directly, because it suffices to
1223 use the implicit discarding behaviour of L</lex_next_chunk> and things
1224 based on it.  However, if a token stretches across multiple lines,
1225 and the lexing code has kept multiple lines of text in the buffer for
1226 that purpose, then after completion of the token it would be wise to
1227 explicitly discard the now-unneeded earlier lines, to avoid future
1228 multi-line tokens growing the buffer without bound.
1229
1230 =cut
1231 */
1232
1233 void
1234 Perl_lex_discard_to(pTHX_ char *ptr)
1235 {
1236     char *buf;
1237     STRLEN discard_len;
1238     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1239     buf = SvPVX(PL_parser->linestr);
1240     if (ptr < buf)
1241         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1242     if (ptr == buf)
1243         return;
1244     if (ptr > PL_parser->bufptr)
1245         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1246     discard_len = ptr - buf;
1247     if (PL_parser->oldbufptr < ptr)
1248         PL_parser->oldbufptr = ptr;
1249     if (PL_parser->oldoldbufptr < ptr)
1250         PL_parser->oldoldbufptr = ptr;
1251     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1252         PL_parser->last_uni = NULL;
1253     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1254         PL_parser->last_lop = NULL;
1255     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1256     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1257     PL_parser->bufend -= discard_len;
1258     PL_parser->bufptr -= discard_len;
1259     PL_parser->oldbufptr -= discard_len;
1260     PL_parser->oldoldbufptr -= discard_len;
1261     if (PL_parser->last_uni)
1262         PL_parser->last_uni -= discard_len;
1263     if (PL_parser->last_lop)
1264         PL_parser->last_lop -= discard_len;
1265 }
1266
1267 void
1268 Perl_notify_parser_that_changed_to_utf8(pTHX)
1269 {
1270     /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1271      * off to on.  At compile time, this has the effect of entering a 'use
1272      * utf8' section.  This means that any input was not previously checked for
1273      * UTF-8 (because it was off), but now we do need to check it, or our
1274      * assumptions about the input being sane could be wrong, and we could
1275      * segfault.  This routine just sets a flag so that the next time we look
1276      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1277      * proper phase, there may not be a parser object, but if there is, setting
1278      * the flag is harmless */
1279
1280     if (PL_parser) {
1281         PL_parser->recheck_utf8_validity = TRUE;
1282     }
1283 }
1284
1285 /*
1286 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1287
1288 Reads in the next chunk of text to be lexed, appending it to
1289 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1290 looked to the end of the current chunk and wants to know more.  It is
1291 usual, but not necessary, for lexing to have consumed the entirety of
1292 the current chunk at this time.
1293
1294 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1295 chunk (i.e., the current chunk has been entirely consumed), normally the
1296 current chunk will be discarded at the same time that the new chunk is
1297 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1298 will not be discarded.  If the current chunk has not been entirely
1299 consumed, then it will not be discarded regardless of the flag.
1300
1301 Returns true if some new text was added to the buffer, or false if the
1302 buffer has reached the end of the input text.
1303
1304 =cut
1305 */
1306
1307 #define LEX_FAKE_EOF 0x80000000
1308 #define LEX_NO_TERM  0x40000000 /* here-doc */
1309
1310 bool
1311 Perl_lex_next_chunk(pTHX_ U32 flags)
1312 {
1313     SV *linestr;
1314     char *buf;
1315     STRLEN old_bufend_pos, new_bufend_pos;
1316     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1317     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1318     bool got_some_for_debugger = 0;
1319     bool got_some;
1320
1321     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1322         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1323     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1324         return FALSE;
1325     linestr = PL_parser->linestr;
1326     buf = SvPVX(linestr);
1327     if (!(flags & LEX_KEEP_PREVIOUS)
1328           && PL_parser->bufptr == PL_parser->bufend)
1329     {
1330         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1331         linestart_pos = 0;
1332         if (PL_parser->last_uni != PL_parser->bufend)
1333             PL_parser->last_uni = NULL;
1334         if (PL_parser->last_lop != PL_parser->bufend)
1335             PL_parser->last_lop = NULL;
1336         last_uni_pos = last_lop_pos = 0;
1337         *buf = 0;
1338         SvCUR(linestr) = 0;
1339     } else {
1340         old_bufend_pos = PL_parser->bufend - buf;
1341         bufptr_pos = PL_parser->bufptr - buf;
1342         oldbufptr_pos = PL_parser->oldbufptr - buf;
1343         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1344         linestart_pos = PL_parser->linestart - buf;
1345         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1346         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1347     }
1348     if (flags & LEX_FAKE_EOF) {
1349         goto eof;
1350     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1351         got_some = 0;
1352     } else if (filter_gets(linestr, old_bufend_pos)) {
1353         got_some = 1;
1354         got_some_for_debugger = 1;
1355     } else if (flags & LEX_NO_TERM) {
1356         got_some = 0;
1357     } else {
1358         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1359             SvPVCLEAR(linestr);
1360         eof:
1361         /* End of real input.  Close filehandle (unless it was STDIN),
1362          * then add implicit termination.
1363          */
1364         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1365             PerlIO_clearerr(PL_parser->rsfp);
1366         else if (PL_parser->rsfp)
1367             (void)PerlIO_close(PL_parser->rsfp);
1368         PL_parser->rsfp = NULL;
1369         PL_parser->in_pod = PL_parser->filtered = 0;
1370         if (!PL_in_eval && PL_minus_p) {
1371             sv_catpvs(linestr,
1372                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1373             PL_minus_n = PL_minus_p = 0;
1374         } else if (!PL_in_eval && PL_minus_n) {
1375             sv_catpvs(linestr, /*{*/";}");
1376             PL_minus_n = 0;
1377         } else
1378             sv_catpvs(linestr, ";");
1379         got_some = 1;
1380     }
1381     buf = SvPVX(linestr);
1382     new_bufend_pos = SvCUR(linestr);
1383     PL_parser->bufend = buf + new_bufend_pos;
1384     PL_parser->bufptr = buf + bufptr_pos;
1385
1386     if (UTF) {
1387         const U8* first_bad_char_loc;
1388         if (UNLIKELY(! is_utf8_string_loc(
1389                             (U8 *) PL_parser->bufptr,
1390                                    PL_parser->bufend - PL_parser->bufptr,
1391                                    &first_bad_char_loc)))
1392         {
1393             _force_out_malformed_utf8_message(first_bad_char_loc,
1394                                               (U8 *) PL_parser->bufend,
1395                                               0,
1396                                               1 /* 1 means die */ );
1397             NOT_REACHED; /* NOTREACHED */
1398         }
1399     }
1400
1401     PL_parser->oldbufptr = buf + oldbufptr_pos;
1402     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1403     PL_parser->linestart = buf + linestart_pos;
1404     if (PL_parser->last_uni)
1405         PL_parser->last_uni = buf + last_uni_pos;
1406     if (PL_parser->last_lop)
1407         PL_parser->last_lop = buf + last_lop_pos;
1408     if (PL_parser->preambling != NOLINE) {
1409         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1410         PL_parser->preambling = NOLINE;
1411     }
1412     if (   got_some_for_debugger
1413         && PERLDB_LINE_OR_SAVESRC
1414         && PL_curstash != PL_debstash)
1415     {
1416         /* debugger active and we're not compiling the debugger code,
1417          * so store the line into the debugger's array of lines
1418          */
1419         update_debugger_info(NULL, buf+old_bufend_pos,
1420             new_bufend_pos-old_bufend_pos);
1421     }
1422     return got_some;
1423 }
1424
1425 /*
1426 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1427
1428 Looks ahead one (Unicode) character in the text currently being lexed.
1429 Returns the codepoint (unsigned integer value) of the next character,
1430 or -1 if lexing has reached the end of the input text.  To consume the
1431 peeked character, use L</lex_read_unichar>.
1432
1433 If the next character is in (or extends into) the next chunk of input
1434 text, the next chunk will be read in.  Normally the current chunk will be
1435 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1436 bit set, then the current chunk will not be discarded.
1437
1438 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1439 is encountered, an exception is generated.
1440
1441 =cut
1442 */
1443
1444 I32
1445 Perl_lex_peek_unichar(pTHX_ U32 flags)
1446 {
1447     dVAR;
1448     char *s, *bufend;
1449     if (flags & ~(LEX_KEEP_PREVIOUS))
1450         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1451     s = PL_parser->bufptr;
1452     bufend = PL_parser->bufend;
1453     if (UTF) {
1454         U8 head;
1455         I32 unichar;
1456         STRLEN len, retlen;
1457         if (s == bufend) {
1458             if (!lex_next_chunk(flags))
1459                 return -1;
1460             s = PL_parser->bufptr;
1461             bufend = PL_parser->bufend;
1462         }
1463         head = (U8)*s;
1464         if (UTF8_IS_INVARIANT(head))
1465             return head;
1466         if (UTF8_IS_START(head)) {
1467             len = UTF8SKIP(&head);
1468             while ((STRLEN)(bufend-s) < len) {
1469                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1470                     break;
1471                 s = PL_parser->bufptr;
1472                 bufend = PL_parser->bufend;
1473             }
1474         }
1475         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1476         if (retlen == (STRLEN)-1) {
1477             _force_out_malformed_utf8_message((U8 *) s,
1478                                               (U8 *) bufend,
1479                                               0,
1480                                               1 /* 1 means die */ );
1481             NOT_REACHED; /* NOTREACHED */
1482         }
1483         return unichar;
1484     } else {
1485         if (s == bufend) {
1486             if (!lex_next_chunk(flags))
1487                 return -1;
1488             s = PL_parser->bufptr;
1489         }
1490         return (U8)*s;
1491     }
1492 }
1493
1494 /*
1495 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1496
1497 Reads the next (Unicode) character in the text currently being lexed.
1498 Returns the codepoint (unsigned integer value) of the character read,
1499 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1500 if lexing has reached the end of the input text.  To non-destructively
1501 examine the next character, use L</lex_peek_unichar> instead.
1502
1503 If the next character is in (or extends into) the next chunk of input
1504 text, the next chunk will be read in.  Normally the current chunk will be
1505 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1506 bit set, then the current chunk will not be discarded.
1507
1508 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1509 is encountered, an exception is generated.
1510
1511 =cut
1512 */
1513
1514 I32
1515 Perl_lex_read_unichar(pTHX_ U32 flags)
1516 {
1517     I32 c;
1518     if (flags & ~(LEX_KEEP_PREVIOUS))
1519         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1520     c = lex_peek_unichar(flags);
1521     if (c != -1) {
1522         if (c == '\n')
1523             COPLINE_INC_WITH_HERELINES;
1524         if (UTF)
1525             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1526         else
1527             ++(PL_parser->bufptr);
1528     }
1529     return c;
1530 }
1531
1532 /*
1533 =for apidoc Amx|void|lex_read_space|U32 flags
1534
1535 Reads optional spaces, in Perl style, in the text currently being
1536 lexed.  The spaces may include ordinary whitespace characters and
1537 Perl-style comments.  C<#line> directives are processed if encountered.
1538 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1539 at a non-space character (or the end of the input text).
1540
1541 If spaces extend into the next chunk of input text, the next chunk will
1542 be read in.  Normally the current chunk will be discarded at the same
1543 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1544 chunk will not be discarded.
1545
1546 =cut
1547 */
1548
1549 #define LEX_NO_INCLINE    0x40000000
1550 #define LEX_NO_NEXT_CHUNK 0x80000000
1551
1552 void
1553 Perl_lex_read_space(pTHX_ U32 flags)
1554 {
1555     char *s, *bufend;
1556     const bool can_incline = !(flags & LEX_NO_INCLINE);
1557     bool need_incline = 0;
1558     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1559         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1560     s = PL_parser->bufptr;
1561     bufend = PL_parser->bufend;
1562     while (1) {
1563         char c = *s;
1564         if (c == '#') {
1565             do {
1566                 c = *++s;
1567             } while (!(c == '\n' || (c == 0 && s == bufend)));
1568         } else if (c == '\n') {
1569             s++;
1570             if (can_incline) {
1571                 PL_parser->linestart = s;
1572                 if (s == bufend)
1573                     need_incline = 1;
1574                 else
1575                     incline(s);
1576             }
1577         } else if (isSPACE(c)) {
1578             s++;
1579         } else if (c == 0 && s == bufend) {
1580             bool got_more;
1581             line_t l;
1582             if (flags & LEX_NO_NEXT_CHUNK)
1583                 break;
1584             PL_parser->bufptr = s;
1585             l = CopLINE(PL_curcop);
1586             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1587             got_more = lex_next_chunk(flags);
1588             CopLINE_set(PL_curcop, l);
1589             s = PL_parser->bufptr;
1590             bufend = PL_parser->bufend;
1591             if (!got_more)
1592                 break;
1593             if (can_incline && need_incline && PL_parser->rsfp) {
1594                 incline(s);
1595                 need_incline = 0;
1596             }
1597         } else if (!c) {
1598             s++;
1599         } else {
1600             break;
1601         }
1602     }
1603     PL_parser->bufptr = s;
1604 }
1605
1606 /*
1607
1608 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1609
1610 This function performs syntax checking on a prototype, C<proto>.
1611 If C<warn> is true, any illegal characters or mismatched brackets
1612 will trigger illegalproto warnings, declaring that they were
1613 detected in the prototype for C<name>.
1614
1615 The return value is C<true> if this is a valid prototype, and
1616 C<false> if it is not, regardless of whether C<warn> was C<true> or
1617 C<false>.
1618
1619 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1620
1621 =cut
1622
1623  */
1624
1625 bool
1626 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1627 {
1628     STRLEN len, origlen;
1629     char *p;
1630     bool bad_proto = FALSE;
1631     bool in_brackets = FALSE;
1632     bool after_slash = FALSE;
1633     char greedy_proto = ' ';
1634     bool proto_after_greedy_proto = FALSE;
1635     bool must_be_last = FALSE;
1636     bool underscore = FALSE;
1637     bool bad_proto_after_underscore = FALSE;
1638
1639     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1640
1641     if (!proto)
1642         return TRUE;
1643
1644     p = SvPV(proto, len);
1645     origlen = len;
1646     for (; len--; p++) {
1647         if (!isSPACE(*p)) {
1648             if (must_be_last)
1649                 proto_after_greedy_proto = TRUE;
1650             if (underscore) {
1651                 if (!strchr(";@%", *p))
1652                     bad_proto_after_underscore = TRUE;
1653                 underscore = FALSE;
1654             }
1655             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1656                 bad_proto = TRUE;
1657             }
1658             else {
1659                 if (*p == '[')
1660                     in_brackets = TRUE;
1661                 else if (*p == ']')
1662                     in_brackets = FALSE;
1663                 else if ((*p == '@' || *p == '%')
1664                          && !after_slash
1665                          && !in_brackets )
1666                 {
1667                     must_be_last = TRUE;
1668                     greedy_proto = *p;
1669                 }
1670                 else if (*p == '_')
1671                     underscore = TRUE;
1672             }
1673             if (*p == '\\')
1674                 after_slash = TRUE;
1675             else
1676                 after_slash = FALSE;
1677         }
1678     }
1679
1680     if (warn) {
1681         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1682         p -= origlen;
1683         p = SvUTF8(proto)
1684             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1685                              origlen, UNI_DISPLAY_ISPRINT)
1686             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1687
1688         if (proto_after_greedy_proto)
1689             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1690                         "Prototype after '%c' for %" SVf " : %s",
1691                         greedy_proto, SVfARG(name), p);
1692         if (in_brackets)
1693             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1694                         "Missing ']' in prototype for %" SVf " : %s",
1695                         SVfARG(name), p);
1696         if (bad_proto)
1697             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1698                         "Illegal character in prototype for %" SVf " : %s",
1699                         SVfARG(name), p);
1700         if (bad_proto_after_underscore)
1701             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1702                         "Illegal character after '_' in prototype for %" SVf " : %s",
1703                         SVfARG(name), p);
1704     }
1705
1706     return (! (proto_after_greedy_proto || bad_proto) );
1707 }
1708
1709 /*
1710  * S_incline
1711  * This subroutine has nothing to do with tilting, whether at windmills
1712  * or pinball tables.  Its name is short for "increment line".  It
1713  * increments the current line number in CopLINE(PL_curcop) and checks
1714  * to see whether the line starts with a comment of the form
1715  *    # line 500 "foo.pm"
1716  * If so, it sets the current line number and file to the values in the comment.
1717  */
1718
1719 STATIC void
1720 S_incline(pTHX_ const char *s)
1721 {
1722     const char *t;
1723     const char *n;
1724     const char *e;
1725     line_t line_num;
1726     UV uv;
1727
1728     PERL_ARGS_ASSERT_INCLINE;
1729
1730     COPLINE_INC_WITH_HERELINES;
1731     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1732      && s+1 == PL_bufend && *s == ';') {
1733         /* fake newline in string eval */
1734         CopLINE_dec(PL_curcop);
1735         return;
1736     }
1737     if (*s++ != '#')
1738         return;
1739     while (SPACE_OR_TAB(*s))
1740         s++;
1741     if (strEQs(s, "line"))
1742         s += 4;
1743     else
1744         return;
1745     if (SPACE_OR_TAB(*s))
1746         s++;
1747     else
1748         return;
1749     while (SPACE_OR_TAB(*s))
1750         s++;
1751     if (!isDIGIT(*s))
1752         return;
1753
1754     n = s;
1755     while (isDIGIT(*s))
1756         s++;
1757     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1758         return;
1759     while (SPACE_OR_TAB(*s))
1760         s++;
1761     if (*s == '"' && (t = strchr(s+1, '"'))) {
1762         s++;
1763         e = t + 1;
1764     }
1765     else {
1766         t = s;
1767         while (*t && !isSPACE(*t))
1768             t++;
1769         e = t;
1770     }
1771     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1772         e++;
1773     if (*e != '\n' && *e != '\0')
1774         return;         /* false alarm */
1775
1776     if (!grok_atoUV(n, &uv, &e))
1777         return;
1778     line_num = ((line_t)uv) - 1;
1779
1780     if (t - s > 0) {
1781         const STRLEN len = t - s;
1782
1783         if (!PL_rsfp && !PL_parser->filtered) {
1784             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1785              * to *{"::_<newfilename"} */
1786             /* However, the long form of evals is only turned on by the
1787                debugger - usually they're "(eval %lu)" */
1788             GV * const cfgv = CopFILEGV(PL_curcop);
1789             if (cfgv) {
1790                 char smallbuf[128];
1791                 STRLEN tmplen2 = len;
1792                 char *tmpbuf2;
1793                 GV *gv2;
1794
1795                 if (tmplen2 + 2 <= sizeof smallbuf)
1796                     tmpbuf2 = smallbuf;
1797                 else
1798                     Newx(tmpbuf2, tmplen2 + 2, char);
1799
1800                 tmpbuf2[0] = '_';
1801                 tmpbuf2[1] = '<';
1802
1803                 memcpy(tmpbuf2 + 2, s, tmplen2);
1804                 tmplen2 += 2;
1805
1806                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1807                 if (!isGV(gv2)) {
1808                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1809                     /* adjust ${"::_<newfilename"} to store the new file name */
1810                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1811                     /* The line number may differ. If that is the case,
1812                        alias the saved lines that are in the array.
1813                        Otherwise alias the whole array. */
1814                     if (CopLINE(PL_curcop) == line_num) {
1815                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1816                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1817                     }
1818                     else if (GvAV(cfgv)) {
1819                         AV * const av = GvAV(cfgv);
1820                         const I32 start = CopLINE(PL_curcop)+1;
1821                         I32 items = AvFILLp(av) - start;
1822                         if (items > 0) {
1823                             AV * const av2 = GvAVn(gv2);
1824                             SV **svp = AvARRAY(av) + start;
1825                             I32 l = (I32)line_num+1;
1826                             while (items--)
1827                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1828                         }
1829                     }
1830                 }
1831
1832                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1833             }
1834         }
1835         CopFILE_free(PL_curcop);
1836         CopFILE_setn(PL_curcop, s, len);
1837     }
1838     CopLINE_set(PL_curcop, line_num);
1839 }
1840
1841 STATIC void
1842 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1843 {
1844     AV *av = CopFILEAVx(PL_curcop);
1845     if (av) {
1846         SV * sv;
1847         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1848         else {
1849             sv = *av_fetch(av, 0, 1);
1850             SvUPGRADE(sv, SVt_PVMG);
1851         }
1852         if (!SvPOK(sv)) SvPVCLEAR(sv);
1853         if (orig_sv)
1854             sv_catsv(sv, orig_sv);
1855         else
1856             sv_catpvn(sv, buf, len);
1857         if (!SvIOK(sv)) {
1858             (void)SvIOK_on(sv);
1859             SvIV_set(sv, 0);
1860         }
1861         if (PL_parser->preambling == NOLINE)
1862             av_store(av, CopLINE(PL_curcop), sv);
1863     }
1864 }
1865
1866 /*
1867  * skipspace
1868  * Called to gobble the appropriate amount and type of whitespace.
1869  * Skips comments as well.
1870  * Returns the next character after the whitespace that is skipped.
1871  *
1872  * peekspace
1873  * Same thing, but look ahead without incrementing line numbers or
1874  * adjusting PL_linestart.
1875  */
1876
1877 #define skipspace(s) skipspace_flags(s, 0)
1878 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1879
1880 STATIC char *
1881 S_skipspace_flags(pTHX_ char *s, U32 flags)
1882 {
1883     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1884     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1885         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1886             s++;
1887     } else {
1888         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1889         PL_bufptr = s;
1890         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1891                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1892                     LEX_NO_NEXT_CHUNK : 0));
1893         s = PL_bufptr;
1894         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1895         if (PL_linestart > PL_bufptr)
1896             PL_bufptr = PL_linestart;
1897         return s;
1898     }
1899     return s;
1900 }
1901
1902 /*
1903  * S_check_uni
1904  * Check the unary operators to ensure there's no ambiguity in how they're
1905  * used.  An ambiguous piece of code would be:
1906  *     rand + 5
1907  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1908  * the +5 is its argument.
1909  */
1910
1911 STATIC void
1912 S_check_uni(pTHX)
1913 {
1914     const char *s;
1915     const char *t;
1916
1917     if (PL_oldoldbufptr != PL_last_uni)
1918         return;
1919     while (isSPACE(*PL_last_uni))
1920         PL_last_uni++;
1921     s = PL_last_uni;
1922     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1923         s += UTF ? UTF8SKIP(s) : 1;
1924     if ((t = strchr(s, '(')) && t < PL_bufptr)
1925         return;
1926
1927     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1928                      "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1929                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1930 }
1931
1932 /*
1933  * LOP : macro to build a list operator.  Its behaviour has been replaced
1934  * with a subroutine, S_lop() for which LOP is just another name.
1935  */
1936
1937 #define LOP(f,x) return lop(f,x,s)
1938
1939 /*
1940  * S_lop
1941  * Build a list operator (or something that might be one).  The rules:
1942  *  - if we have a next token, then it's a list operator (no parens) for
1943  *    which the next token has already been parsed; e.g.,
1944  *       sort foo @args
1945  *       sort foo (@args)
1946  *  - if the next thing is an opening paren, then it's a function
1947  *  - else it's a list operator
1948  */
1949
1950 STATIC I32
1951 S_lop(pTHX_ I32 f, U8 x, char *s)
1952 {
1953     PERL_ARGS_ASSERT_LOP;
1954
1955     pl_yylval.ival = f;
1956     CLINE;
1957     PL_bufptr = s;
1958     PL_last_lop = PL_oldbufptr;
1959     PL_last_lop_op = (OPCODE)f;
1960     if (PL_nexttoke)
1961         goto lstop;
1962     PL_expect = x;
1963     if (*s == '(')
1964         return REPORT(FUNC);
1965     s = skipspace(s);
1966     if (*s == '(')
1967         return REPORT(FUNC);
1968     else {
1969         lstop:
1970         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1971             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1972         return REPORT(LSTOP);
1973     }
1974 }
1975
1976 /*
1977  * S_force_next
1978  * When the lexer realizes it knows the next token (for instance,
1979  * it is reordering tokens for the parser) then it can call S_force_next
1980  * to know what token to return the next time the lexer is called.  Caller
1981  * will need to set PL_nextval[] and possibly PL_expect to ensure
1982  * the lexer handles the token correctly.
1983  */
1984
1985 STATIC void
1986 S_force_next(pTHX_ I32 type)
1987 {
1988 #ifdef DEBUGGING
1989     if (DEBUG_T_TEST) {
1990         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1991         tokereport(type, &NEXTVAL_NEXTTOKE);
1992     }
1993 #endif
1994     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
1995     PL_nexttype[PL_nexttoke] = type;
1996     PL_nexttoke++;
1997 }
1998
1999 /*
2000  * S_postderef
2001  *
2002  * This subroutine handles postfix deref syntax after the arrow has already
2003  * been emitted.  @* $* etc. are emitted as two separate token right here.
2004  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2005  * only the first, leaving yylex to find the next.
2006  */
2007
2008 static int
2009 S_postderef(pTHX_ int const funny, char const next)
2010 {
2011     assert(funny == DOLSHARP || strchr("$@%&*", funny));
2012     if (next == '*') {
2013         PL_expect = XOPERATOR;
2014         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2015             assert('@' == funny || '$' == funny || DOLSHARP == funny);
2016             PL_lex_state = LEX_INTERPEND;
2017             if ('@' == funny)
2018                 force_next(POSTJOIN);
2019         }
2020         force_next(next);
2021         PL_bufptr+=2;
2022     }
2023     else {
2024         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2025          && !PL_lex_brackets)
2026             PL_lex_dojoin = 2;
2027         PL_expect = XOPERATOR;
2028         PL_bufptr++;
2029     }
2030     return funny;
2031 }
2032
2033 void
2034 Perl_yyunlex(pTHX)
2035 {
2036     int yyc = PL_parser->yychar;
2037     if (yyc != YYEMPTY) {
2038         if (yyc) {
2039             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2040             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2041                 PL_lex_allbrackets--;
2042                 PL_lex_brackets--;
2043                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2044             } else if (yyc == '('/*)*/) {
2045                 PL_lex_allbrackets--;
2046                 yyc |= (2<<24);
2047             }
2048             force_next(yyc);
2049         }
2050         PL_parser->yychar = YYEMPTY;
2051     }
2052 }
2053
2054 STATIC SV *
2055 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2056 {
2057     SV * const sv = newSVpvn_utf8(start, len,
2058                           !IN_BYTES
2059                           && UTF
2060                           && !is_utf8_invariant_string((const U8*)start, len)
2061                           && is_utf8_string((const U8*)start, len));
2062     return sv;
2063 }
2064
2065 /*
2066  * S_force_word
2067  * When the lexer knows the next thing is a word (for instance, it has
2068  * just seen -> and it knows that the next char is a word char, then
2069  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2070  * lookahead.
2071  *
2072  * Arguments:
2073  *   char *start : buffer position (must be within PL_linestr)
2074  *   int token   : PL_next* will be this type of bare word
2075  *                 (e.g., METHOD,BAREWORD)
2076  *   int check_keyword : if true, Perl checks to make sure the word isn't
2077  *       a keyword (do this if the word is a label, e.g. goto FOO)
2078  *   int allow_pack : if true, : characters will also be allowed (require,
2079  *       use, etc. do this)
2080  */
2081
2082 STATIC char *
2083 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2084 {
2085     char *s;
2086     STRLEN len;
2087
2088     PERL_ARGS_ASSERT_FORCE_WORD;
2089
2090     start = skipspace(start);
2091     s = start;
2092     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2093         || (allow_pack && *s == ':' && s[1] == ':') )
2094     {
2095         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2096         if (check_keyword) {
2097           char *s2 = PL_tokenbuf;
2098           STRLEN len2 = len;
2099           if (allow_pack && len > 6 && strEQs(s2, "CORE::"))
2100             s2 += 6, len2 -= 6;
2101           if (keyword(s2, len2, 0))
2102             return start;
2103         }
2104         if (token == METHOD) {
2105             s = skipspace(s);
2106             if (*s == '(')
2107                 PL_expect = XTERM;
2108             else {
2109                 PL_expect = XOPERATOR;
2110             }
2111         }
2112         NEXTVAL_NEXTTOKE.opval
2113             = newSVOP(OP_CONST,0,
2114                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2115         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2116         force_next(token);
2117     }
2118     return s;
2119 }
2120
2121 /*
2122  * S_force_ident
2123  * Called when the lexer wants $foo *foo &foo etc, but the program
2124  * text only contains the "foo" portion.  The first argument is a pointer
2125  * to the "foo", and the second argument is the type symbol to prefix.
2126  * Forces the next token to be a "BAREWORD".
2127  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2128  */
2129
2130 STATIC void
2131 S_force_ident(pTHX_ const char *s, int kind)
2132 {
2133     PERL_ARGS_ASSERT_FORCE_IDENT;
2134
2135     if (s[0]) {
2136         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2137         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2138                                                                 UTF ? SVf_UTF8 : 0));
2139         NEXTVAL_NEXTTOKE.opval = o;
2140         force_next(BAREWORD);
2141         if (kind) {
2142             o->op_private = OPpCONST_ENTERED;
2143             /* XXX see note in pp_entereval() for why we forgo typo
2144                warnings if the symbol must be introduced in an eval.
2145                GSAR 96-10-12 */
2146             gv_fetchpvn_flags(s, len,
2147                               (PL_in_eval ? GV_ADDMULTI
2148                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2149                               kind == '$' ? SVt_PV :
2150                               kind == '@' ? SVt_PVAV :
2151                               kind == '%' ? SVt_PVHV :
2152                               SVt_PVGV
2153                               );
2154         }
2155     }
2156 }
2157
2158 static void
2159 S_force_ident_maybe_lex(pTHX_ char pit)
2160 {
2161     NEXTVAL_NEXTTOKE.ival = pit;
2162     force_next('p');
2163 }
2164
2165 NV
2166 Perl_str_to_version(pTHX_ SV *sv)
2167 {
2168     NV retval = 0.0;
2169     NV nshift = 1.0;
2170     STRLEN len;
2171     const char *start = SvPV_const(sv,len);
2172     const char * const end = start + len;
2173     const bool utf = cBOOL(SvUTF8(sv));
2174
2175     PERL_ARGS_ASSERT_STR_TO_VERSION;
2176
2177     while (start < end) {
2178         STRLEN skip;
2179         UV n;
2180         if (utf)
2181             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2182         else {
2183             n = *(U8*)start;
2184             skip = 1;
2185         }
2186         retval += ((NV)n)/nshift;
2187         start += skip;
2188         nshift *= 1000;
2189     }
2190     return retval;
2191 }
2192
2193 /*
2194  * S_force_version
2195  * Forces the next token to be a version number.
2196  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2197  * and if "guessing" is TRUE, then no new token is created (and the caller
2198  * must use an alternative parsing method).
2199  */
2200
2201 STATIC char *
2202 S_force_version(pTHX_ char *s, int guessing)
2203 {
2204     OP *version = NULL;
2205     char *d;
2206
2207     PERL_ARGS_ASSERT_FORCE_VERSION;
2208
2209     s = skipspace(s);
2210
2211     d = s;
2212     if (*d == 'v')
2213         d++;
2214     if (isDIGIT(*d)) {
2215         while (isDIGIT(*d) || *d == '_' || *d == '.')
2216             d++;
2217         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2218             SV *ver;
2219             s = scan_num(s, &pl_yylval);
2220             version = pl_yylval.opval;
2221             ver = cSVOPx(version)->op_sv;
2222             if (SvPOK(ver) && !SvNIOK(ver)) {
2223                 SvUPGRADE(ver, SVt_PVNV);
2224                 SvNV_set(ver, str_to_version(ver));
2225                 SvNOK_on(ver);          /* hint that it is a version */
2226             }
2227         }
2228         else if (guessing) {
2229             return s;
2230         }
2231     }
2232
2233     /* NOTE: The parser sees the package name and the VERSION swapped */
2234     NEXTVAL_NEXTTOKE.opval = version;
2235     force_next(BAREWORD);
2236
2237     return s;
2238 }
2239
2240 /*
2241  * S_force_strict_version
2242  * Forces the next token to be a version number using strict syntax rules.
2243  */
2244
2245 STATIC char *
2246 S_force_strict_version(pTHX_ char *s)
2247 {
2248     OP *version = NULL;
2249     const char *errstr = NULL;
2250
2251     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2252
2253     while (isSPACE(*s)) /* leading whitespace */
2254         s++;
2255
2256     if (is_STRICT_VERSION(s,&errstr)) {
2257         SV *ver = newSV(0);
2258         s = (char *)scan_version(s, ver, 0);
2259         version = newSVOP(OP_CONST, 0, ver);
2260     }
2261     else if ((*s != ';' && *s != '{' && *s != '}' )
2262              && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2263     {
2264         PL_bufptr = s;
2265         if (errstr)
2266             yyerror(errstr); /* version required */
2267         return s;
2268     }
2269
2270     /* NOTE: The parser sees the package name and the VERSION swapped */
2271     NEXTVAL_NEXTTOKE.opval = version;
2272     force_next(BAREWORD);
2273
2274     return s;
2275 }
2276
2277 /*
2278  * S_tokeq
2279  * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2280  * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2281  * unchanged, and a new SV containing the modified input is returned.
2282  */
2283
2284 STATIC SV *
2285 S_tokeq(pTHX_ SV *sv)
2286 {
2287     char *s;
2288     char *send;
2289     char *d;
2290     SV *pv = sv;
2291
2292     PERL_ARGS_ASSERT_TOKEQ;
2293
2294     assert (SvPOK(sv));
2295     assert (SvLEN(sv));
2296     assert (!SvIsCOW(sv));
2297     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2298         goto finish;
2299     s = SvPVX(sv);
2300     send = SvEND(sv);
2301     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2302     while (s < send && !(*s == '\\' && s[1] == '\\'))
2303         s++;
2304     if (s == send)
2305         goto finish;
2306     d = s;
2307     if ( PL_hints & HINT_NEW_STRING ) {
2308         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2309                             SVs_TEMP | SvUTF8(sv));
2310     }
2311     while (s < send) {
2312         if (*s == '\\') {
2313             if (s + 1 < send && (s[1] == '\\'))
2314                 s++;            /* all that, just for this */
2315         }
2316         *d++ = *s++;
2317     }
2318     *d = '\0';
2319     SvCUR_set(sv, d - SvPVX_const(sv));
2320   finish:
2321     if ( PL_hints & HINT_NEW_STRING )
2322        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2323     return sv;
2324 }
2325
2326 /*
2327  * Now come three functions related to double-quote context,
2328  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2329  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2330  * interact with PL_lex_state, and create fake ( ... ) argument lists
2331  * to handle functions and concatenation.
2332  * For example,
2333  *   "foo\lbar"
2334  * is tokenised as
2335  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2336  */
2337
2338 /*
2339  * S_sublex_start
2340  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2341  *
2342  * Pattern matching will set PL_lex_op to the pattern-matching op to
2343  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2344  *
2345  * OP_CONST is easy--just make the new op and return.
2346  *
2347  * Everything else becomes a FUNC.
2348  *
2349  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2350  * had an OP_CONST.  This just sets us up for a
2351  * call to S_sublex_push().
2352  */
2353
2354 STATIC I32
2355 S_sublex_start(pTHX)
2356 {
2357     const I32 op_type = pl_yylval.ival;
2358
2359     if (op_type == OP_NULL) {
2360         pl_yylval.opval = PL_lex_op;
2361         PL_lex_op = NULL;
2362         return THING;
2363     }
2364     if (op_type == OP_CONST) {
2365         SV *sv = PL_lex_stuff;
2366         PL_lex_stuff = NULL;
2367         sv = tokeq(sv);
2368
2369         if (SvTYPE(sv) == SVt_PVIV) {
2370             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2371             STRLEN len;
2372             const char * const p = SvPV_const(sv, len);
2373             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2374             SvREFCNT_dec(sv);
2375             sv = nsv;
2376         }
2377         pl_yylval.opval = newSVOP(op_type, 0, sv);
2378         return THING;
2379     }
2380
2381     PL_parser->lex_super_state = PL_lex_state;
2382     PL_parser->lex_sub_inwhat = (U16)op_type;
2383     PL_parser->lex_sub_op = PL_lex_op;
2384     PL_lex_state = LEX_INTERPPUSH;
2385
2386     PL_expect = XTERM;
2387     if (PL_lex_op) {
2388         pl_yylval.opval = PL_lex_op;
2389         PL_lex_op = NULL;
2390         return PMFUNC;
2391     }
2392     else
2393         return FUNC;
2394 }
2395
2396 /*
2397  * S_sublex_push
2398  * Create a new scope to save the lexing state.  The scope will be
2399  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2400  * to the uc, lc, etc. found before.
2401  * Sets PL_lex_state to LEX_INTERPCONCAT.
2402  */
2403
2404 STATIC I32
2405 S_sublex_push(pTHX)
2406 {
2407     LEXSHARED *shared;
2408     const bool is_heredoc = PL_multi_close == '<';
2409     ENTER;
2410
2411     PL_lex_state = PL_parser->lex_super_state;
2412     SAVEI8(PL_lex_dojoin);
2413     SAVEI32(PL_lex_brackets);
2414     SAVEI32(PL_lex_allbrackets);
2415     SAVEI32(PL_lex_formbrack);
2416     SAVEI8(PL_lex_fakeeof);
2417     SAVEI32(PL_lex_casemods);
2418     SAVEI32(PL_lex_starts);
2419     SAVEI8(PL_lex_state);
2420     SAVESPTR(PL_lex_repl);
2421     SAVEVPTR(PL_lex_inpat);
2422     SAVEI16(PL_lex_inwhat);
2423     if (is_heredoc)
2424     {
2425         SAVECOPLINE(PL_curcop);
2426         SAVEI32(PL_multi_end);
2427         SAVEI32(PL_parser->herelines);
2428         PL_parser->herelines = 0;
2429     }
2430     SAVEIV(PL_multi_close);
2431     SAVEPPTR(PL_bufptr);
2432     SAVEPPTR(PL_bufend);
2433     SAVEPPTR(PL_oldbufptr);
2434     SAVEPPTR(PL_oldoldbufptr);
2435     SAVEPPTR(PL_last_lop);
2436     SAVEPPTR(PL_last_uni);
2437     SAVEPPTR(PL_linestart);
2438     SAVESPTR(PL_linestr);
2439     SAVEGENERICPV(PL_lex_brackstack);
2440     SAVEGENERICPV(PL_lex_casestack);
2441     SAVEGENERICPV(PL_parser->lex_shared);
2442     SAVEBOOL(PL_parser->lex_re_reparsing);
2443     SAVEI32(PL_copline);
2444
2445     /* The here-doc parser needs to be able to peek into outer lexing
2446        scopes to find the body of the here-doc.  So we put PL_linestr and
2447        PL_bufptr into lex_shared, to ‘share’ those values.
2448      */
2449     PL_parser->lex_shared->ls_linestr = PL_linestr;
2450     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2451
2452     PL_linestr = PL_lex_stuff;
2453     PL_lex_repl = PL_parser->lex_sub_repl;
2454     PL_lex_stuff = NULL;
2455     PL_parser->lex_sub_repl = NULL;
2456
2457     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2458        set for an inner quote-like operator and then an error causes scope-
2459        popping.  We must not have a PL_lex_stuff value left dangling, as
2460        that breaks assumptions elsewhere.  See bug #123617.  */
2461     SAVEGENERICSV(PL_lex_stuff);
2462     SAVEGENERICSV(PL_parser->lex_sub_repl);
2463
2464     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2465         = SvPVX(PL_linestr);
2466     PL_bufend += SvCUR(PL_linestr);
2467     PL_last_lop = PL_last_uni = NULL;
2468     SAVEFREESV(PL_linestr);
2469     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2470
2471     PL_lex_dojoin = FALSE;
2472     PL_lex_brackets = PL_lex_formbrack = 0;
2473     PL_lex_allbrackets = 0;
2474     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2475     Newx(PL_lex_brackstack, 120, char);
2476     Newx(PL_lex_casestack, 12, char);
2477     PL_lex_casemods = 0;
2478     *PL_lex_casestack = '\0';
2479     PL_lex_starts = 0;
2480     PL_lex_state = LEX_INTERPCONCAT;
2481     if (is_heredoc)
2482         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2483     PL_copline = NOLINE;
2484
2485     Newxz(shared, 1, LEXSHARED);
2486     shared->ls_prev = PL_parser->lex_shared;
2487     PL_parser->lex_shared = shared;
2488
2489     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2490     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2491     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2492         PL_lex_inpat = PL_parser->lex_sub_op;
2493     else
2494         PL_lex_inpat = NULL;
2495
2496     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2497     PL_in_eval &= ~EVAL_RE_REPARSING;
2498
2499     return '(';
2500 }
2501
2502 /*
2503  * S_sublex_done
2504  * Restores lexer state after a S_sublex_push.
2505  */
2506
2507 STATIC I32
2508 S_sublex_done(pTHX)
2509 {
2510     if (!PL_lex_starts++) {
2511         SV * const sv = newSVpvs("");
2512         if (SvUTF8(PL_linestr))
2513             SvUTF8_on(sv);
2514         PL_expect = XOPERATOR;
2515         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2516         return THING;
2517     }
2518
2519     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2520         PL_lex_state = LEX_INTERPCASEMOD;
2521         return yylex();
2522     }
2523
2524     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2525     assert(PL_lex_inwhat != OP_TRANSR);
2526     if (PL_lex_repl) {
2527         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2528         PL_linestr = PL_lex_repl;
2529         PL_lex_inpat = 0;
2530         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2531         PL_bufend += SvCUR(PL_linestr);
2532         PL_last_lop = PL_last_uni = NULL;
2533         PL_lex_dojoin = FALSE;
2534         PL_lex_brackets = 0;
2535         PL_lex_allbrackets = 0;
2536         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2537         PL_lex_casemods = 0;
2538         *PL_lex_casestack = '\0';
2539         PL_lex_starts = 0;
2540         if (SvEVALED(PL_lex_repl)) {
2541             PL_lex_state = LEX_INTERPNORMAL;
2542             PL_lex_starts++;
2543             /*  we don't clear PL_lex_repl here, so that we can check later
2544                 whether this is an evalled subst; that means we rely on the
2545                 logic to ensure sublex_done() is called again only via the
2546                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2547         }
2548         else {
2549             PL_lex_state = LEX_INTERPCONCAT;
2550             PL_lex_repl = NULL;
2551         }
2552         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2553             CopLINE(PL_curcop) +=
2554                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2555                  + PL_parser->herelines;
2556             PL_parser->herelines = 0;
2557         }
2558         return '/';
2559     }
2560     else {
2561         const line_t l = CopLINE(PL_curcop);
2562         LEAVE;
2563         if (PL_multi_close == '<')
2564             PL_parser->herelines += l - PL_multi_end;
2565         PL_bufend = SvPVX(PL_linestr);
2566         PL_bufend += SvCUR(PL_linestr);
2567         PL_expect = XOPERATOR;
2568         return ')';
2569     }
2570 }
2571
2572 STATIC SV*
2573 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2574 {
2575     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2576      * interior, hence to the "}".  Finds what the name resolves to, returning
2577      * an SV* containing it; NULL if no valid one found */
2578
2579     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2580
2581     HV * table;
2582     SV **cvp;
2583     SV *cv;
2584     SV *rv;
2585     HV *stash;
2586     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2587
2588     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2589
2590     if (!SvCUR(res)) {
2591         deprecate_fatal_in("5.28", "Unknown charname '' is deprecated");
2592         return res;
2593     }
2594
2595     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2596                         /* include the <}> */
2597                         e - backslash_ptr + 1);
2598     if (! SvPOK(res)) {
2599         SvREFCNT_dec_NN(res);
2600         return NULL;
2601     }
2602
2603     /* See if the charnames handler is the Perl core's, and if so, we can skip
2604      * the validation needed for a user-supplied one, as Perl's does its own
2605      * validation. */
2606     table = GvHV(PL_hintgv);             /* ^H */
2607     cvp = hv_fetchs(table, "charnames", FALSE);
2608     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2609         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2610     {
2611         const char * const name = HvNAME(stash);
2612         if (HvNAMELEN(stash) == sizeof("_charnames")-1
2613          && strEQ(name, "_charnames")) {
2614            return res;
2615        }
2616     }
2617
2618     /* Here, it isn't Perl's charname handler.  We can't rely on a
2619      * user-supplied handler to validate the input name.  For non-ut8 input,
2620      * look to see that the first character is legal.  Then loop through the
2621      * rest checking that each is a continuation */
2622
2623     /* This code makes the reasonable assumption that the only Latin1-range
2624      * characters that begin a character name alias are alphabetic, otherwise
2625      * would have to create a isCHARNAME_BEGIN macro */
2626
2627     if (! UTF) {
2628         if (! isALPHAU(*s)) {
2629             goto bad_charname;
2630         }
2631         s++;
2632         while (s < e) {
2633             if (! isCHARNAME_CONT(*s)) {
2634                 goto bad_charname;
2635             }
2636             if (*s == ' ' && *(s-1) == ' ') {
2637                 goto multi_spaces;
2638             }
2639             s++;
2640         }
2641     }
2642     else {
2643         /* Similarly for utf8.  For invariants can check directly; for other
2644          * Latin1, can calculate their code point and check; otherwise  use a
2645          * swash */
2646         if (UTF8_IS_INVARIANT(*s)) {
2647             if (! isALPHAU(*s)) {
2648                 goto bad_charname;
2649             }
2650             s++;
2651         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2652             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2653                 goto bad_charname;
2654             }
2655             s += 2;
2656         }
2657         else {
2658             if (! PL_utf8_charname_begin) {
2659                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2660                 PL_utf8_charname_begin = _core_swash_init("utf8",
2661                                                         "_Perl_Charname_Begin",
2662                                                         &PL_sv_undef,
2663                                                         1, 0, NULL, &flags);
2664             }
2665             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2666                 goto bad_charname;
2667             }
2668             s += UTF8SKIP(s);
2669         }
2670
2671         while (s < e) {
2672             if (UTF8_IS_INVARIANT(*s)) {
2673                 if (! isCHARNAME_CONT(*s)) {
2674                     goto bad_charname;
2675                 }
2676                 if (*s == ' ' && *(s-1) == ' ') {
2677                     goto multi_spaces;
2678                 }
2679                 s++;
2680             }
2681             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2682                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2683                 {
2684                     goto bad_charname;
2685                 }
2686                 s += 2;
2687             }
2688             else {
2689                 if (! PL_utf8_charname_continue) {
2690                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2691                     PL_utf8_charname_continue = _core_swash_init("utf8",
2692                                                 "_Perl_Charname_Continue",
2693                                                 &PL_sv_undef,
2694                                                 1, 0, NULL, &flags);
2695                 }
2696                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2697                     goto bad_charname;
2698                 }
2699                 s += UTF8SKIP(s);
2700             }
2701         }
2702     }
2703     if (*(s-1) == ' ') {
2704         yyerror_pv(
2705             Perl_form(aTHX_
2706             "charnames alias definitions may not contain trailing "
2707             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2708             (int)(s - backslash_ptr + 1), backslash_ptr,
2709             (int)(e - s + 1), s + 1
2710             ),
2711         UTF ? SVf_UTF8 : 0);
2712         return NULL;
2713     }
2714
2715     if (SvUTF8(res)) { /* Don't accept malformed input */
2716         const U8* first_bad_char_loc;
2717         STRLEN len;
2718         const char* const str = SvPV_const(res, len);
2719         if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2720                                           &first_bad_char_loc)))
2721         {
2722             _force_out_malformed_utf8_message(first_bad_char_loc,
2723                                               (U8 *) PL_parser->bufend,
2724                                               0,
2725                                               0 /* 0 means don't die */ );
2726             yyerror_pv(
2727               Perl_form(aTHX_
2728                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2729                  (int) (e - backslash_ptr + 1), backslash_ptr,
2730                  (int) ((char *) first_bad_char_loc - str), str
2731               ),
2732               SVf_UTF8);
2733             return NULL;
2734         }
2735     }
2736
2737     return res;
2738
2739   bad_charname: {
2740
2741         /* The final %.*s makes sure that should the trailing NUL be missing
2742          * that this print won't run off the end of the string */
2743         yyerror_pv(
2744           Perl_form(aTHX_
2745             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2746             (int)(s - backslash_ptr + 1), backslash_ptr,
2747             (int)(e - s + 1), s + 1
2748           ),
2749           UTF ? SVf_UTF8 : 0);
2750         return NULL;
2751     }
2752
2753   multi_spaces:
2754         yyerror_pv(
2755           Perl_form(aTHX_
2756             "charnames alias definitions may not contain a sequence of "
2757             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2758             (int)(s - backslash_ptr + 1), backslash_ptr,
2759             (int)(e - s + 1), s + 1
2760           ),
2761           UTF ? SVf_UTF8 : 0);
2762         return NULL;
2763 }
2764
2765 /*
2766   scan_const
2767
2768   Extracts the next constant part of a pattern, double-quoted string,
2769   or transliteration.  This is terrifying code.
2770
2771   For example, in parsing the double-quoted string "ab\x63$d", it would
2772   stop at the '$' and return an OP_CONST containing 'abc'.
2773
2774   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2775   processing a pattern (PL_lex_inpat is true), a transliteration
2776   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2777
2778   Returns a pointer to the character scanned up to. If this is
2779   advanced from the start pointer supplied (i.e. if anything was
2780   successfully parsed), will leave an OP_CONST for the substring scanned
2781   in pl_yylval. Caller must intuit reason for not parsing further
2782   by looking at the next characters herself.
2783
2784   In patterns:
2785     expand:
2786       \N{FOO}  => \N{U+hex_for_character_FOO}
2787       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2788
2789     pass through:
2790         all other \-char, including \N and \N{ apart from \N{ABC}
2791
2792     stops on:
2793         @ and $ where it appears to be a var, but not for $ as tail anchor
2794         \l \L \u \U \Q \E
2795         (?{  or  (??{
2796
2797   In transliterations:
2798     characters are VERY literal, except for - not at the start or end
2799     of the string, which indicates a range.  However some backslash sequences
2800     are recognized: \r, \n, and the like
2801                     \007 \o{}, \x{}, \N{}
2802     If all elements in the transliteration are below 256,
2803     scan_const expands the range to the full set of intermediate
2804     characters. If the range is in utf8, the hyphen is replaced with
2805     a certain range mark which will be handled by pmtrans() in op.c.
2806
2807   In double-quoted strings:
2808     backslashes:
2809       all those recognized in transliterations
2810       deprecated backrefs: \1 (in substitution replacements)
2811       case and quoting: \U \Q \E
2812     stops on @ and $
2813
2814   scan_const does *not* construct ops to handle interpolated strings.
2815   It stops processing as soon as it finds an embedded $ or @ variable
2816   and leaves it to the caller to work out what's going on.
2817
2818   embedded arrays (whether in pattern or not) could be:
2819       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2820
2821   $ in double-quoted strings must be the symbol of an embedded scalar.
2822
2823   $ in pattern could be $foo or could be tail anchor.  Assumption:
2824   it's a tail anchor if $ is the last thing in the string, or if it's
2825   followed by one of "()| \r\n\t"
2826
2827   \1 (backreferences) are turned into $1 in substitutions
2828
2829   The structure of the code is
2830       while (there's a character to process) {
2831           handle transliteration ranges
2832           skip regexp comments /(?#comment)/ and codes /(?{code})/
2833           skip #-initiated comments in //x patterns
2834           check for embedded arrays
2835           check for embedded scalars
2836           if (backslash) {
2837               deprecate \1 in substitution replacements
2838               handle string-changing backslashes \l \U \Q \E, etc.
2839               switch (what was escaped) {
2840                   handle \- in a transliteration (becomes a literal -)
2841                   if a pattern and not \N{, go treat as regular character
2842                   handle \132 (octal characters)
2843                   handle \x15 and \x{1234} (hex characters)
2844                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2845                   handle \cV (control characters)
2846                   handle printf-style backslashes (\f, \r, \n, etc)
2847               } (end switch)
2848               continue
2849           } (end if backslash)
2850           handle regular character
2851     } (end while character to read)
2852
2853 */
2854
2855 STATIC char *
2856 S_scan_const(pTHX_ char *start)
2857 {
2858     char *send = PL_bufend;             /* end of the constant */
2859     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2860                                            on sizing. */
2861     char *s = start;                    /* start of the constant */
2862     char *d = SvPVX(sv);                /* destination for copies */
2863     bool dorange = FALSE;               /* are we in a translit range? */
2864     bool didrange = FALSE;              /* did we just finish a range? */
2865     bool in_charclass = FALSE;          /* within /[...]/ */
2866     bool has_utf8 = FALSE;              /* Output constant is UTF8 */
2867     bool  this_utf8 = cBOOL(UTF);       /* Is the source string assumed to be
2868                                            UTF8?  But, this can show as true
2869                                            when the source isn't utf8, as for
2870                                            example when it is entirely composed
2871                                            of hex constants */
2872     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
2873                                            number of characters found so far
2874                                            that will expand (into 2 bytes)
2875                                            should we have to convert to
2876                                            UTF-8) */
2877     SV *res;                            /* result from charnames */
2878     STRLEN offset_to_max;   /* The offset in the output to where the range
2879                                high-end character is temporarily placed */
2880
2881     /* Does something require special handling in tr/// ?  This avoids extra
2882      * work in a less likely case.  As such, khw didn't feel it was worth
2883      * adding any branches to the more mainline code to handle this, which
2884      * means that this doesn't get set in some circumstances when things like
2885      * \x{100} get expanded out.  As a result there needs to be extra testing
2886      * done in the tr code */
2887     bool has_above_latin1 = FALSE;
2888
2889     /* Note on sizing:  The scanned constant is placed into sv, which is
2890      * initialized by newSV() assuming one byte of output for every byte of
2891      * input.  This routine expects newSV() to allocate an extra byte for a
2892      * trailing NUL, which this routine will append if it gets to the end of
2893      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2894      * CAPITAL LETTER A}), or more output than input if the constant ends up
2895      * recoded to utf8, but each time a construct is found that might increase
2896      * the needed size, SvGROW() is called.  Its size parameter each time is
2897      * based on the best guess estimate at the time, namely the length used so
2898      * far, plus the length the current construct will occupy, plus room for
2899      * the trailing NUL, plus one byte for every input byte still unscanned */
2900
2901     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2902                        before set */
2903 #ifdef EBCDIC
2904     int backslash_N = 0;            /* ? was the character from \N{} */
2905     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
2906                                        platform-specific like \x65 */
2907 #endif
2908
2909     PERL_ARGS_ASSERT_SCAN_CONST;
2910
2911     assert(PL_lex_inwhat != OP_TRANSR);
2912     if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
2913         /* If we are doing a trans and we know we want UTF8 set expectation */
2914         has_utf8   = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2915         this_utf8  = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2916     }
2917
2918     /* Protect sv from errors and fatal warnings. */
2919     ENTER_with_name("scan_const");
2920     SAVEFREESV(sv);
2921
2922     while (s < send
2923            || dorange   /* Handle tr/// range at right edge of input */
2924     ) {
2925
2926         /* get transliterations out of the way (they're most literal) */
2927         if (PL_lex_inwhat == OP_TRANS) {
2928
2929             /* But there isn't any special handling necessary unless there is a
2930              * range, so for most cases we just drop down and handle the value
2931              * as any other.  There are two exceptions.
2932              *
2933              * 1.  A hyphen indicates that we are actually going to have a
2934              *     range.  In this case, skip the '-', set a flag, then drop
2935              *     down to handle what should be the end range value.
2936              * 2.  After we've handled that value, the next time through, that
2937              *     flag is set and we fix up the range.
2938              *
2939              * Ranges entirely within Latin1 are expanded out entirely, in
2940              * order to make the transliteration a simple table look-up.
2941              * Ranges that extend above Latin1 have to be done differently, so
2942              * there is no advantage to expanding them here, so they are
2943              * stored here as Min, ILLEGAL_UTF8_BYTE, Max.  The illegal byte
2944              * signifies a hyphen without any possible ambiguity.  On EBCDIC
2945              * machines, if the range is expressed as Unicode, the Latin1
2946              * portion is expanded out even if the range extends above
2947              * Latin1.  This is because each code point in it has to be
2948              * processed here individually to get its native translation */
2949
2950             if (! dorange) {
2951
2952                 /* Here, we don't think we're in a range.  If the new character
2953                  * is not a hyphen; or if it is a hyphen, but it's too close to
2954                  * either edge to indicate a range, then it's a regular
2955                  * character. */
2956                 if (*s != '-' || s >= send - 1 || s == start) {
2957
2958                     /* A regular character.  Process like any other, but first
2959                      * clear any flags */
2960                     didrange = FALSE;
2961                     dorange = FALSE;
2962 #ifdef EBCDIC
2963                     non_portable_endpoint = 0;
2964                     backslash_N = 0;
2965 #endif
2966                     /* The tests here for being above Latin1 and similar ones
2967                      * in the following 'else' suffice to find all such
2968                      * occurences in the constant, except those added by a
2969                      * backslash escape sequence, like \x{100}.  Mostly, those
2970                      * set 'has_above_latin1' as appropriate */
2971                     if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
2972                         has_above_latin1 = TRUE;
2973                     }
2974
2975                     /* Drops down to generic code to process current byte */
2976                 }
2977                 else {  /* Is a '-' in the context where it means a range */
2978                     if (didrange) { /* Something like y/A-C-Z// */
2979                         Perl_croak(aTHX_ "Ambiguous range in transliteration"
2980                                          " operator");
2981                     }
2982
2983                     dorange = TRUE;
2984
2985                     s++;    /* Skip past the hyphen */
2986
2987                     /* d now points to where the end-range character will be
2988                      * placed.  Save it so won't have to go finding it later,
2989                      * and drop down to get that character.  (Actually we
2990                      * instead save the offset, to handle the case where a
2991                      * realloc in the meantime could change the actual
2992                      * pointer).  We'll finish processing the range the next
2993                      * time through the loop */
2994                     offset_to_max = d - SvPVX_const(sv);
2995
2996                     if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
2997                         has_above_latin1 = TRUE;
2998                     }
2999
3000                     /* Drops down to generic code to process current byte */
3001                 }
3002             }  /* End of not a range */
3003             else {
3004                 /* Here we have parsed a range.  Now must handle it.  At this
3005                  * point:
3006                  * 'sv' is a SV* that contains the output string we are
3007                  *      constructing.  The final two characters in that string
3008                  *      are the range start and range end, in order.
3009                  * 'd'  points to just beyond the range end in the 'sv' string,
3010                  *      where we would next place something
3011                  * 'offset_to_max' is the offset in 'sv' at which the character
3012                  *      (the range's maximum end point) before 'd'  begins.
3013                  */
3014                 char * max_ptr = SvPVX(sv) + offset_to_max;
3015                 char * min_ptr;
3016                 IV range_min;
3017                 IV range_max;   /* last character in range */
3018                 STRLEN grow;
3019                 Size_t offset_to_min = 0;
3020                 Size_t extras = 0;
3021 #ifdef EBCDIC
3022                 bool convert_unicode;
3023                 IV real_range_max = 0;
3024 #endif
3025                 /* Get the code point values of the range ends. */
3026                 if (has_utf8) {
3027                     /* We know the utf8 is valid, because we just constructed
3028                      * it ourselves in previous loop iterations */
3029                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3030                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3031                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3032
3033                     /* This compensates for not all code setting
3034                      * 'has_above_latin1', so that we don't skip stuff that
3035                      * should be executed */
3036                     if (range_max > 255) {
3037                         has_above_latin1 = TRUE;
3038                     }
3039                 }
3040                 else {
3041                     min_ptr = max_ptr - 1;
3042                     range_min = * (U8*) min_ptr;
3043                     range_max = * (U8*) max_ptr;
3044                 }
3045
3046                 /* If the range is just a single code point, like tr/a-a/.../,
3047                  * that code point is already in the output, twice.  We can
3048                  * just back up over the second instance and avoid all the rest
3049                  * of the work.  But if it is a variant character, it's been
3050                  * counted twice, so decrement.  (This unlikely scenario is
3051                  * special cased, like the one for a range of 2 code points
3052                  * below, only because the main-line code below needs a range
3053                  * of 3 or more to work without special casing.  Might as well
3054                  * get it out of the way now.) */
3055                 if (UNLIKELY(range_max == range_min)) {
3056                     d = max_ptr;
3057                     if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3058                         utf8_variant_count--;
3059                     }
3060                     goto range_done;
3061                 }
3062
3063 #ifdef EBCDIC
3064                 /* On EBCDIC platforms, we may have to deal with portable
3065                  * ranges.  These happen if at least one range endpoint is a
3066                  * Unicode value (\N{...}), or if the range is a subset of
3067                  * [A-Z] or [a-z], and both ends are literal characters,
3068                  * like 'A', and not like \x{C1} */
3069                 convert_unicode =
3070                                cBOOL(backslash_N)   /* \N{} forces Unicode,
3071                                                        hence portable range */
3072                     || (     ! non_portable_endpoint
3073                         && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3074                            || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3075                 if (convert_unicode) {
3076
3077                     /* Special handling is needed for these portable ranges.
3078                      * They are defined to be in Unicode terms, which includes
3079                      * all the Unicode code points between the end points.
3080                      * Convert to Unicode to get the Unicode range.  Later we
3081                      * will convert each code point in the range back to
3082                      * native.  */
3083                     range_min = NATIVE_TO_UNI(range_min);
3084                     range_max = NATIVE_TO_UNI(range_max);
3085                 }
3086 #endif
3087
3088                 if (range_min > range_max) {
3089 #ifdef EBCDIC
3090                     if (convert_unicode) {
3091                         /* Need to convert back to native for meaningful
3092                          * messages for this platform */
3093                         range_min = UNI_TO_NATIVE(range_min);
3094                         range_max = UNI_TO_NATIVE(range_max);
3095                     }
3096 #endif
3097                     /* Use the characters themselves for the error message if
3098                      * ASCII printables; otherwise some visible representation
3099                      * of them */
3100                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3101                         Perl_croak(aTHX_
3102                          "Invalid range \"%c-%c\" in transliteration operator",
3103                          (char)range_min, (char)range_max);
3104                     }
3105 #ifdef EBCDIC
3106                     else if (convert_unicode) {
3107         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3108                         Perl_croak(aTHX_
3109                            "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3110                            UVXf "}\" in transliteration operator",
3111                            range_min, range_max);
3112                     }
3113 #endif
3114                     else {
3115         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3116                         Perl_croak(aTHX_
3117                            "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3118                            " in transliteration operator",
3119                            range_min, range_max);
3120                     }
3121                 }
3122
3123                 /* If the range is exactly two code points long, they are
3124                  * already both in the output */
3125                 if (UNLIKELY(range_min + 1 == range_max)) {
3126                     goto range_done;
3127                 }
3128
3129                 /* Here the range contains at least 3 code points */
3130
3131                 if (has_utf8) {
3132
3133                     /* If everything in the transliteration is below 256, we
3134                      * can avoid special handling later.  A translation table
3135                      * for each of those bytes is created by op.c.  So we
3136                      * expand out all ranges to their constituent code points.
3137                      * But if we've encountered something above 255, the
3138                      * expanding won't help, so skip doing that.  But if it's
3139                      * EBCDIC, we may have to look at each character below 256
3140                      * if we have to convert to/from Unicode values */
3141                     if (   has_above_latin1
3142 #ifdef EBCDIC
3143                         && (range_min > 255 || ! convert_unicode)
3144 #endif
3145                     ) {
3146                         /* Move the high character one byte to the right; then
3147                          * insert between it and the range begin, an illegal
3148                          * byte which serves to indicate this is a range (using
3149                          * a '-' would be ambiguous). */
3150                         char *e = d++;
3151                         while (e-- > max_ptr) {
3152                             *(e + 1) = *e;
3153                         }
3154                         *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
3155                         goto range_done;
3156                     }
3157
3158                     /* Here, we're going to expand out the range.  For EBCDIC
3159                      * the range can extend above 255 (not so in ASCII), so
3160                      * for EBCDIC, split it into the parts above and below
3161                      * 255/256 */
3162 #ifdef EBCDIC
3163                     if (range_max > 255) {
3164                         real_range_max = range_max;
3165                         range_max = 255;
3166                     }
3167 #endif
3168                 }
3169
3170                 /* Here we need to expand out the string to contain each
3171                  * character in the range.  Grow the output to handle this.
3172                  * For non-UTF8, we need a byte for each code point in the
3173                  * range, minus the three that we've already allocated for: the
3174                  * hyphen, the min, and the max.  For UTF-8, we need this
3175                  * plus an extra byte for each code point that occupies two
3176                  * bytes (is variant) when in UTF-8 (except we've already
3177                  * allocated for the end points, including if they are
3178                  * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3179                  * platforms, it's easy to calculate a precise number.  To
3180                  * start, we count the variants in the range, which we need
3181                  * elsewhere in this function anyway.  (For the case where it
3182                  * isn't easy to calculate, 'extras' has been initialized to 0,
3183                  * and the calculation is done in a loop further down.) */
3184 #ifdef EBCDIC
3185                 if (convert_unicode)
3186 #endif
3187                 {
3188                     /* This is executed unconditionally on ASCII, and for
3189                      * Unicode ranges on EBCDIC.  Under these conditions, all
3190                      * code points above a certain value are variant; and none
3191                      * under that value are.  We just need to find out how much
3192                      * of the range is above that value.  We don't count the
3193                      * end points here, as they will already have been counted
3194                      * as they were parsed. */
3195                     if (range_min >= UTF_CONTINUATION_MARK) {
3196
3197                         /* The whole range is made up of variants */
3198                         extras = (range_max - 1) - (range_min + 1) + 1;
3199                     }
3200                     else if (range_max >= UTF_CONTINUATION_MARK) {
3201
3202                         /* Only the higher portion of the range is variants */
3203                         extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3204                     }
3205
3206                     utf8_variant_count += extras;
3207                 }
3208
3209                 /* The base growth is the number of code points in the range,
3210                  * not including the endpoints, which have already been sized
3211                  * for (and output).  We don't subtract for the hyphen, as it
3212                  * has been parsed but not output, and the SvGROW below is
3213                  * based only on what's been output plus what's left to parse.
3214                  * */
3215                 grow = (range_max - 1) - (range_min + 1) + 1;
3216
3217                 if (has_utf8) {
3218 #ifdef EBCDIC
3219                     /* In some cases in EBCDIC, we haven't yet calculated a
3220                      * precise amount needed for the UTF-8 variants.  Just
3221                      * assume the worst case, that everything will expand by a
3222                      * byte */
3223                     if (! convert_unicode) {
3224                         grow *= 2;
3225                     }
3226                     else
3227 #endif
3228                     {
3229                         /* Otherwise we know exactly how many variants there
3230                          * are in the range. */
3231                         grow += extras;
3232                     }
3233                 }
3234
3235                 /* Grow, but position the output to overwrite the range min end
3236                  * point, because in some cases we overwrite that */
3237                 SvCUR_set(sv, d - SvPVX_const(sv));
3238                 offset_to_min = min_ptr - SvPVX_const(sv);
3239
3240                 /* See Note on sizing above. */
3241                 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3242                                              + (send - s)
3243                                              + grow
3244                                              + 1 /* Trailing NUL */ );
3245
3246                 /* Now, we can expand out the range. */
3247 #ifdef EBCDIC
3248                 if (convert_unicode) {
3249                     SSize_t i;
3250
3251                     /* Recall that the min and max are now in Unicode terms, so
3252                      * we have to convert each character to its native
3253                      * equivalent */
3254                     if (has_utf8) {
3255                         for (i = range_min; i <= range_max; i++) {
3256                             append_utf8_from_native_byte(
3257                                                     LATIN1_TO_NATIVE((U8) i),
3258                                                     (U8 **) &d);
3259                         }
3260                     }
3261                     else {
3262                         for (i = range_min; i <= range_max; i++) {
3263                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3264                         }
3265                     }
3266                 }
3267                 else
3268 #endif
3269                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3270                 {
3271                     /* Here, no conversions are necessary, which means that the
3272                      * first character in the range is already in 'd' and
3273                      * valid, so we can skip overwriting it */
3274                     if (has_utf8) {
3275                         SSize_t i;
3276                         d += UTF8SKIP(d);
3277                         for (i = range_min + 1; i <= range_max; i++) {
3278                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3279                         }
3280                     }
3281                     else {
3282                         SSize_t i;
3283                         d++;
3284                         assert(range_min + 1 <= range_max);
3285                         for (i = range_min + 1; i < range_max; i++) {
3286 #ifdef EBCDIC
3287                             /* In this case on EBCDIC, we haven't calculated
3288                              * the variants.  Do it here, as we go along */
3289                             if (! UVCHR_IS_INVARIANT(i)) {
3290                                 utf8_variant_count++;
3291                             }
3292 #endif
3293                             *d++ = (char)i;
3294                         }
3295
3296                         /* The range_max is done outside the loop so as to
3297                          * avoid having to special case not incrementing
3298                          * 'utf8_variant_count' on EBCDIC (it's already been
3299                          * counted when originally parsed) */
3300                         *d++ = (char) range_max;
3301                     }
3302                 }
3303
3304 #ifdef EBCDIC
3305                 /* If the original range extended above 255, add in that
3306                  * portion. */
3307                 if (real_range_max) {
3308                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3309                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3310                     if (real_range_max > 0x100) {
3311                         if (real_range_max > 0x101) {
3312                             *d++ = (char) ILLEGAL_UTF8_BYTE;
3313                         }
3314                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3315                     }
3316                 }
3317 #endif
3318
3319               range_done:
3320                 /* mark the range as done, and continue */
3321                 didrange = TRUE;
3322                 dorange = FALSE;
3323 #ifdef EBCDIC
3324                 non_portable_endpoint = 0;
3325                 backslash_N = 0;
3326 #endif
3327                 continue;
3328             } /* End of is a range */
3329         } /* End of transliteration.  Joins main code after these else's */
3330         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3331             char *s1 = s-1;
3332             int esc = 0;
3333             while (s1 >= start && *s1-- == '\\')
3334                 esc = !esc;
3335             if (!esc)
3336                 in_charclass = TRUE;
3337         }
3338         else if (*s == ']' && PL_lex_inpat && in_charclass) {
3339             char *s1 = s-1;
3340             int esc = 0;
3341             while (s1 >= start && *s1-- == '\\')
3342                 esc = !esc;
3343             if (!esc)
3344                 in_charclass = FALSE;
3345         }
3346             /* skip for regexp comments /(?#comment)/, except for the last
3347              * char, which will be done separately.  Stop on (?{..}) and
3348              * friends */
3349         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3350             if (s[2] == '#') {
3351                 while (s+1 < send && *s != ')')
3352                     *d++ = *s++;
3353             }
3354             else if (!PL_lex_casemods
3355                      && (    s[2] == '{' /* This should match regcomp.c */
3356                          || (s[2] == '?' && s[3] == '{')))
3357             {
3358                 break;
3359             }
3360         }
3361             /* likewise skip #-initiated comments in //x patterns */
3362         else if (*s == '#'
3363                  && PL_lex_inpat
3364                  && !in_charclass
3365                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3366         {
3367             while (s < send && *s != '\n')
3368                 *d++ = *s++;
3369         }
3370             /* no further processing of single-quoted regex */
3371         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3372             goto default_action;
3373
3374             /* check for embedded arrays
3375              * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3376              */
3377         else if (*s == '@' && s[1]) {
3378             if (UTF
3379                ? isIDFIRST_utf8_safe(s+1, send)
3380                : isWORDCHAR_A(s[1]))
3381             {
3382                 break;
3383             }
3384             if (strchr(":'{$", s[1]))
3385                 break;
3386             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3387                 break; /* in regexp, neither @+ nor @- are interpolated */
3388         }
3389             /* check for embedded scalars.  only stop if we're sure it's a
3390              * variable.  */
3391         else if (*s == '$') {
3392             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3393                 break;
3394             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3395                 if (s[1] == '\\') {
3396                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3397                                    "Possible unintended interpolation of $\\ in regex");
3398                 }
3399                 break;          /* in regexp, $ might be tail anchor */
3400             }
3401         }
3402
3403         /* End of else if chain - OP_TRANS rejoin rest */
3404
3405         if (UNLIKELY(s >= send)) {
3406             assert(s == send);
3407             break;
3408         }
3409
3410         /* backslashes */
3411         if (*s == '\\' && s+1 < send) {
3412             char* e;    /* Can be used for ending '}', etc. */
3413
3414             s++;
3415
3416             /* warn on \1 - \9 in substitution replacements, but note that \11
3417              * is an octal; and \19 is \1 followed by '9' */
3418             if (PL_lex_inwhat == OP_SUBST
3419                 && !PL_lex_inpat
3420                 && isDIGIT(*s)
3421                 && *s != '0'
3422                 && !isDIGIT(s[1]))
3423             {
3424                 /* diag_listed_as: \%d better written as $%d */
3425                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3426                 *--s = '$';
3427                 break;
3428             }
3429
3430             /* string-change backslash escapes */
3431             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3432                 --s;
3433                 break;
3434             }
3435             /* In a pattern, process \N, but skip any other backslash escapes.
3436              * This is because we don't want to translate an escape sequence
3437              * into a meta symbol and have the regex compiler use the meta
3438              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3439              * in spite of this, we do have to process \N here while the proper
3440              * charnames handler is in scope.  See bugs #56444 and #62056.
3441              *
3442              * There is a complication because \N in a pattern may also stand
3443              * for 'match a non-nl', and not mean a charname, in which case its
3444              * processing should be deferred to the regex compiler.  To be a
3445              * charname it must be followed immediately by a '{', and not look
3446              * like \N followed by a curly quantifier, i.e., not something like
3447              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3448              * quantifier */
3449             else if (PL_lex_inpat
3450                     && (*s != 'N'
3451                         || s[1] != '{'
3452                         || regcurly(s + 1)))
3453             {
3454                 *d++ = '\\';
3455                 goto default_action;
3456             }
3457
3458             switch (*s) {
3459             default:
3460                 {
3461                     if ((isALPHANUMERIC(*s)))
3462                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3463                                        "Unrecognized escape \\%c passed through",
3464                                        *s);
3465                     /* default action is to copy the quoted character */
3466                     goto default_action;
3467                 }
3468
3469             /* eg. \132 indicates the octal constant 0132 */
3470             case '0': case '1': case '2': case '3':
3471             case '4': case '5': case '6': case '7':
3472                 {
3473                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3474                     STRLEN len = 3;
3475                     uv = grok_oct(s, &len, &flags, NULL);
3476                     s += len;
3477                     if (len < 3 && s < send && isDIGIT(*s)
3478                         && ckWARN(WARN_MISC))
3479                     {
3480                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3481                                     "%s", form_short_octal_warning(s, len));
3482                     }
3483                 }
3484                 goto NUM_ESCAPE_INSERT;
3485
3486             /* eg. \o{24} indicates the octal constant \024 */
3487             case 'o':
3488                 {
3489                     const char* error;
3490
3491                     bool valid = grok_bslash_o(&s, &uv, &error,
3492                                                TRUE, /* Output warning */
3493                                                FALSE, /* Not strict */
3494                                                TRUE, /* Output warnings for
3495                                                          non-portables */
3496                                                UTF);
3497                     if (! valid) {
3498                         yyerror(error);
3499                         uv = 0; /* drop through to ensure range ends are set */
3500                     }
3501                     goto NUM_ESCAPE_INSERT;
3502                 }
3503
3504             /* eg. \x24 indicates the hex constant 0x24 */
3505             case 'x':
3506                 {
3507                     const char* error;
3508
3509                     bool valid = grok_bslash_x(&s, &uv, &error,
3510                                                TRUE, /* Output warning */
3511                                                FALSE, /* Not strict */
3512                                                TRUE,  /* Output warnings for
3513                                                          non-portables */
3514                                                UTF);
3515                     if (! valid) {
3516                         yyerror(error);
3517                         uv = 0; /* drop through to ensure range ends are set */
3518                     }
3519                 }
3520
3521               NUM_ESCAPE_INSERT:
3522                 /* Insert oct or hex escaped character. */
3523
3524                 /* Here uv is the ordinal of the next character being added */
3525                 if (UVCHR_IS_INVARIANT(uv)) {
3526                     *d++ = (char) uv;
3527                 }
3528                 else {
3529                     if (!has_utf8 && uv > 255) {
3530
3531                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3532                          * If we've only seen invariants so far, all we have to
3533                          * do is turn on the flag */
3534                         if (utf8_variant_count == 0) {
3535                             SvUTF8_on(sv);
3536                         }
3537                         else {
3538                             SvCUR_set(sv, d - SvPVX_const(sv));
3539                             SvPOK_on(sv);
3540                             *d = '\0';
3541
3542                             sv_utf8_upgrade_flags_grow(
3543                                            sv,
3544                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3545
3546                                            /* Since we're having to grow here,
3547                                             * make sure we have enough room for
3548                                             * this escape and a NUL, so the
3549                                             * code immediately below won't have
3550                                             * to actually grow again */
3551                                           UVCHR_SKIP(uv)
3552                                         + (STRLEN)(send - s) + 1);
3553                             d = SvPVX(sv) + SvCUR(sv);
3554                         }
3555
3556                         has_above_latin1 = TRUE;
3557                         has_utf8 = TRUE;
3558                     }
3559
3560                     if (! has_utf8) {
3561                         *d++ = (char)uv;
3562                         utf8_variant_count++;
3563                     }
3564                     else {
3565                        /* Usually, there will already be enough room in 'sv'
3566                         * since such escapes are likely longer than any UTF-8
3567                         * sequence they can end up as.  This isn't the case on
3568                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3569                         * UTF-8 for it contains 14.  And, we have to allow for
3570                         * a trailing NUL.  It probably can't happen on ASCII
3571                         * platforms, but be safe.  See Note on sizing above. */
3572                         const STRLEN needed = d - SvPVX(sv)
3573                                             + UVCHR_SKIP(uv)
3574                                             + (send - s)
3575                                             + 1;
3576                         if (UNLIKELY(needed > SvLEN(sv))) {
3577                             SvCUR_set(sv, d - SvPVX_const(sv));
3578                             d = SvCUR(sv) + SvGROW(sv, needed);
3579                         }
3580
3581                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3582                         if (PL_lex_inwhat == OP_TRANS
3583                             && PL_parser->lex_sub_op)
3584                         {
3585                             PL_parser->lex_sub_op->op_private |=
3586                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3587                                              : OPpTRANS_TO_UTF);
3588                         }
3589                     }
3590                 }
3591 #ifdef EBCDIC
3592                 non_portable_endpoint++;
3593 #endif
3594                 continue;
3595
3596             case 'N':
3597                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3598                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3599                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3600                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3601                  * convenience all three forms are referred to as "named
3602                  * characters" below.
3603                  *
3604                  * For patterns, \N also can mean to match a non-newline.  Code
3605                  * before this 'switch' statement should already have handled
3606                  * this situation, and hence this code only has to deal with
3607                  * the named character cases.
3608                  *
3609                  * For non-patterns, the named characters are converted to
3610                  * their string equivalents.  In patterns, named characters are
3611                  * not converted to their ultimate forms for the same reasons
3612                  * that other escapes aren't.  Instead, they are converted to
3613                  * the \N{U+...} form to get the value from the charnames that
3614                  * is in effect right now, while preserving the fact that it
3615                  * was a named character, so that the regex compiler knows
3616                  * this.
3617                  *
3618                  * The structure of this section of code (besides checking for
3619                  * errors and upgrading to utf8) is:
3620                  *    If the named character is of the form \N{U+...}, pass it
3621                  *      through if a pattern; otherwise convert the code point
3622                  *      to utf8
3623                  *    Otherwise must be some \N{NAME}: convert to
3624                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3625                  *
3626                  * Transliteration is an exception.  The conversion to utf8 is
3627                  * only done if the code point requires it to be representable.
3628                  *
3629                  * Here, 's' points to the 'N'; the test below is guaranteed to
3630                  * succeed if we are being called on a pattern, as we already
3631                  * know from a test above that the next character is a '{'.  A
3632                  * non-pattern \N must mean 'named character', which requires
3633                  * braces */
3634                 s++;
3635                 if (*s != '{') {
3636                     yyerror("Missing braces on \\N{}");
3637                     *d++ = '\0';
3638                     continue;
3639                 }
3640                 s++;
3641
3642                 /* If there is no matching '}', it is an error. */
3643                 if (! (e = strchr(s, '}'))) {
3644                     if (! PL_lex_inpat) {
3645                         yyerror("Missing right brace on \\N{}");
3646                     } else {
3647                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3648                     }
3649                     yyquit(); /* Have exhausted the input. */
3650                 }
3651
3652                 /* Here it looks like a named character */
3653
3654                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3655                     s += 2;         /* Skip to next char after the 'U+' */
3656                     if (PL_lex_inpat) {
3657
3658                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3659                         /* Check the syntax.  */
3660                         const char *orig_s;
3661                         orig_s = s - 5;
3662                         if (!isXDIGIT(*s)) {
3663                           bad_NU:
3664                             yyerror(
3665                                 "Invalid hexadecimal number in \\N{U+...}"
3666                             );
3667                             s = e + 1;
3668                             *d++ = '\0';
3669                             continue;
3670                         }
3671                         while (++s < e) {
3672                             if (isXDIGIT(*s))
3673                                 continue;
3674                             else if ((*s == '.' || *s == '_')
3675                                   && isXDIGIT(s[1]))
3676                                 continue;
3677                             goto bad_NU;
3678                         }
3679
3680                         /* Pass everything through unchanged.
3681                          * +1 is for the '}' */
3682                         Copy(orig_s, d, e - orig_s + 1, char);
3683                         d += e - orig_s + 1;
3684                     }
3685                     else {  /* Not a pattern: convert the hex to string */
3686                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3687                                 | PERL_SCAN_SILENT_ILLDIGIT
3688                                 | PERL_SCAN_DISALLOW_PREFIX;
3689                         STRLEN len = e - s;
3690                         uv = grok_hex(s, &len, &flags, NULL);
3691                         if (len == 0 || (len != (STRLEN)(e - s)))
3692                             goto bad_NU;
3693
3694                          /* For non-tr///, if the destination is not in utf8,
3695                           * unconditionally recode it to be so.  This is
3696                           * because \N{} implies Unicode semantics, and scalars
3697                           * have to be in utf8 to guarantee those semantics.
3698                           * tr/// doesn't care about Unicode rules, so no need
3699                           * there to upgrade to UTF-8 for small enough code
3700                           * points */
3701                         if (! has_utf8 && (   uv > 0xFF
3702                                            || PL_lex_inwhat != OP_TRANS))
3703                         {
3704                             /* See Note on sizing above.  */
3705                             const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3706
3707                             SvCUR_set(sv, d - SvPVX_const(sv));
3708                             SvPOK_on(sv);
3709                             *d = '\0';
3710
3711                             if (utf8_variant_count == 0) {
3712                                 SvUTF8_on(sv);
3713                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3714                             }
3715                             else {
3716                                 sv_utf8_upgrade_flags_grow(
3717                                                sv,
3718                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3719                                                extra);
3720                                 d = SvPVX(sv) + SvCUR(sv);
3721                             }
3722
3723                             has_utf8 = TRUE;
3724                             has_above_latin1 = TRUE;
3725                         }
3726
3727                         /* Add the (Unicode) code point to the output. */
3728                         if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3729                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3730                         }
3731                         else {
3732                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3733                         }
3734                     }
3735                 }
3736                 else /* Here is \N{NAME} but not \N{U+...}. */
3737                      if ((res = get_and_check_backslash_N_name(s, e)))
3738                 {
3739                     STRLEN len;
3740                     const char *str = SvPV_const(res, len);
3741                     if (PL_lex_inpat) {
3742
3743                         if (! len) { /* The name resolved to an empty string */
3744                             Copy("\\N{}", d, 4, char);
3745                             d += 4;
3746                         }
3747                         else {
3748                             /* In order to not lose information for the regex
3749                             * compiler, pass the result in the specially made
3750                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3751                             * the code points in hex of each character
3752                             * returned by charnames */
3753
3754                             const char *str_end = str + len;
3755                             const STRLEN off = d - SvPVX_const(sv);
3756
3757                             if (! SvUTF8(res)) {
3758                                 /* For the non-UTF-8 case, we can determine the
3759                                  * exact length needed without having to parse
3760                                  * through the string.  Each character takes up
3761                                  * 2 hex digits plus either a trailing dot or
3762                                  * the "}" */
3763                                 const char initial_text[] = "\\N{U+";
3764                                 const STRLEN initial_len = sizeof(initial_text)
3765                                                            - 1;
3766                                 d = off + SvGROW(sv, off
3767                                                     + 3 * len
3768
3769                                                     /* +1 for trailing NUL */
3770                                                     + initial_len + 1
3771
3772                                                     + (STRLEN)(send - e));
3773                                 Copy(initial_text, d, initial_len, char);
3774                                 d += initial_len;
3775                                 while (str < str_end) {
3776                                     char hex_string[4];
3777                                     int len =
3778                                         my_snprintf(hex_string,
3779                                                   sizeof(hex_string),
3780                                                   "%02X.",
3781
3782                                                   /* The regex compiler is
3783                                                    * expecting Unicode, not
3784                                                    * native */
3785                                                   NATIVE_TO_LATIN1(*str));
3786                                     PERL_MY_SNPRINTF_POST_GUARD(len,
3787                                                            sizeof(hex_string));
3788                                     Copy(hex_string, d, 3, char);
3789                                     d += 3;
3790                                     str++;
3791                                 }
3792                                 d--;    /* Below, we will overwrite the final
3793                                            dot with a right brace */
3794                             }
3795                             else {
3796                                 STRLEN char_length; /* cur char's byte length */
3797
3798                                 /* and the number of bytes after this is
3799                                  * translated into hex digits */
3800                                 STRLEN output_length;
3801
3802                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3803                                  * for max('U+', '.'); and 1 for NUL */
3804                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3805
3806                                 /* Get the first character of the result. */
3807                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3808                                                         len,
3809                                                         &char_length,
3810                                                         UTF8_ALLOW_ANYUV);
3811                                 /* Convert first code point to Unicode hex,
3812                                  * including the boiler plate before it. */
3813                                 output_length =
3814                                     my_snprintf(hex_string, sizeof(hex_string),
3815                                              "\\N{U+%X",
3816                                              (unsigned int) NATIVE_TO_UNI(uv));
3817
3818                                 /* Make sure there is enough space to hold it */
3819                                 d = off + SvGROW(sv, off
3820                                                     + output_length
3821                                                     + (STRLEN)(send - e)
3822                                                     + 2);       /* '}' + NUL */
3823                                 /* And output it */
3824                                 Copy(hex_string, d, output_length, char);
3825                                 d += output_length;
3826
3827                                 /* For each subsequent character, append dot and
3828                                 * its Unicode code point in hex */
3829                                 while ((str += char_length) < str_end) {
3830                                     const STRLEN off = d - SvPVX_const(sv);
3831                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3832                                                             str_end - str,
3833                                                             &char_length,
3834                                                             UTF8_ALLOW_ANYUV);
3835                                     output_length =
3836                                         my_snprintf(hex_string,
3837                                              sizeof(hex_string),
3838                                              ".%X",
3839                                              (unsigned int) NATIVE_TO_UNI(uv));
3840
3841                                     d = off + SvGROW(sv, off
3842                                                         + output_length
3843                                                         + (STRLEN)(send - e)
3844                                                         + 2);   /* '}' +  NUL */
3845                                     Copy(hex_string, d, output_length, char);
3846                                     d += output_length;
3847                                 }
3848                             }
3849
3850                             *d++ = '}'; /* Done.  Add the trailing brace */
3851                         }
3852                     }
3853                     else { /* Here, not in a pattern.  Convert the name to a
3854                             * string. */
3855
3856                         if (PL_lex_inwhat == OP_TRANS) {
3857                             str = SvPV_const(res, len);
3858                             if (len > ((SvUTF8(res))
3859                                        ? UTF8SKIP(str)
3860                                        : 1U))
3861                             {
3862                                 yyerror(Perl_form(aTHX_
3863                                     "%.*s must not be a named sequence"
3864                                     " in transliteration operator",
3865                                         /*  +1 to include the "}" */
3866                                     (int) (e + 1 - start), start));
3867                                 *d++ = '\0';
3868                                 goto end_backslash_N;
3869                             }
3870
3871                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
3872                                 has_above_latin1 = TRUE;
3873                             }
3874
3875                         }
3876                         else if (! SvUTF8(res)) {
3877                             /* Make sure \N{} return is UTF-8.  This is because
3878                              * \N{} implies Unicode semantics, and scalars have
3879                              * to be in utf8 to guarantee those semantics; but
3880                              * not needed in tr/// */
3881                             sv_utf8_upgrade_flags(res, 0);
3882                             str = SvPV_const(res, len);
3883                         }
3884
3885                          /* Upgrade destination to be utf8 if this new
3886                           * component is */
3887                         if (! has_utf8 && SvUTF8(res)) {
3888                             /* See Note on sizing above.  */
3889                             const STRLEN extra = len + (send - s) + 1;
3890
3891                             SvCUR_set(sv, d - SvPVX_const(sv));
3892                             SvPOK_on(sv);
3893                             *d = '\0';
3894
3895                             if (utf8_variant_count == 0) {
3896                                 SvUTF8_on(sv);
3897                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3898                             }
3899                             else {
3900                                 sv_utf8_upgrade_flags_grow(sv,
3901                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3902                                                 extra);
3903                                 d = SvPVX(sv) + SvCUR(sv);
3904                             }
3905                             has_utf8 = TRUE;
3906                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3907
3908                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3909                              * set correctly here). */
3910                             const STRLEN extra = len + (send - e) + 1;
3911                             const STRLEN off = d - SvPVX_const(sv);
3912                             d = off + SvGROW(sv, off + extra);
3913                         }
3914                         Copy(str, d, len, char);
3915                         d += len;
3916                     }
3917
3918                     SvREFCNT_dec(res);
3919
3920                 } /* End \N{NAME} */
3921
3922               end_backslash_N:
3923 #ifdef EBCDIC
3924                 backslash_N++; /* \N{} is defined to be Unicode */
3925 #endif
3926                 s = e + 1;  /* Point to just after the '}' */
3927                 continue;
3928
3929             /* \c is a control character */
3930             case 'c':
3931                 s++;
3932                 if (s < send) {
3933                     *d++ = grok_bslash_c(*s, 1);
3934                 }
3935                 else {
3936                     yyerror("Missing control char name in \\c");
3937                     yyquit();   /* Are at end of input, no sense continuing */
3938                 }
3939 #ifdef EBCDIC
3940                 non_portable_endpoint++;
3941 #endif
3942                 break;
3943
3944             /* printf-style backslashes, formfeeds, newlines, etc */
3945             case 'b':
3946                 *d++ = '\b';
3947                 break;
3948             case 'n':
3949                 *d++ = '\n';
3950                 break;
3951             case 'r':
3952                 *d++ = '\r';
3953                 break;
3954             case 'f':
3955                 *d++ = '\f';
3956                 break;
3957             case 't':
3958                 *d++ = '\t';
3959                 break;
3960             case 'e':
3961                 *d++ = ESC_NATIVE;
3962                 break;
3963             case 'a':
3964                 *d++ = '\a';
3965                 break;
3966             } /* end switch */
3967
3968             s++;
3969             continue;
3970         } /* end if (backslash) */
3971
3972     default_action:
3973         /* Just copy the input to the output, though we may have to convert
3974          * to/from UTF-8.
3975          *
3976          * If the input has the same representation in UTF-8 as not, it will be
3977          * a single byte, and we don't care about UTF8ness; just copy the byte */
3978         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
3979             *d++ = *s++;
3980         }
3981         else if (! this_utf8 && ! has_utf8) {
3982             /* If neither source nor output is UTF-8, is also a single byte,
3983              * just copy it; but this byte counts should we later have to
3984              * convert to UTF-8 */
3985             *d++ = *s++;
3986             utf8_variant_count++;
3987         }
3988         else if (this_utf8 && has_utf8) {   /* Both UTF-8, can just copy */
3989             const STRLEN len = UTF8SKIP(s);
3990
3991             /* We expect the source to have already been checked for
3992              * malformedness */
3993             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
3994
3995             Copy(s, d, len, U8);
3996             d += len;
3997             s += len;
3998         }
3999         else { /* UTF8ness matters and doesn't match, need to convert */
4000             STRLEN len = 1;
4001             const UV nextuv   = (this_utf8)
4002                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
4003                                 : (UV) ((U8) *s);
4004             STRLEN need = UVCHR_SKIP(nextuv);
4005
4006             if (!has_utf8) {
4007                 SvCUR_set(sv, d - SvPVX_const(sv));
4008                 SvPOK_on(sv);
4009                 *d = '\0';
4010
4011                 /* See Note on sizing above. */
4012                 need += (STRLEN)(send - s) + 1;
4013
4014                 if (utf8_variant_count == 0) {
4015                     SvUTF8_on(sv);
4016                     d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4017                 }
4018                 else {
4019                     sv_utf8_upgrade_flags_grow(sv,
4020                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4021                                                need);
4022                     d = SvPVX(sv) + SvCUR(sv);
4023                 }
4024                 has_utf8 = TRUE;
4025             } else if (need > len) {
4026                 /* encoded value larger than old, may need extra space (NOTE:
4027                  * SvCUR() is not set correctly here).   See Note on sizing
4028                  * above.  */
4029                 const STRLEN extra = need + (send - s) + 1;
4030                 const STRLEN off = d - SvPVX_const(sv);
4031                 d = off + SvGROW(sv, off + extra);
4032             }
4033             s += len;
4034
4035             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
4036         }
4037     } /* while loop to process each character */
4038
4039     /* terminate the string and set up the sv */
4040     *d = '\0';
4041     SvCUR_set(sv, d - SvPVX_const(sv));
4042     if (SvCUR(sv) >= SvLEN(sv))
4043         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
4044                    " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
4045
4046     SvPOK_on(sv);
4047     if (has_utf8) {
4048         SvUTF8_on(sv);
4049         if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
4050             PL_parser->lex_sub_op->op_private |=
4051                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
4052         }
4053     }
4054
4055     /* shrink the sv if we allocated more than we used */
4056     if (SvCUR(sv) + 5 < SvLEN(sv)) {
4057         SvPV_shrink_to_cur(sv);
4058     }
4059
4060     /* return the substring (via pl_yylval) only if we parsed anything */
4061     if (s > start) {
4062         char *s2 = start;
4063         for (; s2 < s; s2++) {
4064             if (*s2 == '\n')
4065                 COPLINE_INC_WITH_HERELINES;
4066         }
4067         SvREFCNT_inc_simple_void_NN(sv);
4068         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4069             && ! PL_parser->lex_re_reparsing)
4070         {
4071             const char *const key = PL_lex_inpat ? "qr" : "q";
4072             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4073             const char *type;
4074             STRLEN typelen;
4075
4076             if (PL_lex_inwhat == OP_TRANS) {
4077                 type = "tr";
4078                 typelen = 2;
4079             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4080                 type = "s";
4081                 typelen = 1;
4082             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4083                 type = "q";
4084                 typelen = 1;
4085             } else  {
4086                 type = "qq";
4087                 typelen = 2;
4088             }
4089
4090             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4091                                 type, typelen);
4092         }
4093         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4094     }
4095     LEAVE_with_name("scan_const");
4096     return s;
4097 }
4098
4099 /* S_intuit_more
4100  * Returns TRUE if there's more to the expression (e.g., a subscript),
4101  * FALSE otherwise.
4102  *
4103  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4104  *
4105  * ->[ and ->{ return TRUE
4106  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4107  * { and [ outside a pattern are always subscripts, so return TRUE
4108  * if we're outside a pattern and it's not { or [, then return FALSE
4109  * if we're in a pattern and the first char is a {
4110  *   {4,5} (any digits around the comma) returns FALSE
4111  * if we're in a pattern and the first char is a [
4112  *   [] returns FALSE
4113  *   [SOMETHING] has a funky algorithm to decide whether it's a
4114  *      character class or not.  It has to deal with things like
4115  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4116  * anything else returns TRUE
4117  */
4118
4119 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4120
4121 STATIC int
4122 S_intuit_more(pTHX_ char *s)
4123 {
4124     PERL_ARGS_ASSERT_INTUIT_MORE;
4125
4126     if (PL_lex_brackets)
4127         return TRUE;
4128     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4129         return TRUE;
4130     if (*s == '-' && s[1] == '>'
4131      && FEATURE_POSTDEREF_QQ_IS_ENABLED
4132      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4133         ||(s[2] == '@' && strchr("*[{",s[3])) ))
4134         return TRUE;
4135     if (*s != '{' && *s != '[')
4136         return FALSE;
4137     if (!PL_lex_inpat)
4138         return TRUE;
4139
4140     /* In a pattern, so maybe we have {n,m}. */
4141     if (*s == '{') {
4142         if (regcurly(s)) {
4143             return FALSE;
4144         }
4145         return TRUE;
4146     }
4147
4148     /* On the other hand, maybe we have a character class */
4149
4150     s++;
4151     if (*s == ']' || *s == '^')
4152         return FALSE;
4153     else {
4154         /* this is terrifying, and it works */
4155         int weight;
4156         char seen[256];
4157         const char * const send = strchr(s,']');
4158         unsigned char un_char, last_un_char;
4159         char tmpbuf[sizeof PL_tokenbuf * 4];
4160
4161         if (!send)              /* has to be an expression */
4162             return TRUE;
4163         weight = 2;             /* let's weigh the evidence */
4164
4165         if (*s == '$')
4166             weight -= 3;
4167         else if (isDIGIT(*s)) {
4168             if (s[1] != ']') {
4169                 if (isDIGIT(s[1]) && s[2] == ']')
4170                     weight -= 10;
4171             }
4172             else
4173                 weight -= 100;
4174         }
4175         Zero(seen,256,char);
4176         un_char = 255;
4177         for (; s < send; s++) {
4178             last_un_char = un_char;
4179             un_char = (unsigned char)*s;
4180             switch (*s) {
4181             case '@':
4182             case '&':
4183             case '$':
4184                 weight -= seen[un_char] * 10;
4185                 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4186                     int len;
4187                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4188                     len = (int)strlen(tmpbuf);
4189                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4190                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4191                         weight -= 100;
4192                     else
4193                         weight -= 10;
4194                 }
4195                 else if (*s == '$'
4196                          && s[1]
4197                          && strchr("[#!%*<>()-=",s[1]))
4198                 {
4199                     if (/*{*/ strchr("])} =",s[2]))
4200                         weight -= 10;
4201                     else
4202                         weight -= 1;
4203                 }
4204                 break;
4205             case '\\':
4206                 un_char = 254;
4207                 if (s[1]) {
4208                     if (strchr("wds]",s[1]))
4209                         weight += 100;
4210                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4211                         weight += 1;
4212                     else if (strchr("rnftbxcav",s[1]))
4213                         weight += 40;
4214                     else if (isDIGIT(s[1])) {
4215                         weight += 40;
4216                         while (s[1] && isDIGIT(s[1]))
4217                             s++;
4218                     }
4219                 }
4220                 else
4221                     weight += 100;
4222                 break;
4223             case '-':
4224                 if (s[1] == '\\')
4225                     weight += 50;
4226                 if (strchr("aA01! ",last_un_char))
4227                     weight += 30;
4228                 if (strchr("zZ79~",s[1]))
4229                     weight += 30;
4230                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4231                     weight -= 5;        /* cope with negative subscript */
4232                 break;
4233             default:
4234                 if (!isWORDCHAR(last_un_char)
4235                     && !(last_un_char == '$' || last_un_char == '@'
4236                          || last_un_char == '&')
4237                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4238                     char *d = s;
4239                     while (isALPHA(*s))
4240                         s++;
4241                     if (keyword(d, s - d, 0))
4242                         weight -= 150;
4243                 }
4244                 if (un_char == last_un_char + 1)
4245                     weight += 5;
4246                 weight -= seen[un_char];
4247                 break;
4248             }
4249             seen[un_char]++;
4250         }
4251         if (weight >= 0)        /* probably a character class */
4252             return FALSE;
4253     }
4254
4255     return TRUE;
4256 }
4257
4258 /*
4259  * S_intuit_method
4260  *
4261  * Does all the checking to disambiguate
4262  *   foo bar
4263  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4264  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4265  *
4266  * First argument is the stuff after the first token, e.g. "bar".
4267  *
4268  * Not a method if foo is a filehandle.
4269  * Not a method if foo is a subroutine prototyped to take a filehandle.
4270  * Not a method if it's really "Foo $bar"
4271  * Method if it's "foo $bar"
4272  * Not a method if it's really "print foo $bar"
4273  * Method if it's really "foo package::" (interpreted as package->foo)
4274  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4275  * Not a method if bar is a filehandle or package, but is quoted with
4276  *   =>
4277  */
4278
4279 STATIC int
4280 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4281 {
4282     char *s = start + (*start == '$');
4283     char tmpbuf[sizeof PL_tokenbuf];
4284     STRLEN len;
4285     GV* indirgv;
4286         /* Mustn't actually add anything to a symbol table.
4287            But also don't want to "initialise" any placeholder
4288            constants that might already be there into full
4289            blown PVGVs with attached PVCV.  */
4290     GV * const gv =
4291         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4292
4293     PERL_ARGS_ASSERT_INTUIT_METHOD;
4294
4295     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4296             return 0;
4297     if (cv && SvPOK(cv)) {
4298         const char *proto = CvPROTO(cv);
4299         if (proto) {
4300             while (*proto && (isSPACE(*proto) || *proto == ';'))
4301                 proto++;
4302             if (*proto == '*')
4303                 return 0;
4304         }
4305     }
4306
4307     if (*start == '$') {
4308         SSize_t start_off = start - SvPVX(PL_linestr);
4309         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4310             || isUPPER(*PL_tokenbuf))
4311             return 0;
4312         /* this could be $# */
4313         if (isSPACE(*s))
4314             s = skipspace(s);
4315         PL_bufptr = SvPVX(PL_linestr) + start_off;
4316         PL_expect = XREF;
4317         return *s == '(' ? FUNCMETH : METHOD;
4318     }
4319
4320     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4321     /* start is the beginning of the possible filehandle/object,
4322      * and s is the end of it
4323      * tmpbuf is a copy of it (but with single quotes as double colons)
4324      */
4325
4326     if (!keyword(tmpbuf, len, 0)) {
4327         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4328             len -= 2;
4329             tmpbuf[len] = '\0';
4330             goto bare_package;
4331         }
4332         indirgv = gv_fetchpvn_flags(tmpbuf, len,
4333                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4334                                     SVt_PVCV);
4335         if (indirgv && SvTYPE(indirgv) != SVt_NULL
4336          && (!isGV(indirgv) || GvCVu(indirgv)))
4337             return 0;
4338         /* filehandle or package name makes it a method */
4339         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4340             s = skipspace(s);
4341             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4342                 return 0;       /* no assumptions -- "=>" quotes bareword */
4343       bare_package:
4344             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4345                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4346             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4347             PL_expect = XTERM;
4348             force_next(BAREWORD);
4349             PL_bufptr = s;
4350             return *s == '(' ? FUNCMETH : METHOD;
4351         }
4352     }
4353     return 0;
4354 }
4355
4356 /* Encoded script support. filter_add() effectively inserts a
4357  * 'pre-processing' function into the current source input stream.
4358  * Note that the filter function only applies to the current source file
4359  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4360  *
4361  * The datasv parameter (which may be NULL) can be used to pass
4362  * private data to this instance of the filter. The filter function
4363  * can recover the SV using the FILTER_DATA macro and use it to
4364  * store private buffers and state information.
4365  *
4366  * The supplied datasv parameter is upgraded to a PVIO type
4367  * and the IoDIRP/IoANY field is used to store the function pointer,
4368  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4369  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4370  * private use must be set using malloc'd pointers.
4371  */
4372
4373 SV *
4374 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4375 {
4376     if (!funcp)
4377         return NULL;
4378
4379     if (!PL_parser)
4380         return NULL;
4381
4382     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4383         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4384
4385     if (!PL_rsfp_filters)
4386         PL_rsfp_filters = newAV();
4387     if (!datasv)
4388         datasv = newSV(0);
4389     SvUPGRADE(datasv, SVt_PVIO);
4390     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4391     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4392     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4393                           FPTR2DPTR(void *, IoANY(datasv)),
4394                           SvPV_nolen(datasv)));
4395     av_unshift(PL_rsfp_filters, 1);
4396     av_store(PL_rsfp_filters, 0, datasv) ;
4397     if (
4398         !PL_parser->filtered
4399      && PL_parser->lex_flags & LEX_EVALBYTES
4400      && PL_bufptr < PL_bufend
4401     ) {
4402         const char *s = PL_bufptr;
4403         while (s < PL_bufend) {
4404             if (*s == '\n') {
4405                 SV *linestr = PL_parser->linestr;
4406                 char *buf = SvPVX(linestr);
4407                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4408                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4409                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4410                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4411                 STRLEN const last_uni_pos =
4412                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4413                 STRLEN const last_lop_pos =
4414                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4415                 av_push(PL_rsfp_filters, linestr);
4416                 PL_parser->linestr =
4417                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4418                 buf = SvPVX(PL_parser->linestr);
4419                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4420                 PL_parser->bufptr = buf + bufptr_pos;
4421                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4422                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4423                 PL_parser->linestart = buf + linestart_pos;
4424                 if (PL_parser->last_uni)
4425                     PL_parser->last_uni = buf + last_uni_pos;
4426                 if (PL_parser->last_lop)
4427                     PL_parser->last_lop = buf + last_lop_pos;
4428                 SvLEN(linestr) = SvCUR(linestr);
4429                 SvCUR(linestr) = s-SvPVX(linestr);
4430                 PL_parser->filtered = 1;
4431                 break;
4432             }
4433             s++;
4434         }
4435     }
4436     return(datasv);
4437 }
4438
4439
4440 /* Delete most recently added instance of this filter function. */
4441 void
4442 Perl_filter_del(pTHX_ filter_t funcp)
4443 {
4444     SV *datasv;
4445
4446     PERL_ARGS_ASSERT_FILTER_DEL;
4447
4448 #ifdef DEBUGGING
4449     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4450                           FPTR2DPTR(void*, funcp)));
4451 #endif
4452     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4453         return;
4454     /* if filter is on top of stack (usual case) just pop it off */
4455     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4456     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4457         sv_free(av_pop(PL_rsfp_filters));
4458
4459         return;
4460     }
4461     /* we need to search for the correct entry and clear it     */
4462     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4463 }
4464
4465
4466 /* Invoke the idxth filter function for the current rsfp.        */
4467 /* maxlen 0 = read one text line */
4468 I32
4469 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4470 {
4471     filter_t funcp;
4472     SV *datasv = NULL;
4473     /* This API is bad. It should have been using unsigned int for maxlen.
4474        Not sure if we want to change the API, but if not we should sanity
4475        check the value here.  */
4476     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4477
4478     PERL_ARGS_ASSERT_FILTER_READ;
4479
4480     if (!PL_parser || !PL_rsfp_filters)
4481         return -1;
4482     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4483         /* Provide a default input filter to make life easy.    */
4484         /* Note that we append to the line. This is handy.      */
4485         DEBUG_P(PerlIO_printf(Perl_debug_log,
4486                               "filter_read %d: from rsfp\n", idx));
4487         if (correct_length) {
4488             /* Want a block */
4489             int len ;
4490             const int old_len = SvCUR(buf_sv);
4491
4492             /* ensure buf_sv is large enough */
4493             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4494             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4495                                    correct_length)) <= 0) {
4496                 if (PerlIO_error(PL_rsfp))
4497                     return -1;          /* error */
4498                 else
4499                     return 0 ;          /* end of file */
4500             }
4501             SvCUR_set(buf_sv, old_len + len) ;
4502             SvPVX(buf_sv)[old_len + len] = '\0';
4503         } else {
4504             /* Want a line */
4505             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4506                 if (PerlIO_error(PL_rsfp))
4507                     return -1;          /* error */
4508                 else
4509                     return 0 ;          /* end of file */
4510             }
4511         }
4512         return SvCUR(buf_sv);
4513     }
4514     /* Skip this filter slot if filter has been deleted */
4515     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4516         DEBUG_P(PerlIO_printf(Perl_debug_log,
4517                               "filter_read %d: skipped (filter deleted)\n",
4518                               idx));
4519         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4520     }
4521     if (SvTYPE(datasv) != SVt_PVIO) {
4522         if (correct_length) {
4523             /* Want a block */
4524             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4525             if (!remainder) return 0; /* eof */
4526             if (correct_length > remainder) correct_length = remainder;
4527             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4528             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4529         } else {
4530             /* Want a line */
4531             const char *s = SvEND(datasv);
4532             const char *send = SvPVX(datasv) + SvLEN(datasv);
4533             while (s < send) {
4534                 if (*s == '\n') {
4535                     s++;
4536                     break;
4537                 }
4538                 s++;
4539             }
4540             if (s == send) return 0; /* eof */
4541             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4542             SvCUR_set(datasv, s-SvPVX(datasv));
4543         }
4544         return SvCUR(buf_sv);
4545     }
4546     /* Get function pointer hidden within datasv        */
4547     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4548     DEBUG_P(PerlIO_printf(Perl_debug_log,
4549                           "filter_read %d: via function %p (%s)\n",
4550                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4551     /* Call function. The function is expected to       */
4552     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4553     /* Return: <0:error, =0:eof, >0:not eof             */
4554     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4555 }
4556
4557 STATIC char *
4558 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4559 {
4560     PERL_ARGS_ASSERT_FILTER_GETS;
4561
4562 #ifdef PERL_CR_FILTER
4563     if (!PL_rsfp_filters) {
4564         filter_add(S_cr_textfilter,NULL);
4565     }
4566 #endif
4567     if (PL_rsfp_filters) {
4568         if (!append)
4569             SvCUR_set(sv, 0);   /* start with empty line        */
4570         if (FILTER_READ(0, sv, 0) > 0)
4571             return ( SvPVX(sv) ) ;
4572         else
4573             return NULL ;
4574     }
4575     else
4576         return (sv_gets(sv, PL_rsfp, append));
4577 }
4578
4579 STATIC HV *
4580 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4581 {
4582     GV *gv;
4583
4584     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4585
4586     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4587         return PL_curstash;
4588
4589     if (len > 2
4590         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4591         && (gv = gv_fetchpvn_flags(pkgname,
4592                                    len,
4593                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4594     {
4595         return GvHV(gv);                        /* Foo:: */
4596     }
4597
4598     /* use constant CLASS => 'MyClass' */
4599     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4600     if (gv && GvCV(gv)) {
4601         SV * const sv = cv_const_sv(GvCV(gv));
4602         if (sv)
4603             return gv_stashsv(sv, 0);
4604     }
4605
4606     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4607 }
4608
4609
4610 STATIC char *
4611 S_tokenize_use(pTHX_ int is_use, char *s) {
4612     PERL_ARGS_ASSERT_TOKENIZE_USE;
4613
4614     if (PL_expect != XSTATE)
4615         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4616                     is_use ? "use" : "no"));
4617     PL_expect = XTERM;
4618     s = skipspace(s);
4619     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4620         s = force_version(s, TRUE);
4621         if (*s == ';' || *s == '}'
4622                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4623             NEXTVAL_NEXTTOKE.opval = NULL;
4624             force_next(BAREWORD);
4625         }
4626         else if (*s == 'v') {
4627             s = force_word(s,BAREWORD,FALSE,TRUE);
4628             s = force_version(s, FALSE);
4629         }
4630     }
4631     else {
4632         s = force_word(s,BAREWORD,FALSE,TRUE);
4633         s = force_version(s, FALSE);
4634     }
4635     pl_yylval.ival = is_use;
4636     return s;
4637 }
4638 #ifdef DEBUGGING
4639     static const char* const exp_name[] =
4640         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4641           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4642           "SIGVAR", "TERMORDORDOR"
4643         };
4644 #endif
4645
4646 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4647 STATIC bool
4648 S_word_takes_any_delimiter(char *p, STRLEN len)
4649 {
4650     return (len == 1 && strchr("msyq", p[0]))
4651             || (len == 2
4652                 && ((p[0] == 't' && p[1] == 'r')
4653                     || (p[0] == 'q' && strchr("qwxr", p[1]))));
4654 }
4655
4656 static void
4657 S_check_scalar_slice(pTHX_ char *s)
4658 {
4659     s++;
4660     while (SPACE_OR_TAB(*s)) s++;
4661     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4662                                                              PL_bufend,
4663                                                              UTF))
4664     {
4665         return;
4666     }
4667     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4668            || (*s && strchr(" \t$#+-'\"", *s)))
4669     {
4670         s += UTF ? UTF8SKIP(s) : 1;
4671     }
4672     if (*s == '}' || *s == ']')
4673         pl_yylval.ival = OPpSLICEWARNING;
4674 }
4675
4676 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4677 static void
4678 S_lex_token_boundary(pTHX)
4679 {
4680     PL_oldoldbufptr = PL_oldbufptr;
4681     PL_oldbufptr = PL_bufptr;
4682 }
4683
4684 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4685 static char *
4686 S_vcs_conflict_marker(pTHX_ char *s)
4687 {
4688     lex_token_boundary();
4689     PL_bufptr = s;
4690     yyerror("Version control conflict marker");
4691     while (s < PL_bufend && *s != '\n')
4692         s++;
4693     return s;
4694 }
4695
4696 /*
4697   yylex
4698
4699   Works out what to call the token just pulled out of the input
4700   stream.  The yacc parser takes care of taking the ops we return and
4701   stitching them into a tree.
4702
4703   Returns:
4704     The type of the next token
4705
4706   Structure:
4707       Check if we have already built the token; if so, use it.
4708       Switch based on the current state:
4709           - if we have a case modifier in a string, deal with that
4710           - handle other cases of interpolation inside a string
4711           - scan the next line if we are inside a format
4712       In the normal state, switch on the next character:
4713           - default:
4714             if alphabetic, go to key lookup
4715             unrecognized character - croak
4716           - 0/4/26: handle end-of-line or EOF
4717           - cases for whitespace
4718           - \n and #: handle comments and line numbers
4719           - various operators, brackets and sigils
4720           - numbers
4721           - quotes
4722           - 'v': vstrings (or go to key lookup)
4723           - 'x' repetition operator (or go to key lookup)
4724           - other ASCII alphanumerics (key lookup begins here):
4725               word before => ?
4726               keyword plugin
4727               scan built-in keyword (but do nothing with it yet)
4728               check for statement label
4729               check for lexical subs
4730                   goto just_a_word if there is one
4731               see whether built-in keyword is overridden
4732               switch on keyword number:
4733                   - default: just_a_word:
4734                       not a built-in keyword; handle bareword lookup
4735                       disambiguate between method and sub call
4736                       fall back to bareword
4737                   - cases for built-in keywords
4738 */
4739
4740
4741 int
4742 Perl_yylex(pTHX)
4743 {
4744     dVAR;
4745     char *s = PL_bufptr;
4746     char *d;
4747     STRLEN len;
4748     bool bof = FALSE;
4749     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4750     U8 formbrack = 0;
4751     U32 fake_eof = 0;
4752
4753     /* orig_keyword, gvp, and gv are initialized here because
4754      * jump to the label just_a_word_zero can bypass their
4755      * initialization later. */
4756     I32 orig_keyword = 0;
4757     GV *gv = NULL;
4758     GV **gvp = NULL;
4759
4760     if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
4761         const U8* first_bad_char_loc;
4762         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
4763                                                         PL_bufend - PL_bufptr,
4764                                                         &first_bad_char_loc)))
4765         {
4766             _force_out_malformed_utf8_message(first_bad_char_loc,
4767                                               (U8 *) PL_bufend,
4768                                               0,
4769                                               1 /* 1 means die */ );
4770             NOT_REACHED; /* NOTREACHED */
4771         }
4772         PL_parser->recheck_utf8_validity = FALSE;
4773     }
4774     DEBUG_T( {
4775         SV* tmp = newSVpvs("");
4776         PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
4777             (IV)CopLINE(PL_curcop),
4778             lex_state_names[PL_lex_state],
4779             exp_name[PL_expect],
4780             pv_display(tmp, s, strlen(s), 0, 60));
4781         SvREFCNT_dec(tmp);
4782     } );
4783
4784     /* when we've already built the next token, just pull it out of the queue */
4785     if (PL_nexttoke) {
4786         PL_nexttoke--;
4787         pl_yylval = PL_nextval[PL_nexttoke];
4788         {
4789             I32 next_type;
4790             next_type = PL_nexttype[PL_nexttoke];
4791             if (next_type & (7<<24)) {
4792                 if (next_type & (1<<24)) {
4793                     if (PL_lex_brackets > 100)
4794                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4795                     PL_lex_brackstack[PL_lex_brackets++] =
4796                         (char) ((next_type >> 16) & 0xff);
4797                 }
4798                 if (next_type & (2<<24))
4799                     PL_lex_allbrackets++;
4800                 if (next_type & (4<<24))
4801                     PL_lex_allbrackets--;
4802                 next_type &= 0xffff;
4803             }
4804             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4805         }
4806     }
4807
4808     switch (PL_lex_state) {
4809     case LEX_NORMAL:
4810     case LEX_INTERPNORMAL:
4811         break;
4812
4813     /* interpolated case modifiers like \L \U, including \Q and \E.
4814        when we get here, PL_bufptr is at the \
4815     */
4816     case LEX_INTERPCASEMOD:
4817 #ifdef DEBUGGING
4818         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4819             Perl_croak(aTHX_
4820                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4821                        PL_bufptr, PL_bufend, *PL_bufptr);
4822 #endif
4823         /* handle \E or end of string */
4824         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4825             /* if at a \E */
4826             if (PL_lex_casemods) {
4827                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4828                 PL_lex_casestack[PL_lex_casemods] = '\0';
4829
4830                 if (PL_bufptr != PL_bufend
4831                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4832                         || oldmod == 'F')) {
4833                     PL_bufptr += 2;
4834                     PL_lex_state = LEX_INTERPCONCAT;
4835                 }
4836                 PL_lex_allbrackets--;
4837                 return REPORT(')');
4838             }
4839             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4840                /* Got an unpaired \E */
4841                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4842                         "Useless use of \\E");
4843             }
4844             if (PL_bufptr != PL_bufend)
4845                 PL_bufptr += 2;
4846             PL_lex_state = LEX_INTERPCONCAT;
4847             return yylex();
4848         }
4849         else {
4850             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4851               "### Saw case modifier\n"); });
4852             s = PL_bufptr + 1;
4853             if (s[1] == '\\' && s[2] == 'E') {
4854                 PL_bufptr = s + 3;
4855                 PL_lex_state = LEX_INTERPCONCAT;
4856                 return yylex();
4857             }
4858             else {
4859                 I32 tmp;
4860                 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4861                     tmp = *s, *s = s[2], s[2] = (char)tmp;      /* misordered... */
4862                 if ((*s == 'L' || *s == 'U' || *s == 'F')
4863                     && (strpbrk(PL_lex_casestack, "LUF")))
4864                 {
4865                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4866                     PL_lex_allbrackets--;
4867                     return REPORT(')');
4868                 }
4869                 if (PL_lex_casemods > 10)
4870                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4871                 PL_lex_casestack[PL_lex_casemods++] = *s;
4872                 PL_lex_casestack[PL_lex_casemods] = '\0';
4873                 PL_lex_state = LEX_INTERPCONCAT;
4874                 NEXTVAL_NEXTTOKE.ival = 0;
4875                 force_next((2<<24)|'(');
4876                 if (*s == 'l')
4877                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4878                 else if (*s == 'u')
4879                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4880                 else if (*s == 'L')
4881                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4882                 else if (*s == 'U')
4883                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4884                 else if (*s == 'Q')
4885                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4886                 else if (*s == 'F')
4887                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4888                 else
4889                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4890                 PL_bufptr = s + 1;
4891             }
4892             force_next(FUNC);
4893             if (PL_lex_starts) {
4894                 s = PL_bufptr;
4895                 PL_lex_starts = 0;
4896                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4897                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4898                     TOKEN(',');
4899                 else
4900                     AopNOASSIGN(OP_CONCAT);
4901             }
4902             else
4903                 return yylex();
4904         }
4905
4906     case LEX_INTERPPUSH:
4907         return REPORT(sublex_push());
4908
4909     case LEX_INTERPSTART:
4910         if (PL_bufptr == PL_bufend)
4911             return REPORT(sublex_done());
4912         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4913               "### Interpolated variable\n"); });
4914         PL_expect = XTERM;
4915         /* for /@a/, we leave the joining for the regex engine to do
4916          * (unless we're within \Q etc) */
4917         PL_lex_dojoin = (*PL_bufptr == '@'
4918                             && (!PL_lex_inpat || PL_lex_casemods));
4919         PL_lex_state = LEX_INTERPNORMAL;
4920         if (PL_lex_dojoin) {
4921             NEXTVAL_NEXTTOKE.ival = 0;
4922             force_next(',');
4923             force_ident("\"", '$');
4924             NEXTVAL_NEXTTOKE.ival = 0;
4925             force_next('$');
4926             NEXTVAL_NEXTTOKE.ival = 0;
4927             force_next((2<<24)|'(');
4928             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4929             force_next(FUNC);
4930         }
4931         /* Convert (?{...}) and friends to 'do {...}' */
4932         if (PL_lex_inpat && *PL_bufptr == '(') {
4933             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4934             PL_bufptr += 2;
4935             if (*PL_bufptr != '{')
4936                 PL_bufptr++;
4937             PL_expect = XTERMBLOCK;
4938             force_next(DO);
4939         }
4940
4941         if (PL_lex_starts++) {
4942             s = PL_bufptr;
4943             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4944             if (!PL_lex_casemods && PL_lex_inpat)
4945                 TOKEN(',');
4946             else
4947                 AopNOASSIGN(OP_CONCAT);
4948         }
4949         return yylex();
4950
4951     case LEX_INTERPENDMAYBE:
4952         if (intuit_more(PL_bufptr)) {
4953             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4954             break;
4955         }
4956         /* FALLTHROUGH */
4957
4958     case LEX_INTERPEND:
4959         if (PL_lex_dojoin) {
4960             const U8 dojoin_was = PL_lex_dojoin;
4961             PL_lex_dojoin = FALSE;
4962             PL_lex_state = LEX_INTERPCONCAT;
4963             PL_lex_allbrackets--;
4964             return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
4965         }
4966         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4967             && SvEVALED(PL_lex_repl))
4968         {
4969             if (PL_bufptr != PL_bufend)
4970                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4971             PL_lex_repl = NULL;
4972         }
4973         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
4974            re_eval_str.  If the here-doc body’s length equals the previous
4975            value of re_eval_start, re_eval_start will now be null.  So
4976            check re_eval_str as well. */
4977         if (PL_parser->lex_shared->re_eval_start
4978          || PL_parser->lex_shared->re_eval_str) {
4979             SV *sv;
4980             if (*PL_bufptr != ')')
4981                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4982             PL_bufptr++;
4983             /* having compiled a (?{..}) expression, return the original
4984              * text too, as a const */
4985             if (PL_parser->lex_shared->re_eval_str) {
4986                 sv = PL_parser->lex_shared->re_eval_str;
4987                 PL_parser->lex_shared->re_eval_str = NULL;
4988                 SvCUR_set(sv,
4989                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4990                 SvPV_shrink_to_cur(sv);
4991             }
4992             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4993                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4994             NEXTVAL_NEXTTOKE.opval =
4995                     newSVOP(OP_CONST, 0,
4996                                  sv);
4997             force_next(THING);
4998             PL_parser->lex_shared->re_eval_start = NULL;
4999             PL_expect = XTERM;
5000             return REPORT(',');
5001         }
5002
5003         /* FALLTHROUGH */
5004     case LEX_INTERPCONCAT:
5005 #ifdef DEBUGGING
5006         if (PL_lex_brackets)
5007             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5008                        (long) PL_lex_brackets);
5009 #endif
5010         if (PL_bufptr == PL_bufend)
5011             return REPORT(sublex_done());
5012
5013         /* m'foo' still needs to be parsed for possible (?{...}) */
5014         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5015             SV *sv = newSVsv(PL_linestr);
5016             sv = tokeq(sv);
5017             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
5018             s = PL_bufend;
5019         }
5020         else {
5021             int save_error_count = PL_error_count;
5022
5023             s = scan_const(PL_bufptr);
5024
5025             /* Set flag if this was a pattern and there were errors.  op.c will
5026              * refuse to compile a pattern with this flag set.  Otherwise, we
5027              * could get segfaults, etc. */
5028             if (PL_lex_inpat && PL_error_count > save_error_count) {
5029                 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
5030             }
5031             if (*s == '\\')
5032                 PL_lex_state = LEX_INTERPCASEMOD;
5033             else
5034                 PL_lex_state = LEX_INTERPSTART;
5035         }
5036
5037         if (s != PL_bufptr) {
5038             NEXTVAL_NEXTTOKE = pl_yylval;
5039             PL_expect = XTERM;
5040             force_next(THING);
5041             if (PL_lex_starts++) {
5042                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5043                 if (!PL_lex_casemods && PL_lex_inpat)
5044                     TOKEN(',');
5045                 else
5046                     AopNOASSIGN(OP_CONCAT);
5047             }
5048             else {
5049                 PL_bufptr = s;
5050                 return yylex();
5051             }
5052         }
5053
5054         return yylex();
5055     case LEX_FORMLINE:
5056         s = scan_formline(PL_bufptr);
5057         if (!PL_lex_formbrack)
5058         {
5059             formbrack = 1;
5060             goto rightbracket;
5061         }
5062         PL_bufptr = s;
5063         return yylex();
5064     }
5065
5066     /* We really do *not* want PL_linestr ever becoming a COW. */
5067     assert (!SvIsCOW(PL_linestr));
5068     s = PL_bufptr;
5069     PL_oldoldbufptr = PL_oldbufptr;
5070     PL_oldbufptr = s;
5071     PL_parser->saw_infix_sigil = 0;
5072
5073     if (PL_in_my == KEY_sigvar) {
5074         /* we expect the sigil and optional var name part of a
5075          * signature element here. Since a '$' is not necessarily
5076          * followed by a var name, handle it specially here; the general
5077          * yylex code would otherwise try to interpret whatever follows
5078          * as a var; e.g. ($, ...) would be seen as the var '$,'
5079          */
5080
5081         U8 sigil;
5082
5083         s = skipspace(s);
5084         sigil = *s++;
5085         PL_bufptr = s; /* for error reporting */
5086         switch (sigil) {
5087         case '$':
5088         case '@':
5089         case '%':
5090             /* spot stuff that looks like an prototype */
5091             if (strchr("$:@%&*;\\[]", *s)) {
5092                 yyerror("Illegal character following sigil in a subroutine signature");
5093                 break;
5094             }
5095             /* '$#' is banned, while '$ # comment' isn't */
5096             if (*s == '#') {
5097                 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5098                 break;
5099             }
5100             s = skipspace(s);
5101             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5102                 char *dest = PL_tokenbuf + 1;
5103                 /* read var name, including sigil, into PL_tokenbuf */
5104                 PL_tokenbuf[0] = sigil;
5105                 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5106                     0, cBOOL(UTF), FALSE);
5107                 *dest = '\0';
5108                 assert(PL_tokenbuf[1]); /* we have a variable name */
5109                 NEXTVAL_NEXTTOKE.ival = sigil;
5110                 force_next('p'); /* force a signature pending identifier */
5111             }
5112             else
5113                 PL_in_my = 0;
5114             PL_expect = XOPERATOR;
5115             break;
5116
5117         case ')':
5118             PL_expect = XBLOCK;
5119             break;
5120         case ',': /* handle ($a,,$b) */
5121             break;
5122
5123         default:
5124             PL_in_my = 0;
5125             yyerror("A signature parameter must start with '$', '@' or '%'");
5126             /* very crude error recovery: skip to likely next signature
5127              * element */
5128             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5129                 s++;
5130             break;
5131         }
5132         TOKEN(sigil);
5133     }
5134
5135   retry:
5136     switch (*s) {
5137     default:
5138         if (UTF) {
5139             if (isIDFIRST_utf8_safe(s, PL_bufend)) {
5140                 goto keylookup;
5141             }
5142         }
5143         else if (isALNUMC(*s)) {
5144             goto keylookup;
5145         }
5146     {
5147         SV *dsv = newSVpvs_flags("", SVs_TEMP);
5148         const char *c;
5149         if (UTF) {
5150             STRLEN skiplen = UTF8SKIP(s);
5151             STRLEN stravail = PL_bufend - s;
5152             c = sv_uni_display(dsv, newSVpvn_flags(s,
5153                                                    skiplen > stravail ? stravail : skiplen,
5154                                                    SVs_TEMP | SVf_UTF8),
5155                                10, UNI_DISPLAY_ISPRINT);
5156         }
5157         else {
5158             c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5159         }
5160         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5161         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5162             d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)PL_linestart) : s - UNRECOGNIZED_PRECEDE_COUNT;
5163         } else {
5164             d = PL_linestart;
5165         }
5166         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
5167                           UTF8fARG(UTF, (s - d), d),
5168                          (int) len + 1);
5169     }
5170     case 4:
5171     case 26:
5172         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
5173     case 0:
5174         if ((!PL_rsfp || PL_lex_inwhat)
5175          && (!PL_parser->filtered || s+1 < PL_bufend)) {
5176             PL_last_uni = 0;
5177             PL_last_lop = 0;
5178             if (PL_lex_brackets
5179                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
5180             {
5181                 yyerror((const char *)
5182                         (PL_lex_formbrack
5183                          ? "Format not terminated"
5184                          : "Missing right curly or square bracket"));
5185             }
5186             DEBUG_T( { PerlIO_printf(Perl_debug_log,
5187                         "### Tokener got EOF\n");
5188             } );
5189             TOKEN(0);
5190         }
5191         if (s++ < PL_bufend)
5192             goto retry;                 /* ignore stray nulls */
5193         PL_last_uni = 0;
5194         PL_last_lop = 0;
5195         if (!PL_in_eval && !PL_preambled) {
5196             PL_preambled = TRUE;
5197             if (PL_perldb) {
5198                 /* Generate a string of Perl code to load the debugger.
5199                  * If PERL5DB is set, it will return the contents of that,
5200                  * otherwise a compile-time require of perl5db.pl.  */
5201
5202                 const char * const pdb = PerlEnv_getenv("PERL5DB");
5203
5204                 if (pdb) {
5205                     sv_setpv(PL_linestr, pdb);
5206                     sv_catpvs(PL_linestr,";");
5207                 } else {
5208                     SETERRNO(0,SS_NORMAL);
5209                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5210                 }
5211                 PL_parser->preambling = CopLINE(PL_curcop);
5212             } else
5213                 SvPVCLEAR(PL_linestr);
5214             if (PL_preambleav) {
5215                 SV **svp = AvARRAY(PL_preambleav);
5216                 SV **const end = svp + AvFILLp(PL_preambleav);
5217                 while(svp <= end) {
5218                     sv_catsv(PL_linestr, *svp);
5219                     ++svp;
5220                     sv_catpvs(PL_linestr, ";");
5221                 }
5222                 sv_free(MUTABLE_SV(PL_preambleav));
5223                 PL_preambleav = NULL;
5224             }
5225             if (PL_minus_E)
5226                 sv_catpvs(PL_linestr,
5227                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5228             if (PL_minus_n || PL_minus_p) {
5229                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5230                 if (PL_minus_l)
5231                     sv_catpvs(PL_linestr,"chomp;");
5232                 if (PL_minus_a) {
5233                     if (PL_minus_F) {
5234                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5235                              || *PL_splitstr == '"')
5236                               && strchr(PL_splitstr + 1, *PL_splitstr))
5237                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5238                         else {
5239                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5240                                bytes can be used as quoting characters.  :-) */
5241                             const char *splits = PL_splitstr;
5242                             sv_catpvs(PL_linestr, "our @F=split(q\0");
5243                             do {
5244                                 /* Need to \ \s  */
5245                                 if (*splits == '\\')
5246                                     sv_catpvn(PL_linestr, splits, 1);
5247                                 sv_catpvn(PL_linestr, splits, 1);
5248                             } while (*splits++);
5249                             /* This loop will embed the trailing NUL of
5250                                PL_linestr as the last thing it does before
5251                                terminating.  */
5252                             sv_catpvs(PL_linestr, ");");
5253                         }
5254                     }
5255                     else
5256                         sv_catpvs(PL_linestr,"our @F=split(' ');");
5257                 }
5258             }
5259             sv_catpvs(PL_linestr, "\n");
5260             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5261             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5262             PL_last_lop = PL_last_uni = NULL;
5263             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
5264                 update_debugger_info(PL_linestr, NULL, 0);
5265             goto retry;
5266         }
5267         do {
5268             fake_eof = 0;
5269             bof = cBOOL(PL_rsfp);
5270             if (0) {
5271               fake_eof:
5272                 fake_eof = LEX_FAKE_EOF;
5273             }
5274             PL_bufptr = PL_bufend;
5275             COPLINE_INC_WITH_HERELINES;
5276             if (!lex_next_chunk(fake_eof)) {
5277                 CopLINE_dec(PL_curcop);
5278                 s = PL_bufptr;
5279                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
5280             }
5281             CopLINE_dec(PL_curcop);
5282             s = PL_bufptr;
5283             /* If it looks like the start of a BOM or raw UTF-16,
5284              * check if it in fact is. */
5285             if (bof && PL_rsfp
5286                 && (*s == 0
5287                     || *(U8*)s == BOM_UTF8_FIRST_BYTE
5288                         || *(U8*)s >= 0xFE
5289                         || s[1] == 0))
5290             {
5291                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5292                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5293 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5294                 /* offset may include swallowed CR */
5295                 if (!bof)
5296                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5297 #endif
5298                 if (bof) {
5299                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5300                     s = swallow_bom((U8*)s);
5301                 }
5302             }
5303             if (PL_parser->in_pod) {
5304                 /* Incest with pod. */
5305                 if (*s == '=' && strEQs(s, "=cut") && !isALPHA(s[4])) {
5306                     SvPVCLEAR(PL_linestr);
5307                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5308                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5309                     PL_last_lop = PL_last_uni = NULL;
5310                     PL_parser->in_pod = 0;
5311                 }
5312             }
5313             if (PL_rsfp || PL_parser->filtered)
5314                 incline(s);
5315         } while (PL_parser->in_pod);
5316         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5317         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5318         PL_last_lop = PL_last_uni = NULL;
5319         if (CopLINE(PL_curcop) == 1) {
5320             while (s < PL_bufend && isSPACE(*s))
5321                 s++;
5322             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5323                 s++;
5324             d = NULL;
5325             if (!PL_in_eval) {
5326                 if (*s == '#' && *(s+1) == '!')
5327                     d = s + 2;
5328 #ifdef ALTERNATE_SHEBANG
5329                 else {
5330                     static char const as[] = ALTERNATE_SHEBANG;
5331                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5332                         d = s + (sizeof(as) - 1);
5333                 }
5334 #endif /* ALTERNATE_SHEBANG */
5335             }
5336             if (d) {
5337                 char *ipath;
5338                 char *ipathend;
5339
5340                 while (isSPACE(*d))
5341                     d++;
5342                 ipath = d;
5343                 while (*d && !isSPACE(*d))
5344                     d++;
5345                 ipathend = d;
5346
5347 #ifdef ARG_ZERO_IS_SCRIPT
5348                 if (ipathend > ipath) {
5349                     /*
5350                      * HP-UX (at least) sets argv[0] to the script name,
5351                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5352                      * at least, set argv[0] to the basename of the Perl
5353                      * interpreter. So, having found "#!", we'll set it right.
5354                      */
5355                     SV* copfilesv = CopFILESV(PL_curcop);
5356                     if (copfilesv) {
5357                         SV * const x =
5358                             GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5359                                              SVt_PV)); /* $^X */
5360                         assert(SvPOK(x) || SvGMAGICAL(x));
5361                         if (sv_eq(x, copfilesv)) {
5362                             sv_setpvn(x, ipath, ipathend - ipath);
5363                             SvSETMAGIC(x);
5364                         }
5365                         else {
5366                             STRLEN blen;
5367                             STRLEN llen;
5368                             const char *bstart = SvPV_const(copfilesv, blen);
5369                             const char * const lstart = SvPV_const(x, llen);
5370                             if (llen < blen) {
5371                                 bstart += blen - llen;
5372                                 if (strnEQ(bstart, lstart, llen) &&     bstart[-1] == '/') {
5373                                     sv_setpvn(x, ipath, ipathend - ipath);
5374                                     SvSETMAGIC(x);
5375                                 }
5376                             }
5377                         }
5378                     }
5379                     else {
5380                         /* Anything to do if no copfilesv? */
5381                     }
5382                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5383                 }
5384 #endif /* ARG_ZERO_IS_SCRIPT */
5385
5386                 /*
5387                  * Look for options.
5388                  */
5389                 d = instr(s,"perl -");
5390                 if (!d) {
5391                     d = instr(s,"perl");
5392 #if defined(DOSISH)
5393                     /* avoid getting into infinite loops when shebang
5394                      * line contains "Perl" rather than "perl" */
5395                     if (!d) {
5396                         for (d = ipathend-4; d >= ipath; --d) {
5397                             if (isALPHA_FOLD_EQ(*d, 'p')
5398                                 && !ibcmp(d, "perl", 4))
5399                             {
5400                                 break;
5401                             }
5402                         }
5403                         if (d < ipath)
5404                             d = NULL;
5405                     }
5406 #endif
5407                 }
5408 #ifdef ALTERNATE_SHEBANG
5409                 /*
5410                  * If the ALTERNATE_SHEBANG on this system starts with a
5411                  * character that can be part of a Perl expression, then if
5412                  * we see it but not "perl", we're probably looking at the
5413                  * start of Perl code, not a request to hand off to some
5414                  * other interpreter.  Similarly, if "perl" is there, but
5415                  * not in the first 'word' of the line, we assume the line
5416                  * contains the start of the Perl program.
5417                  */
5418                 if (d && *s != '#') {
5419                     const char *c = ipath;
5420                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5421                         c++;
5422                     if (c < d)
5423                         d = NULL;       /* "perl" not in first word; ignore */
5424                     else
5425                         *s = '#';       /* Don't try to parse shebang line */
5426                 }
5427 #endif /* ALTERNATE_SHEBANG */
5428                 if (!d
5429                     && *s == '#'
5430                     && ipathend > ipath
5431                     && !PL_minus_c
5432                     && !instr(s,"indir")
5433                     && instr(PL_origargv[0],"perl"))
5434                 {
5435                     dVAR;
5436                     char **newargv;
5437
5438                     *ipathend = '\0';
5439                     s = ipathend + 1;
5440                     while (s < PL_bufend && isSPACE(*s))
5441                         s++;
5442                     if (s < PL_bufend) {
5443                         Newx(newargv,PL_origargc+3,char*);
5444                         newargv[1] = s;
5445                         while (s < PL_bufend && !isSPACE(*s))
5446                             s++;
5447                         *s = '\0';
5448                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5449                     }
5450                     else
5451                         newargv = PL_origargv;
5452                     newargv[0] = ipath;
5453                     PERL_FPU_PRE_EXEC
5454                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5455                     PERL_FPU_POST_EXEC
5456                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5457                 }
5458                 if (d) {
5459                     while (*d && !isSPACE(*d))
5460                         d++;
5461                     while (SPACE_OR_TAB(*d))
5462                         d++;
5463
5464                     if (*d++ == '-') {
5465                         const bool switches_done = PL_doswitches;
5466                         const U32 oldpdb = PL_perldb;
5467                         const bool oldn = PL_minus_n;
5468                         const bool oldp = PL_minus_p;
5469                         const char *d1 = d;
5470
5471                         do {
5472                             bool baduni = FALSE;
5473                             if (*d1 == 'C') {
5474                                 const char *d2 = d1 + 1;
5475                                 if (parse_unicode_opts((const char **)&d2)
5476                                     != PL_unicode)
5477                                     baduni = TRUE;
5478                             }
5479                             if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
5480                                 const char * const m = d1;
5481                                 while (*d1 && !isSPACE(*d1))
5482                                     d1++;
5483                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5484                                       (int)(d1 - m), m);
5485                             }
5486                             d1 = moreswitches(d1);
5487                         } while (d1);
5488                         if (PL_doswitches && !switches_done) {
5489                             int argc = PL_origargc;
5490                             char **argv = PL_origargv;
5491                             do {
5492                                 argc--,argv++;
5493                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5494                             init_argv_symbols(argc,argv);
5495                         }
5496                         if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
5497                             || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5498                               /* if we have already added "LINE: while (<>) {",
5499                                  we must not do it again */
5500                         {
5501                             SvPVCLEAR(PL_linestr);
5502                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5503                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5504                             PL_last_lop = PL_last_uni = NULL;
5505                             PL_preambled = FALSE;
5506                             if (PERLDB_LINE_OR_SAVESRC)
5507                                 (void)gv_fetchfile(PL_origfilename);
5508                             goto retry;
5509                         }
5510                     }
5511                 }
5512             }
5513         }
5514         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5515             PL_lex_state = LEX_FORMLINE;
5516             force_next(FORMRBRACK);
5517             TOKEN(';');
5518         }
5519         goto retry;
5520     case '\r':
5521 #ifdef PERL_STRICT_CR
5522         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5523         Perl_croak(aTHX_
5524       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5525 #endif
5526     case ' ': case '\t': case '\f': case '\v':
5527         s++;
5528         goto retry;
5529     case '#':
5530     case '\n':
5531         if (PL_lex_state != LEX_NORMAL
5532             || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
5533         {
5534             const bool in_comment = *s == '#';
5535             if (*s == '#' && s == PL_linestart && PL_in_eval
5536              && !PL_rsfp && !PL_parser->filtered) {
5537                 /* handle eval qq[#line 1 "foo"\n ...] */
5538                 CopLINE_dec(PL_curcop);
5539                 incline(s);
5540             }
5541             d = s;
5542             while (d < PL_bufend && *d != '\n')
5543                 d++;
5544             if (d < PL_bufend)
5545                 d++;
5546             else if (d > PL_bufend)
5547                 /* Found by Ilya: feed random input to Perl. */
5548                 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5549                            d, PL_bufend);
5550             s = d;
5551             if (in_comment && d == PL_bufend
5552                 && PL_lex_state == LEX_INTERPNORMAL
5553                 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5554                 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5555             else
5556                 incline(s);
5557             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5558                 PL_lex_state = LEX_FORMLINE;
5559                 force_next(FORMRBRACK);
5560                 TOKEN(';');
5561             }
5562         }
5563         else {
5564             while (s < PL_bufend && *s != '\n')
5565                 s++;
5566             if (s < PL_bufend)
5567                 {
5568                     s++;
5569                     if (s < PL_bufend)
5570                         incline(s);
5571                 }
5572             else if (s > PL_bufend)
5573                 /* Found by Ilya: feed random input to Perl. */
5574                 Perl_croak(aTHX_ "panic: input overflow");
5575         }
5576         goto retry;
5577     case '-':
5578         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5579             I32 ftst = 0;
5580             char tmp;
5581
5582             s++;
5583             PL_bufptr = s;
5584             tmp = *s++;
5585
5586             while (s < PL_bufend && SPACE_OR_TAB(*s))
5587                 s++;
5588
5589             if (strEQs(s,"=>")) {
5590                 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5591                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5592                 OPERATOR('-');          /* unary minus */
5593             }
5594             switch (tmp) {
5595             case 'r': ftst = OP_FTEREAD;        break;
5596             case 'w': ftst = OP_FTEWRITE;       break;
5597             case 'x': ftst = OP_FTEEXEC;        break;
5598             case 'o': ftst = OP_FTEOWNED;       break;
5599             case 'R': ftst = OP_FTRREAD;        break;
5600             case 'W': ftst = OP_FTRWRITE;       break;
5601             case 'X': ftst = OP_FTREXEC;        break;
5602             case 'O': ftst = OP_FTROWNED;       break;
5603             case 'e': ftst = OP_FTIS;           break;
5604             case 'z': ftst = OP_FTZERO;         break;
5605             case 's': ftst = OP_FTSIZE;         break;
5606             case 'f': ftst = OP_FTFILE;         break;
5607             case 'd': ftst = OP_FTDIR;          break;
5608             case 'l': ftst = OP_FTLINK;         break;
5609             case 'p': ftst = OP_FTPIPE;         break;
5610             case 'S': ftst = OP_FTSOCK;         break;
5611             case 'u': ftst = OP_FTSUID;         break;
5612             case 'g': ftst = OP_FTSGID;         break;
5613             case 'k': ftst = OP_FTSVTX;         break;
5614             case 'b': ftst = OP_FTBLK;          break;
5615             case 'c': ftst = OP_FTCHR;          break;
5616             case 't': ftst = OP_FTTTY;          break;
5617             case 'T': ftst = OP_FTTEXT;         break;
5618             case 'B': ftst = OP_FTBINARY;       break;
5619             case 'M': case 'A': case 'C':
5620                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5621                 switch (tmp) {
5622                 case 'M': ftst = OP_FTMTIME;    break;
5623                 case 'A': ftst = OP_FTATIME;    break;
5624                 case 'C': ftst = OP_FTCTIME;    break;
5625                 default:                        break;
5626                 }
5627                 break;
5628             default:
5629                 break;
5630             }
5631             if (ftst) {
5632                 PL_last_uni = PL_oldbufptr;
5633                 PL_last_lop_op = (OPCODE)ftst;
5634                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5635                         "### Saw file test %c\n", (int)tmp);
5636                 } );
5637                 FTST(ftst);
5638             }
5639             else {
5640                 /* Assume it was a minus followed by a one-letter named
5641                  * subroutine call (or a -bareword), then. */
5642                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5643                         "### '-%c' looked like a file test but was not\n",
5644                         (int) tmp);
5645                 } );
5646                 s = --PL_bufptr;
5647             }
5648         }
5649         {
5650             const char tmp = *s++;
5651             if (*s == tmp) {
5652                 s++;
5653                 if (PL_expect == XOPERATOR)
5654                     TERM(POSTDEC);
5655                 else
5656                     OPERATOR(PREDEC);
5657             }
5658             else if (*s == '>') {
5659                 s++;
5660                 s = skipspace(s);
5661                 if (((*s == '$' || *s == '&') && s[1] == '*')
5662                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
5663                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5664                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5665                  )
5666                 {
5667                     PL_expect = XPOSTDEREF;
5668                     TOKEN(ARROW);
5669                 }
5670                 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5671                     s = force_word(s,METHOD,FALSE,TRUE);
5672                     TOKEN(ARROW);
5673                 }
5674                 else if (*s == '$')
5675                     OPERATOR(ARROW);
5676                 else
5677                     TERM(ARROW);
5678             }
5679             if (PL_expect == XOPERATOR) {
5680                 if (*s == '='
5681                     && !PL_lex_allbrackets
5682                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5683                 {
5684                     s--;
5685                     TOKEN(0);
5686                 }
5687                 Aop(OP_SUBTRACT);
5688             }
5689             else {
5690                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5691                     check_uni();
5692                 OPERATOR('-');          /* unary minus */
5693             }
5694         }
5695
5696     case '+':
5697         {
5698             const char tmp = *s++;
5699             if (*s == tmp) {
5700                 s++;
5701                 if (PL_expect == XOPERATOR)
5702                     TERM(POSTINC);
5703                 else
5704                     OPERATOR(PREINC);
5705             }
5706             if (PL_expect == XOPERATOR) {
5707                 if (*s == '='
5708                     && !PL_lex_allbrackets
5709                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5710                 {
5711                     s--;
5712                     TOKEN(0);
5713                 }
5714                 Aop(OP_ADD);
5715             }
5716             else {
5717                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5718                     check_uni();
5719                 OPERATOR('+');
5720             }
5721         }
5722
5723     case '*':
5724         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5725         if (PL_expect != XOPERATOR) {
5726             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5727             PL_expect = XOPERATOR;
5728             force_ident(PL_tokenbuf, '*');
5729             if (!*PL_tokenbuf)
5730                 PREREF('*');
5731             TERM('*');
5732         }
5733         s++;
5734         if (*s == '*') {
5735             s++;
5736             if (*s == '=' && !PL_lex_allbrackets
5737                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5738             {
5739                 s -= 2;
5740                 TOKEN(0);
5741             }
5742             PWop(OP_POW);
5743         }
5744         if (*s == '='
5745             && !PL_lex_allbrackets
5746             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5747         {
5748             s--;
5749             TOKEN(0);
5750         }
5751         PL_parser->saw_infix_sigil = 1;
5752         Mop(OP_MULTIPLY);
5753
5754     case '%':
5755     {
5756         if (PL_expect == XOPERATOR) {
5757             if (s[1] == '='
5758                 && !PL_lex_allbrackets
5759                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5760             {
5761                 TOKEN(0);
5762             }
5763             ++s;
5764             PL_parser->saw_infix_sigil = 1;
5765             Mop(OP_MODULO);
5766         }
5767         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5768         PL_tokenbuf[0] = '%';
5769         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5770         pl_yylval.ival = 0;
5771         if (!PL_tokenbuf[1]) {
5772             PREREF('%');
5773         }
5774         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5775             if (*s == '[')
5776                 PL_tokenbuf[0] = '@';
5777         }
5778         PL_expect = XOPERATOR;
5779         force_ident_maybe_lex('%');
5780         TERM('%');
5781     }
5782     case '^':
5783         d = s;
5784         bof = FEATURE_BITWISE_IS_ENABLED;
5785         if (bof && s[1] == '.')
5786             s++;
5787         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5788                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5789         {
5790             s = d;
5791             TOKEN(0);
5792         }
5793         s++;
5794         BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5795     case '[':
5796         if (PL_lex_brackets > 100)
5797             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5798         PL_lex_brackstack[PL_lex_brackets++] = 0;
5799         PL_lex_allbrackets++;
5800         {
5801             const char tmp = *s++;
5802             OPERATOR(tmp);
5803         }
5804     case '~':
5805         if (s[1] == '~'
5806             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5807         {
5808             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5809                 TOKEN(0);
5810             s += 2;
5811             Perl_ck_warner_d(aTHX_
5812                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5813                 "Smartmatch is experimental");
5814             Eop(OP_SMARTMATCH);
5815         }
5816         s++;
5817         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5818             s++;
5819             BCop(OP_SCOMPLEMENT);
5820         }
5821         BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5822     case ',':
5823         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5824             TOKEN(0);
5825         s++;
5826         OPERATOR(',');
5827     case ':':
5828         if (s[1] == ':') {
5829             len = 0;
5830             goto just_a_word_zero_gv;
5831         }
5832         s++;
5833         {
5834         OP *attrs;
5835
5836         switch (PL_expect) {
5837         case XOPERATOR:
5838             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5839                 break;
5840             PL_bufptr = s;      /* update in case we back off */
5841             if (*s == '=') {
5842                 Perl_croak(aTHX_
5843                            "Use of := for an empty attribute list is not allowed");
5844             }
5845             goto grabattrs;
5846         case XATTRBLOCK:
5847             PL_expect = XBLOCK;
5848             goto grabattrs;
5849         case XATTRTERM:
5850             PL_expect = XTERMBLOCK;
5851          grabattrs:
5852             s = skipspace(s);
5853             attrs = NULL;
5854             while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5855                 I32 tmp;
5856                 SV *sv;
5857                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5858                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5859                     if (tmp < 0) tmp = -tmp;
5860                     switch (tmp) {
5861                     case KEY_or:
5862                     case KEY_and:
5863                     case KEY_for:
5864                     case KEY_foreach:
5865                     case KEY_unless:
5866                     case KEY_if:
5867                     case KEY_while:
5868                     case KEY_until:
5869                         goto got_attrs;
5870                     default:
5871                         break;
5872                     }
5873                 }
5874                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5875                 if (*d == '(') {
5876                     d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5877                     if (!d) {
5878                         if (attrs)
5879                             op_free(attrs);
5880                         sv_free(sv);
5881                         Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5882                     }
5883                     COPLINE_SET_FROM_MULTI_END;
5884                 }
5885                 if (PL_lex_stuff) {
5886                     sv_catsv(sv, PL_lex_stuff);
5887                     attrs = op_append_elem(OP_LIST, attrs,
5888                                         newSVOP(OP_CONST, 0, sv));
5889                     SvREFCNT_dec_NN(PL_lex_stuff);
5890                     PL_lex_stuff = NULL;
5891                 }
5892                 else {
5893                     /* NOTE: any CV attrs applied here need to be part of
5894                        the CVf_BUILTIN_ATTRS define in cv.h! */
5895                     if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5896                         sv_free(sv);
5897                         CvLVALUE_on(PL_compcv);
5898                     }
5899                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5900                         sv_free(sv);
5901                         CvMETHOD_on(PL_compcv);
5902                     }
5903                     else if (!PL_in_my && len == 5
5904                           && strnEQ(SvPVX(sv), "const", len))
5905                     {
5906                         sv_free(sv);
5907                         Perl_ck_warner_d(aTHX_
5908                             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5909                            ":const is experimental"
5910                         );
5911                         CvANONCONST_on(PL_compcv);
5912                         if (!CvANON(PL_compcv))
5913                             yyerror(":const is not permitted on named "
5914                                     "subroutines");
5915                     }
5916                     /* After we've set the flags, it could be argued that
5917                        we don't need to do the attributes.pm-based setting
5918                        process, and shouldn't bother appending recognized
5919                        flags.  To experiment with that, uncomment the
5920                        following "else".  (Note that's already been
5921                        uncommented.  That keeps the above-applied built-in
5922                        attributes from being intercepted (and possibly
5923                        rejected) by a package's attribute routines, but is
5924                        justified by the performance win for the common case
5925                        of applying only built-in attributes.) */
5926                     else
5927                         attrs = op_append_elem(OP_LIST, attrs,
5928                                             newSVOP(OP_CONST, 0,
5929                                                     sv));
5930                 }
5931                 s = skipspace(d);
5932                 if (*s == ':' && s[1] != ':')
5933                     s = skipspace(s+1);
5934                 else if (s == d)
5935                     break;      /* require real whitespace or :'s */
5936                 /* XXX losing whitespace on sequential attributes here */
5937             }
5938             {
5939                 if (*s != ';'
5940                     && *s != '}'
5941                     && !(PL_expect == XOPERATOR
5942                          ? (*s == '=' ||  *s == ')')
5943                          : (*s == '{' ||  *s == '(')))
5944                 {
5945                     const char q = ((*s == '\'') ? '"' : '\'');
5946                     /* If here for an expression, and parsed no attrs, back
5947                        off. */
5948                     if (PL_expect == XOPERATOR && !attrs) {
5949                         s = PL_bufptr;
5950                         break;
5951                     }
5952                     /* MUST advance bufptr here to avoid bogus "at end of line"
5953                        context messages from yyerror().
5954                     */
5955                     PL_bufptr = s;
5956                     yyerror( (const char *)
5957                              (*s
5958                               ? Perl_form(aTHX_ "Invalid separator character "
5959                                           "%c%c%c in attribute list", q, *s, q)
5960                               : "Unterminated attribute list" ) );
5961                     if (attrs)
5962                         op_free(attrs);
5963                     OPERATOR(':');
5964                 }
5965             }
5966         got_attrs:
5967             if (attrs) {
5968                 NEXTVAL_NEXTTOKE.opval = attrs;
5969                 force_next(THING);
5970             }
5971             TOKEN(COLONATTR);
5972         }
5973         }
5974         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5975             s--;
5976             TOKEN(0);
5977         }
5978         PL_lex_allbrackets--;
5979         OPERATOR(':');
5980     case '(':
5981         s++;
5982         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5983             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5984         else
5985             PL_expect = XTERM;
5986         s = skipspace(s);
5987         PL_lex_allbrackets++;
5988         TOKEN('(');
5989     case ';':
5990         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5991             TOKEN(0);
5992         CLINE;
5993         s++;
5994         PL_expect = XSTATE;
5995         TOKEN(';');
5996     case ')':
5997         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5998             TOKEN(0);
5999         s++;
6000         PL_lex_allbrackets--;
6001         s = skipspace(s);
6002         if (*s == '{')
6003             PREBLOCK(')');
6004         TERM(')');
6005     case ']':
6006         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6007             TOKEN(0);
6008         s++;
6009         if (PL_lex_brackets <= 0)
6010             /* diag_listed_as: Unmatched right %s bracket */
6011             yyerror("Unmatched right square bracket");
6012         else
6013             --PL_lex_brackets;
6014         PL_lex_allbrackets--;
6015         if (PL_lex_state == LEX_INTERPNORMAL) {
6016             if (PL_lex_brackets == 0) {
6017                 if (*s == '-' && s[1] == '>')
6018                     PL_lex_state = LEX_INTERPENDMAYBE;
6019                 else if (*s != '[' && *s != '{')
6020                     PL_lex_state = LEX_INTERPEND;
6021             }
6022         }
6023         TERM(']');
6024     case '{':
6025         s++;
6026       leftbracket:
6027         if (PL_lex_brackets > 100) {
6028             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6029         }
6030         switch (PL_expect) {
6031         case XTERM:
6032         case XTERMORDORDOR:
6033             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6034             PL_lex_allbrackets++;
6035             OPERATOR(HASHBRACK);
6036         case XOPERATOR:
6037             while (s < PL_bufend && SPACE_OR_TAB(*s))
6038                 s++;
6039             d = s;
6040             PL_tokenbuf[0] = '\0';
6041             if (d < PL_bufend && *d == '-') {
6042                 PL_tokenbuf[0] = '-';
6043                 d++;
6044                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6045                     d++;
6046             }
6047             if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6048                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6049                               FALSE, &len);
6050                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6051                     d++;
6052                 if (*d == '}') {
6053                     const char minus = (PL_tokenbuf[0] == '-');
6054                     s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6055                     if (minus)
6056                         force_next('-');
6057                 }
6058             }
6059             /* FALLTHROUGH */
6060         case XATTRTERM:
6061         case XTERMBLOCK:
6062             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6063             PL_lex_allbrackets++;
6064             PL_expect = XSTATE;
6065             break;
6066         case XATTRBLOCK:
6067         case XBLOCK:
6068             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6069             PL_lex_allbrackets++;
6070             PL_expect = XSTATE;
6071             break;
6072         case XBLOCKTERM:
6073             PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6074             PL_lex_allbrackets++;
6075             PL_expect = XSTATE;
6076             break;
6077         default: {
6078                 const char *t;
6079                 if (PL_oldoldbufptr == PL_last_lop)
6080                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6081                 else
6082                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6083                 PL_lex_allbrackets++;
6084                 s = skipspace(s);
6085                 if (*s == '}') {
6086                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6087                         PL_expect = XTERM;
6088                         /* This hack is to get the ${} in the message. */
6089                         PL_bufptr = s+1;
6090                         yyerror("syntax error");
6091                         break;
6092                     }
6093                     OPERATOR(HASHBRACK);
6094                 }
6095                 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6096                     /* ${...} or @{...} etc., but not print {...}
6097                      * Skip the disambiguation and treat this as a block.
6098                      */
6099                     goto block_expectation;
6100                 }
6101                 /* This hack serves to disambiguate a pair of curlies
6102                  * as being a block or an anon hash.  Normally, expectation
6103                  * determines that, but in cases where we're not in a
6104                  * position to expect anything in particular (like inside
6105                  * eval"") we have to resolve the ambiguity.  This code
6106                  * covers the case where the first term in the curlies is a
6107                  * quoted string.  Most other cases need to be explicitly
6108                  * disambiguated by prepending a "+" before the opening
6109                  * curly in order to force resolution as an anon hash.
6110                  *
6111                  * XXX should probably propagate the outer expectation
6112                  * into eval"" to rely less on this hack, but that could
6113                  * potentially break current behavior of eval"".
6114                  * GSAR 97-07-21
6115                  */
6116                 t = s;
6117                 if (*s == '\'' || *s == '"' || *s == '`') {
6118                     /* common case: get past first string, handling escapes */
6119                     for (t++; t < PL_bufend && *t != *s;)
6120                         if (*t++ == '\\')
6121                             t++;
6122                     t++;
6123                 }
6124                 else if (*s == 'q') {
6125                     if (++t < PL_bufend
6126                         && (!isWORDCHAR(*t)
6127                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6128                                 && !isWORDCHAR(*t))))
6129                     {
6130                         /* skip q//-like construct */
6131                         const char *tmps;
6132                         char open, close, term;
6133                         I32 brackets = 1;
6134
6135                         while (t < PL_bufend && isSPACE(*t))
6136                             t++;
6137                         /* check for q => */
6138                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6139                             OPERATOR(HASHBRACK);
6140                         }
6141                         term = *t;
6142                         open = term;
6143                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6144                             term = tmps[5];
6145                         close = term;
6146                         if (open == close)
6147                             for (t++; t < PL_bufend; t++) {
6148                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6149                                     t++;
6150                                 else if (*t == open)
6151                                     break;
6152                             }
6153                         else {
6154                             for (t++; t < PL_bufend; t++) {
6155                                 if (*t == '\\' && t+1 < PL_bufend)
6156                                     t++;
6157                                 else if (*t == close && --brackets <= 0)
6158                                     break;
6159                                 else if (*t == open)
6160                                     brackets++;
6161                             }
6162                         }
6163                         t++;
6164                     }
6165                     else
6166                         /* skip plain q word */
6167                         while (   t < PL_bufend
6168                                && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6169                         {
6170                             t += UTF ? UTF8SKIP(t) : 1;
6171                         }
6172                 }
6173                 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6174                     t += UTF ? UTF8SKIP(t) : 1;
6175                     while (   t < PL_bufend
6176                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6177                     {
6178                         t += UTF ? UTF8SKIP(t) : 1;
6179                     }
6180                 }
6181                 while (t < PL_bufend && isSPACE(*t))
6182                     t++;
6183                 /* if comma follows first term, call it an anon hash */
6184                 /* XXX it could be a comma expression with loop modifiers */
6185                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6186                                    || (*t == '=' && t[1] == '>')))
6187                     OPERATOR(HASHBRACK);
6188                 if (PL_expect == XREF)
6189                 {
6190                   block_expectation:
6191                     /* If there is an opening brace or 'sub:', treat it
6192                        as a term to make ${{...}}{k} and &{sub:attr...}
6193                        dwim.  Otherwise, treat it as a statement, so
6194                        map {no strict; ...} works.
6195                      */
6196                     s = skipspace(s);
6197                     if (*s == '{') {
6198                         PL_expect = XTERM;
6199                         break;
6200                     }
6201                     if (strEQs(s, "sub")) {
6202                         d = s + 3;
6203                         d = skipspace(d);
6204                         if (*d == ':') {
6205                             PL_expect = XTERM;
6206                             break;
6207                         }
6208                     }
6209                     PL_expect = XSTATE;
6210                 }
6211                 else {
6212                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6213                     PL_expect = XSTATE;
6214                 }
6215             }
6216             break;
6217         }
6218         pl_yylval.ival = CopLINE(PL_curcop);
6219         PL_copline = NOLINE;   /* invalidate current command line number */
6220         TOKEN(formbrack ? '=' : '{');
6221     case '}':
6222         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6223             TOKEN(0);
6224       rightbracket:
6225         s++;
6226         if (PL_lex_brackets <= 0)
6227             /* diag_listed_as: Unmatched right %s bracket */
6228             yyerror("Unmatched right curly bracket");
6229         else
6230             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6231         PL_lex_allbrackets--;
6232         if (PL_lex_state == LEX_INTERPNORMAL) {
6233             if (PL_lex_brackets == 0) {
6234                 if (PL_expect & XFAKEBRACK) {
6235                     PL_expect &= XENUMMASK;
6236                     PL_lex_state = LEX_INTERPEND;
6237                     PL_bufptr = s;
6238                     return yylex();     /* ignore fake brackets */
6239                 }
6240                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6241                  && SvEVALED(PL_lex_repl))
6242                     PL_lex_state = LEX_INTERPEND;
6243                 else if (*s == '-' && s[1] == '>')
6244                     PL_lex_state = LEX_INTERPENDMAYBE;
6245                 else if (*s != '[' && *s != '{')
6246                     PL_lex_state = LEX_INTERPEND;
6247             }
6248         }
6249         if (PL_expect & XFAKEBRACK) {
6250             PL_expect &= XENUMMASK;
6251             PL_bufptr = s;
6252             return yylex();             /* ignore fake brackets */
6253         }
6254         force_next(formbrack ? '.' : '}');
6255         if (formbrack) LEAVE;
6256         if (formbrack == 2) { /* means . where arguments were expected */
6257             force_next(';');
6258             TOKEN(FORMRBRACK);
6259         }
6260         TOKEN(';');
6261     case '&':
6262         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6263         s++;
6264         if (*s++ == '&') {
6265             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6266                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6267                 s -= 2;
6268                 TOKEN(0);
6269             }
6270             AOPERATOR(ANDAND);
6271         }
6272         s--;
6273         if (PL_expect == XOPERATOR) {
6274             if (   PL_bufptr == PL_linestart
6275                 && ckWARN(WARN_SEMICOLON)
6276                 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6277             {
6278                 CopLINE_dec(PL_curcop);
6279                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6280                 CopLINE_inc(PL_curcop);
6281             }
6282             d = s;
6283             if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6284                 s++;
6285             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6286                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6287                 s = d;
6288                 s--;
6289                 TOKEN(0);
6290             }
6291             if (d == s) {
6292                 PL_parser->saw_infix_sigil = 1;
6293                 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6294             }
6295             else
6296                 BAop(OP_SBIT_AND);
6297         }
6298
6299         PL_tokenbuf[0] = '&';
6300         s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6301         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6302         if (PL_tokenbuf[1]) {
6303             force_ident_maybe_lex('&');
6304         }
6305         else
6306             PREREF('&');
6307         TERM('&');
6308
6309     case '|':
6310         s++;
6311         if (*s++ == '|') {
6312             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6313                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6314                 s -= 2;
6315                 TOKEN(0);
6316             }
6317             AOPERATOR(OROR);
6318         }
6319         s--;
6320         d = s;
6321         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6322             s++;
6323         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6324                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6325             s = d - 1;
6326             TOKEN(0);
6327         }
6328         BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6329     case '=':
6330         s++;
6331         {
6332             const char tmp = *s++;
6333             if (tmp == '=') {
6334                 if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, "=====")) {
6335                     s = vcs_conflict_marker(s + 5);
6336                     goto retry;
6337                 }
6338                 if (!PL_lex_allbrackets
6339                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6340                 {
6341                     s -= 2;
6342                     TOKEN(0);
6343                 }
6344                 Eop(OP_EQ);
6345             }
6346             if (tmp == '>') {
6347                 if (!PL_lex_allbrackets
6348                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
6349                 {
6350                     s -= 2;
6351                     TOKEN(0);
6352                 }
6353                 OPERATOR(',');
6354             }
6355             if (tmp == '~')
6356                 PMop(OP_MATCH);
6357             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6358                 && strchr("+-*/%.^&|<",tmp))
6359                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6360                             "Reversed %c= operator",(int)tmp);
6361             s--;
6362             if (PL_expect == XSTATE
6363                 && isALPHA(tmp)
6364                 && (s == PL_linestart+1 || s[-2] == '\n') )
6365             {
6366                 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6367                     || PL_lex_state != LEX_NORMAL) {
6368                     d = PL_bufend;
6369                     while (s < d) {
6370                         if (*s++ == '\n') {
6371                             incline(s);
6372                             if (strEQs(s,"=cut")) {
6373                                 s = strchr(s,'\n');
6374                                 if (s)
6375                                     s++;
6376                                 else
6377                                     s = d;
6378                                 incline(s);
6379                                 goto retry;
6380                             }
6381                         }
6382                     }
6383                     goto retry;
6384                 }
6385                 s = PL_bufend;
6386                 PL_parser->in_pod = 1;
6387                 goto retry;
6388             }
6389         }
6390         if (PL_expect == XBLOCK) {
6391             const char *t = s;
6392 #ifdef PERL_STRICT_CR
6393             while (SPACE_OR_TAB(*t))
6394 #else
6395             while (SPACE_OR_TAB(*t) || *t == '\r')
6396 #endif
6397                 t++;
6398             if (*t == '\n' || *t == '#') {
6399                 formbrack = 1;
6400                 ENTER;
6401                 SAVEI8(PL_parser->form_lex_state);
6402                 SAVEI32(PL_lex_formbrack);
6403                 PL_parser->form_lex_state = PL_lex_state;
6404                 PL_lex_formbrack = PL_lex_brackets + 1;
6405                 goto leftbracket;
6406             }
6407         }
6408         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6409             s--;
6410             TOKEN(0);
6411         }
6412         pl_yylval.ival = 0;
6413         OPERATOR(ASSIGNOP);
6414     case '!':
6415         s++;
6416         {
6417             const char tmp = *s++;
6418             if (tmp == '=') {
6419                 /* was this !=~ where !~ was meant?
6420                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6421
6422                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6423                     const char *t = s+1;
6424
6425                     while (t < PL_bufend && isSPACE(*t))
6426                         ++t;
6427
6428                     if (*t == '/' || *t == '?'
6429                         || ((*t == 'm' || *t == 's' || *t == 'y')
6430                             && !isWORDCHAR(t[1]))
6431                         || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6432                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6433                                     "!=~ should be !~");
6434                 }
6435                 if (!PL_lex_allbrackets
6436                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6437                 {
6438                     s -= 2;
6439                     TOKEN(0);
6440                 }
6441                 Eop(OP_NE);
6442             }
6443             if (tmp == '~')
6444                 PMop(OP_NOT);
6445         }
6446         s--;
6447         OPERATOR('!');
6448     case '<':
6449         if (PL_expect != XOPERATOR) {
6450             if (s[1] != '<' && !strchr(s,'>'))
6451                 check_uni();
6452             if (s[1] == '<' && s[2] != '>') {
6453                 if ((s == PL_linestart || s[-1] == '\n') && strEQs(s+2, "<<<<<")) {
6454                     s = vcs_conflict_marker(s + 7);
6455                     goto retry;
6456                 }
6457                 s = scan_heredoc(s);
6458             }
6459             else
6460                 s = scan_inputsymbol(s);
6461             PL_expect = XOPERATOR;
6462             TOKEN(sublex_start());
6463         }
6464         s++;
6465         {
6466             char tmp = *s++;
6467             if (tmp == '<') {
6468                 if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, "<<<<<")) {
6469                     s = vcs_conflict_marker(s + 5);
6470                     goto retry;
6471                 }
6472                 if (*s == '=' && !PL_lex_allbrackets
6473                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6474                 {
6475                     s -= 2;
6476                     TOKEN(0);
6477                 }
6478                 SHop(OP_LEFT_SHIFT);
6479             }
6480             if (tmp == '=') {
6481                 tmp = *s++;
6482                 if (tmp == '>') {
6483                     if (!PL_lex_allbrackets
6484                         && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6485                     {
6486                         s -= 3;
6487                         TOKEN(0);
6488                     }
6489                     Eop(OP_NCMP);
6490                 }
6491                 s--;
6492                 if (!PL_lex_allbrackets
6493                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6494                 {
6495                     s -= 2;
6496                     TOKEN(0);
6497                 }
6498                 Rop(OP_LE);
6499             }
6500         }
6501         s--;
6502         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6503             s--;
6504             TOKEN(0);
6505         }
6506         Rop(OP_LT);
6507     case '>':
6508         s++;
6509         {
6510             const char tmp = *s++;
6511             if (tmp == '>') {
6512                 if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, ">>>>>")) {
6513                     s = vcs_conflict_marker(s + 5);
6514                     goto retry;
6515                 }
6516                 if (*s == '=' && !PL_lex_allbrackets
6517                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6518                 {
6519                     s -= 2;
6520                     TOKEN(0);
6521                 }
6522                 SHop(OP_RIGHT_SHIFT);
6523             }
6524             else if (tmp == '=') {
6525                 if (!PL_lex_allbrackets
6526                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6527                 {
6528                     s -= 2;
6529                     TOKEN(0);
6530                 }
6531                 Rop(OP_GE);
6532             }
6533         }
6534         s--;
6535         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6536             s--;
6537             TOKEN(0);
6538         }
6539         Rop(OP_GT);
6540
6541     case '$':
6542         CLINE;
6543
6544         if (PL_expect == XPOSTDEREF) {
6545             if (s[1] == '#') {
6546                 s++;
6547                 POSTDEREF(DOLSHARP);
6548             }
6549             POSTDEREF('$');
6550         }
6551
6552         if (   s[1] == '#'
6553             && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
6554                 || strchr("{$:+-@", s[2])))
6555         {
6556             PL_tokenbuf[0] = '@';
6557             s = scan_ident(s + 1, PL_tokenbuf + 1,
6558                            sizeof PL_tokenbuf - 1, FALSE);
6559             if (PL_expect == XOPERATOR) {
6560                 d = s;
6561                 if (PL_bufptr > s) {
6562                     d = PL_bufptr-1;
6563                     PL_bufptr = PL_oldbufptr;
6564                 }
6565                 no_op("Array length", d);
6566             }
6567             if (!PL_tokenbuf[1])
6568                 PREREF(DOLSHARP);
6569             PL_expect = XOPERATOR;
6570             force_ident_maybe_lex('#');
6571             TOKEN(DOLSHARP);
6572         }
6573
6574         PL_tokenbuf[0] = '$';
6575         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6576         if (PL_expect == XOPERATOR) {
6577             d = s;
6578             if (PL_bufptr > s) {
6579                 d = PL_bufptr-1;
6580                 PL_bufptr = PL_oldbufptr;
6581             }
6582             no_op("Scalar", d);
6583         }
6584         if (!PL_tokenbuf[1]) {
6585             if (s == PL_bufend)
6586                 yyerror("Final $ should be \\$ or $name");
6587             PREREF('$');
6588         }
6589
6590         d = s;
6591         {
6592             const char tmp = *s;
6593             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6594                 s = skipspace(s);
6595
6596             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6597                 && intuit_more(s)) {
6598                 if (*s == '[') {
6599                     PL_tokenbuf[0] = '@';
6600                     if (ckWARN(WARN_SYNTAX)) {
6601                         char *t = s+1;
6602
6603                         while (   isSPACE(*t)
6604                                || isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)
6605                                || *t == '$')
6606                         {
6607                             t += UTF ? UTF8SKIP(t) : 1;
6608                         }
6609                         if (*t++ == ',') {
6610                             PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6611                             while (t < PL_bufend && *t != ']')
6612                                 t++;
6613                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6614                                         "Multidimensional syntax %" UTF8f " not supported",
6615                                         UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
6616                         }
6617                     }
6618                 }
6619                 else if (*s == '{') {
6620                     char *t;
6621                     PL_tokenbuf[0] = '%';
6622                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6623                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6624                         {
6625                             char tmpbuf[sizeof PL_tokenbuf];
6626                             do {
6627                                 t++;
6628                             } while (isSPACE(*t));
6629                             if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
6630                                 STRLEN len;
6631                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6632                                               &len);
6633                                 while (isSPACE(*t))
6634                                     t++;
6635                                 if (  *t == ';'
6636                                     && get_cvn_flags(tmpbuf, len, UTF
6637                                                                   ? SVf_UTF8
6638                                                                   : 0))
6639                                 {
6640                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6641                                         "You need to quote \"%" UTF8f "\"",
6642                                          UTF8fARG(UTF, len, tmpbuf));
6643                                 }
6644                             }
6645                         }
6646                 }
6647             }
6648
6649             PL_expect = XOPERATOR;
6650             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6651                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6652                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6653                     PL_expect = XOPERATOR;
6654                 else if (strchr("$@\"'`q", *s))
6655                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6656                 else if (   strchr("&*<%", *s)
6657                          && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
6658                 {
6659                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6660                 }
6661                 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6662                     char tmpbuf[sizeof PL_tokenbuf];
6663                     int t2;
6664                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6665                     if ((t2 = keyword(tmpbuf, len, 0))) {
6666                         /* binary operators exclude handle interpretations */
6667                         switch (t2) {
6668                         case -KEY_x:
6669                         case -KEY_eq:
6670                         case -KEY_ne:
6671                         case -KEY_gt:
6672                         case -KEY_lt:
6673                         case -KEY_ge:
6674                         case -KEY_le:
6675                         case -KEY_cmp:
6676                             break;
6677                         default:
6678                             PL_expect = XTERM;  /* e.g. print $fh length() */
6679                             break;
6680                         }
6681                     }
6682                     else {
6683                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6684                     }
6685                 }
6686                 else if (isDIGIT(*s))
6687                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6688                 else if (*s == '.' && isDIGIT(s[1]))
6689                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6690                 else if ((*s == '?' || *s == '-' || *s == '+')
6691                          && !isSPACE(s[1]) && s[1] != '=')
6692                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6693                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6694                          && s[1] != '/')
6695                     PL_expect = XTERM;          /* e.g. print $fh /.../
6696                                                    XXX except DORDOR operator
6697                                                 */
6698                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6699                          && s[2] != '=')
6700                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6701             }
6702         }
6703         force_ident_maybe_lex('$');
6704         TOKEN('$');
6705
6706     case '@':
6707         if (PL_expect == XPOSTDEREF)
6708             POSTDEREF('@');
6709         PL_tokenbuf[0] = '@';
6710         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6711         if (PL_expect == XOPERATOR) {
6712             d = s;
6713             if (PL_bufptr > s) {
6714                 d = PL_bufptr-1;
6715                 PL_bufptr = PL_oldbufptr;
6716             }
6717             no_op("Array", d);
6718         }
6719         pl_yylval.ival = 0;
6720         if (!PL_tokenbuf[1]) {
6721             PREREF('@');
6722         }
6723         if (PL_lex_state == LEX_NORMAL)
6724             s = skipspace(s);
6725         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6726             if (*s == '{')
6727                 PL_tokenbuf[0] = '%';
6728
6729             /* Warn about @ where they meant $. */
6730             if (*s == '[' || *s == '{') {
6731                 if (ckWARN(WARN_SYNTAX)) {
6732                     S_check_scalar_slice(aTHX_ s);
6733                 }
6734             }
6735         }
6736         PL_expect = XOPERATOR;
6737         force_ident_maybe_lex('@');
6738         TERM('@');
6739
6740      case '/':                  /* may be division, defined-or, or pattern */
6741         if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6742             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6743                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6744                 TOKEN(0);
6745             s += 2;
6746             AOPERATOR(DORDOR);
6747         }
6748         else if (PL_expect == XOPERATOR) {
6749             s++;
6750             if (*s == '=' && !PL_lex_allbrackets
6751                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6752             {
6753                 s--;
6754                 TOKEN(0);
6755             }
6756             Mop(OP_DIVIDE);
6757         }
6758         else {
6759             /* Disable warning on "study /blah/" */
6760             if (    PL_oldoldbufptr == PL_last_uni
6761                 && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6762                     || memNE(PL_last_uni, "study", 5)
6763                     || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6764              ))
6765                 check_uni();
6766             s = scan_pat(s,OP_MATCH);
6767             TERM(sublex_start());
6768         }
6769
6770      case '?':                  /* conditional */
6771         s++;
6772         if (!PL_lex_allbrackets
6773             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
6774         {
6775             s--;
6776             TOKEN(0);
6777         }
6778         PL_lex_allbrackets++;
6779         OPERATOR('?');
6780
6781     case '.':
6782         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6783 #ifdef PERL_STRICT_CR
6784             && s[1] == '\n'
6785 #else
6786             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6787 #endif
6788             && (s == PL_linestart || s[-1] == '\n') )
6789         {
6790             PL_expect = XSTATE;
6791             formbrack = 2; /* dot seen where arguments expected */
6792             goto rightbracket;
6793         }
6794         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6795             s += 3;
6796             OPERATOR(YADAYADA);
6797         }
6798         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6799             char tmp = *s++;
6800             if (*s == tmp) {
6801                 if (!PL_lex_allbrackets
6802                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
6803                 {
6804                     s--;
6805                     TOKEN(0);
6806                 }
6807                 s++;
6808                 if (*s == tmp) {
6809                     s++;
6810                     pl_yylval.ival = OPf_SPECIAL;
6811                 }
6812                 else
6813                     pl_yylval.ival = 0;
6814                 OPERATOR(DOTDOT);
6815             }
6816             if (*s == '=' && !PL_lex_allbrackets
6817                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6818             {
6819                 s--;
6820                 TOKEN(0);
6821             }
6822             Aop(OP_CONCAT);
6823         }
6824         /* FALLTHROUGH */
6825     case '0': case '1': case '2': case '3': case '4':
6826     case '5': case '6': case '7': case '8': case '9':
6827         s = scan_num(s, &pl_yylval);
6828         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6829         if (PL_expect == XOPERATOR)
6830             no_op("Number",s);
6831         TERM(THING);
6832
6833     case '\'':
6834         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6835         if (!s)
6836             missingterm(NULL);
6837         COPLINE_SET_FROM_MULTI_END;
6838         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6839         if (PL_expect == XOPERATOR) {
6840             no_op("String",s);
6841         }
6842         pl_yylval.ival = OP_CONST;
6843         TERM(sublex_start());
6844
6845     case '"':
6846         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6847         DEBUG_T( {
6848             if (s)
6849                 printbuf("### Saw string before %s\n", s);
6850             else
6851                 PerlIO_printf(Perl_debug_log,
6852                              "### Saw unterminated string\n");
6853         } );
6854         if (PL_expect == XOPERATOR) {
6855                 no_op("String",s);
6856         }
6857         if (!s)
6858             missingterm(NULL);
6859         pl_yylval.ival = OP_CONST;
6860         /* FIXME. I think that this can be const if char *d is replaced by
6861            more localised variables.  */
6862         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6863             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6864                 pl_yylval.ival = OP_STRINGIFY;
6865                 break;
6866             }
6867         }
6868         if (pl_yylval.ival == OP_CONST)
6869             COPLINE_SET_FROM_MULTI_END;
6870         TERM(sublex_start());
6871
6872     case '`':
6873         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6874         DEBUG_T( {
6875             if (s)
6876                 printbuf("### Saw backtick string before %s\n", s);
6877             else
6878                 PerlIO_printf(Perl_debug_log,
6879                              "### Saw unterminated backtick string\n");
6880         } );
6881         if (PL_expect == XOPERATOR)
6882             no_op("Backticks",s);
6883         if (!s)
6884             missingterm(NULL);
6885         pl_yylval.ival = OP_BACKTICK;
6886         TERM(sublex_start());
6887
6888     case '\\':
6889         s++;
6890         if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6891          && isDIGIT(*s))
6892             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6893                            *s, *s);
6894         if (PL_expect == XOPERATOR)
6895             no_op("Backslash",s);
6896         OPERATOR(REFGEN);
6897
6898     case 'v':
6899         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6900             char *start = s + 2;
6901             while (isDIGIT(*start) || *start == '_')
6902                 start++;
6903             if (*start == '.' && isDIGIT(start[1])) {
6904                 s = scan_num(s, &pl_yylval);
6905                 TERM(THING);
6906             }
6907             else if ((*start == ':' && start[1] == ':')
6908                   || (PL_expect == XSTATE && *start == ':'))
6909                 goto keylookup;
6910             else if (PL_expect == XSTATE) {
6911                 d = start;
6912                 while (d < PL_bufend && isSPACE(*d)) d++;
6913                 if (*d == ':') goto keylookup;
6914             }
6915             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6916             if (!isALPHA(*start) && (PL_expect == XTERM
6917                         || PL_expect == XREF || PL_expect == XSTATE
6918                         || PL_expect == XTERMORDORDOR)) {
6919                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6920                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6921                 if (!gv) {
6922                     s = scan_num(s, &pl_yylval);
6923                     TERM(THING);
6924                 }
6925             }
6926         }
6927         goto keylookup;
6928     case 'x':
6929         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6930             s++;
6931             Mop(OP_REPEAT);
6932         }
6933         goto keylookup;
6934
6935     case '_':
6936     case 'a': case 'A':
6937     case 'b': case 'B':
6938     case 'c': case 'C':
6939     case 'd': case 'D':
6940     case 'e': case 'E':
6941     case 'f': case 'F':
6942     case 'g': case 'G':
6943     case 'h': case 'H':
6944     case 'i': case 'I':
6945     case 'j': case 'J':
6946     case 'k': case 'K':
6947     case 'l': case 'L':
6948     case 'm': case 'M':
6949     case 'n': case 'N':
6950     case 'o': case 'O':
6951     case 'p': case 'P':
6952     case 'q': case 'Q':
6953     case 'r': case 'R':
6954     case 's': case 'S':
6955     case 't': case 'T':
6956     case 'u': case 'U':
6957               case 'V':
6958     case 'w': case 'W':
6959               case 'X':
6960     case 'y': case 'Y':
6961     case 'z': case 'Z':
6962
6963       keylookup: {
6964         bool anydelim;
6965         bool lex;
6966         I32 tmp;
6967         SV *sv;
6968         CV *cv;
6969         PADOFFSET off;
6970         OP *rv2cv_op;
6971
6972         lex = FALSE;
6973         orig_keyword = 0;
6974         off = 0;
6975         sv = NULL;
6976         cv = NULL;
6977         gv = NULL;
6978         gvp = NULL;
6979         rv2cv_op = NULL;
6980
6981         PL_bufptr = s;
6982         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6983
6984         /* Some keywords can be followed by any delimiter, including ':' */
6985         anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
6986
6987         /* x::* is just a word, unless x is "CORE" */
6988         if (!anydelim && *s == ':' && s[1] == ':') {
6989             if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6990             goto just_a_word;
6991         }
6992
6993         d = s;
6994         while (d < PL_bufend && isSPACE(*d))
6995                 d++;    /* no comments skipped here, or s### is misparsed */
6996
6997         /* Is this a word before a => operator? */
6998         if (*d == '=' && d[1] == '>') {
6999           fat_arrow:
7000             CLINE;
7001             pl_yylval.opval
7002                 = newSVOP(OP_CONST, 0,
7003                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7004             pl_yylval.opval->op_private = OPpCONST_BARE;
7005             TERM(BAREWORD);
7006         }
7007
7008         /* Check for plugged-in keyword */
7009         {
7010             OP *o;
7011             int result;
7012             char *saved_bufptr = PL_bufptr;
7013             PL_bufptr = s;
7014             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7015             s = PL_bufptr;
7016             if (result == KEYWORD_PLUGIN_DECLINE) {
7017                 /* not a plugged-in keyword */
7018                 PL_bufptr = saved_bufptr;
7019             } else if (result == KEYWORD_PLUGIN_STMT) {
7020                 pl_yylval.opval = o;
7021                 CLINE;
7022                 if (!PL_nexttoke) PL_expect = XSTATE;
7023                 return REPORT(PLUGSTMT);
7024             } else if (result == KEYWORD_PLUGIN_EXPR) {
7025                 pl_yylval.opval = o;
7026                 CLINE;
7027                 if (!PL_nexttoke) PL_expect = XOPERATOR;
7028                 return REPORT(PLUGEXPR);
7029             } else {
7030                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7031                                         PL_tokenbuf);
7032             }
7033         }
7034
7035         /* Check for built-in keyword */
7036         tmp = keyword(PL_tokenbuf, len, 0);
7037
7038         /* Is this a label? */
7039         if (!anydelim && PL_expect == XSTATE
7040               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7041             s = d + 1;
7042             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7043             pl_yylval.pval[len] = '\0';
7044             pl_yylval.pval[len+1] = UTF ? 1 : 0;
7045             CLINE;
7046             TOKEN(LABEL);
7047         }
7048
7049         /* Check for lexical sub */
7050         if (PL_expect != XOPERATOR) {
7051             char tmpbuf[sizeof PL_tokenbuf + 1];
7052             *tmpbuf = '&';
7053             Copy(PL_tokenbuf, tmpbuf+1, len, char);
7054             off = pad_findmy_pvn(tmpbuf, len+1, 0);
7055             if (off != NOT_IN_PAD) {
7056                 assert(off); /* we assume this is boolean-true below */
7057                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7058                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
7059                     HEK * const stashname = HvNAME_HEK(stash);
7060                     sv = newSVhek(stashname);
7061                     sv_catpvs(sv, "::");
7062                     sv_catpvn_flags(sv, PL_tokenbuf, len,
7063                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
7064                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7065                                     SVt_PVCV);
7066                     off = 0;
7067                     if (!gv) {
7068                         sv_free(sv);
7069                         sv = NULL;
7070                         goto just_a_word;
7071                     }
7072                 }
7073                 else {
7074                     rv2cv_op = newOP(OP_PADANY, 0);
7075                     rv2cv_op->op_targ = off;
7076                     cv = find_lexical_cv(off);
7077                 }
7078                 lex = TRUE;
7079                 goto just_a_word;
7080             }
7081             off = 0;
7082         }
7083
7084         if (tmp < 0) {                  /* second-class keyword? */
7085             GV *ogv = NULL;     /* override (winner) */
7086             GV *hgv = NULL;     /* hidden (loser) */
7087             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7088                 CV *cv;
7089                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7090                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
7091                                             SVt_PVCV))
7092                     && (cv = GvCVu(gv)))
7093                 {
7094                     if (GvIMPORTED_CV(gv))
7095                         ogv = gv;
7096                     else if (! CvMETHOD(cv))
7097                         hgv = gv;
7098                 }
7099                 if (!ogv
7100                     && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7101                                                               len, FALSE))
7102                     && (gv = *gvp)
7103                     && (isGV_with_GP(gv)
7104                         ? GvCVu(gv) && GvIMPORTED_CV(gv)
7105                         :   SvPCS_IMPORTED(gv)
7106                         && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7107                                                                  len, 0), 1)))
7108                 {
7109                     ogv = gv;
7110                 }
7111             }
7112             if (ogv) {
7113                 orig_keyword = tmp;
7114                 tmp = 0;                /* overridden by import or by GLOBAL */
7115             }
7116             else if (gv && !gvp
7117                      && -tmp==KEY_lock  /* XXX generalizable kludge */
7118                      && GvCVu(gv))
7119             {
7120                 tmp = 0;                /* any sub overrides "weak" keyword */
7121             }
7122             else {                      /* no override */
7123                 tmp = -tmp;
7124                 if (tmp == KEY_dump) {
7125                     Perl_ck_warner_d(aTHX_ packWARN2(WARN_MISC,WARN_DEPRECATED),
7126                                      "dump() better written as CORE::dump(). "
7127                                      "dump() will no longer be available "
7128                                      "in Perl 5.30");
7129                 }
7130                 gv = NULL;
7131                 gvp = 0;
7132                 if (hgv && tmp != KEY_x)        /* never ambiguous */
7133                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7134                                    "Ambiguous call resolved as CORE::%s(), "
7135                                    "qualify as such or use &",
7136                                    GvENAME(hgv));
7137             }
7138         }
7139
7140         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7141          && (!anydelim || *s != '#')) {
7142             /* no override, and not s### either; skipspace is safe here
7143              * check for => on following line */
7144             bool arrow;
7145             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7146             STRLEN   soff = s         - SvPVX(PL_linestr);
7147             s = peekspace(s);
7148             arrow = *s == '=' && s[1] == '>';
7149             PL_bufptr = SvPVX(PL_linestr) + bufoff;
7150             s         = SvPVX(PL_linestr) +   soff;
7151             if (arrow)
7152                 goto fat_arrow;
7153         }
7154
7155       reserved_word:
7156         switch (tmp) {
7157
7158             /* Trade off - by using this evil construction we can pull the
7159                variable gv into the block labelled keylookup. If not, then
7160                we have to give it function scope so that the goto from the
7161                earlier ':' case doesn't bypass the initialisation.  */
7162             just_a_word_zero_gv:
7163                 sv = NULL;
7164                 cv = NULL;
7165                 gv = NULL;
7166                 gvp = NULL;
7167                 rv2cv_op = NULL;
7168                 orig_keyword = 0;
7169                 lex = 0;
7170                 off = 0;
7171         default:                        /* not a keyword */
7172           just_a_word: {
7173                 int pkgname = 0;
7174                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7175                 bool safebw;
7176
7177
7178                 /* Get the rest if it looks like a package qualifier */
7179
7180                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7181                     STRLEN morelen;
7182                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7183                                   TRUE, &morelen);
7184                     if (!morelen)
7185                         Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7186                                 UTF8fARG(UTF, len, PL_tokenbuf),
7187                                 *s == '\'' ? "'" : "::");
7188                     len += morelen;
7189                     pkgname = 1;
7190                 }
7191
7192                 if (PL_expect == XOPERATOR) {
7193                     if (PL_bufptr == PL_linestart) {
7194                         CopLINE_dec(PL_curcop);
7195                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7196                         CopLINE_inc(PL_curcop);
7197                     }
7198                     else
7199                         no_op("Bareword",s);
7200                 }
7201
7202                 /* See if the name is "Foo::",
7203                    in which case Foo is a bareword
7204                    (and a package name). */
7205
7206                 if (len > 2
7207                     && PL_tokenbuf[len - 2] == ':'
7208                     && PL_tokenbuf[len - 1] == ':')
7209                 {
7210                     if (ckWARN(WARN_BAREWORD)
7211                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7212                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7213                                     "Bareword \"%" UTF8f
7214                                     "\" refers to nonexistent package",
7215                                     UTF8fARG(UTF, len, PL_tokenbuf));
7216                     len -= 2;
7217                     PL_tokenbuf[len] = '\0';
7218                     gv = NULL;
7219                     gvp = 0;
7220                     safebw = TRUE;
7221                 }
7222                 else {
7223                     safebw = FALSE;
7224                 }
7225
7226                 /* if we saw a global override before, get the right name */
7227
7228                 if (!sv)
7229                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7230                                                 len);
7231                 if (gvp) {
7232                     SV * const tmp_sv = sv;
7233                     sv = newSVpvs("CORE::GLOBAL::");
7234                     sv_catsv(sv, tmp_sv);
7235                     SvREFCNT_dec(tmp_sv);
7236                 }
7237
7238
7239                 /* Presume this is going to be a bareword of some sort. */
7240                 CLINE;
7241                 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
7242                 pl_yylval.opval->op_private = OPpCONST_BARE;
7243
7244                 /* And if "Foo::", then that's what it certainly is. */
7245                 if (safebw)
7246                     goto safe_bareword;
7247
7248                 if (!off)
7249                 {
7250                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7251                     const_op->op_private = OPpCONST_BARE;
7252                     rv2cv_op =
7253                         newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7254                     cv = lex
7255                         ? isGV(gv)
7256                             ? GvCV(gv)
7257                             : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
7258                                 ? (CV *)SvRV(gv)
7259                                 : ((CV *)gv)
7260                         : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
7261                 }
7262
7263                 /* Use this var to track whether intuit_method has been
7264                    called.  intuit_method returns 0 or > 255.  */
7265                 tmp = 1;
7266
7267                 /* See if it's the indirect object for a list operator. */
7268
7269                 if (PL_oldoldbufptr
7270                     && PL_oldoldbufptr < PL_bufptr
7271                     && (PL_oldoldbufptr == PL_last_lop
7272                         || PL_oldoldbufptr == PL_last_uni)
7273                     && /* NO SKIPSPACE BEFORE HERE! */
7274                        (PL_expect == XREF
7275                         || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7276                                                                == OA_FILEREF))
7277                 {
7278                     bool immediate_paren = *s == '(';
7279                     SSize_t s_off;
7280
7281                     /* (Now we can afford to cross potential line boundary.) */
7282                     s = skipspace(s);
7283
7284                     /* intuit_method() can indirectly call lex_next_chunk(),
7285                      * invalidating s
7286                      */
7287                     s_off = s - SvPVX(PL_linestr);
7288                     /* Two barewords in a row may indicate method call. */
7289                     if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7290                             || *s == '$')
7291                         && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7292                     {
7293                         /* the code at method: doesn't use s */
7294                         goto method;
7295                     }
7296                     s = SvPVX(PL_linestr) + s_off;
7297
7298                     /* If not a declared subroutine, it's an indirect object. */
7299                     /* (But it's an indir obj regardless for sort.) */
7300                     /* Also, if "_" follows a filetest operator, it's a bareword */
7301
7302                     if (
7303                         ( !immediate_paren && (PL_last_lop_op == OP_SORT
7304                          || (!cv
7305                              && (PL_last_lop_op != OP_MAPSTART
7306                                  && PL_last_lop_op != OP_GREPSTART))))
7307                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7308                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7309                                                             == OA_FILESTATOP))
7310                        )
7311                     {
7312                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7313                         goto bareword;
7314                     }
7315                 }
7316
7317                 PL_expect = XOPERATOR;
7318                 s = skipspace(s);
7319
7320                 /* Is this a word before a => operator? */
7321                 if (*s == '=' && s[1] == '>' && !pkgname) {
7322                     op_free(rv2cv_op);
7323                     CLINE;
7324                     if (gvp || (lex && !off)) {
7325                         assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
7326                         /* This is our own scalar, created a few lines
7327                            above, so this is safe. */
7328                         SvREADONLY_off(sv);
7329                         sv_setpv(sv, PL_tokenbuf);
7330                         if (UTF && !IN_BYTES
7331                          && is_utf8_string((U8*)PL_tokenbuf, len))
7332                               SvUTF8_on(sv);
7333                         SvREADONLY_on(sv);
7334                     }
7335                     TERM(BAREWORD);
7336                 }
7337
7338                 /* If followed by a paren, it's certainly a subroutine. */
7339                 if (*s == '(') {
7340                     CLINE;
7341                     if (cv) {
7342                         d = s + 1;
7343                         while (SPACE_OR_TAB(*d))
7344                             d++;
7345                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7346                             s = d + 1;
7347                             goto its_constant;
7348                         }
7349                     }
7350                     NEXTVAL_NEXTTOKE.opval =
7351                         off ? rv2cv_op : pl_yylval.opval;
7352                     if (off)
7353                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7354                     else op_free(rv2cv_op),        force_next(BAREWORD);
7355                     pl_yylval.ival = 0;
7356                     TOKEN('&');
7357                 }
7358
7359                 /* If followed by var or block, call it a method (unless sub) */
7360
7361                 if ((*s == '$' || *s == '{') && !cv) {
7362                     op_free(rv2cv_op);
7363                     PL_last_lop = PL_oldbufptr;
7364                     PL_last_lop_op = OP_METHOD;
7365                     if (!PL_lex_allbrackets
7366                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7367                     {
7368                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7369                     }
7370                     PL_expect = XBLOCKTERM;
7371                     PL_bufptr = s;
7372                     return REPORT(METHOD);
7373                 }
7374
7375                 /* If followed by a bareword, see if it looks like indir obj. */
7376
7377                 if (   tmp == 1
7378                     && !orig_keyword
7379                     && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7380                     && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7381                 {
7382                   method:
7383                     if (lex && !off) {
7384                         assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
7385                         SvREADONLY_off(sv);
7386                         sv_setpvn(sv, PL_tokenbuf, len);
7387                         if (UTF && !IN_BYTES
7388                          && is_utf8_string((U8*)PL_tokenbuf, len))
7389                             SvUTF8_on (sv);
7390                         else SvUTF8_off(sv);
7391                     }
7392                     op_free(rv2cv_op);
7393                     if (tmp == METHOD && !PL_lex_allbrackets
7394                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7395                     {
7396                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7397                     }
7398                     return REPORT(tmp);
7399                 }
7400
7401                 /* Not a method, so call it a subroutine (if defined) */
7402
7403                 if (cv) {
7404                     /* Check for a constant sub */
7405                     if ((sv = cv_const_sv_or_av(cv))) {
7406                   its_constant:
7407                         op_free(rv2cv_op);
7408                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7409                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7410                         if (SvTYPE(sv) == SVt_PVAV)
7411                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7412                                                       pl_yylval.opval);
7413                         else {
7414                             pl_yylval.opval->op_private = 0;
7415                             pl_yylval.opval->op_folded = 1;
7416                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7417                         }
7418                         TOKEN(BAREWORD);
7419                     }
7420
7421                     op_free(pl_yylval.opval);
7422                     pl_yylval.opval =
7423                         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7424                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7425                     PL_last_lop = PL_oldbufptr;
7426                     PL_last_lop_op = OP_ENTERSUB;
7427                     /* Is there a prototype? */
7428                     if (
7429                         SvPOK(cv))
7430                     {
7431                         STRLEN protolen = CvPROTOLEN(cv);
7432                         const char *proto = CvPROTO(cv);
7433                         bool optional;
7434                         proto = S_strip_spaces(aTHX_ proto, &protolen);
7435                         if (!protolen)
7436                             TERM(FUNC0SUB);
7437                         if ((optional = *proto == ';'))
7438                           do
7439                             proto++;
7440                           while (*proto == ';');
7441                         if (
7442                             (
7443                                 (
7444                                     *proto == '$' || *proto == '_'
7445                                  || *proto == '*' || *proto == '+'
7446                                 )
7447                              && proto[1] == '\0'
7448                             )
7449                          || (
7450                              *proto == '\\' && proto[1] && proto[2] == '\0'
7451                             )
7452                         )
7453                             UNIPROTO(UNIOPSUB,optional);
7454                         if (*proto == '\\' && proto[1] == '[') {
7455                             const char *p = proto + 2;
7456                             while(*p && *p != ']')
7457                                 ++p;
7458                             if(*p == ']' && !p[1])
7459                                 UNIPROTO(UNIOPSUB,optional);
7460                         }
7461                         if (*proto == '&' && *s == '{') {
7462                             if (PL_curstash)
7463                                 sv_setpvs(PL_subname, "__ANON__");
7464                             else
7465                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7466                             if (!PL_lex_allbrackets
7467                                 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7468                             {
7469                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7470                             }
7471                             PREBLOCK(LSTOPSUB);
7472                         }
7473                     }
7474                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7475                     PL_expect = XTERM;
7476                     force_next(off ? PRIVATEREF : BAREWORD);
7477                     if (!PL_lex_allbrackets
7478                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7479                     {
7480                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7481                     }
7482                     TOKEN(NOAMP);
7483                 }
7484
7485                 /* Call it a bare word */
7486
7487                 if (PL_hints & HINT_STRICT_SUBS)
7488                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7489                 else {
7490                 bareword:
7491                     /* after "print" and similar functions (corresponding to
7492                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7493                      * a filehandle should be subject to "strict subs".
7494                      * Likewise for the optional indirect-object argument to system
7495                      * or exec, which can't be a bareword */
7496                     if ((PL_last_lop_op == OP_PRINT
7497                             || PL_last_lop_op == OP_PRTF
7498                             || PL_last_lop_op == OP_SAY
7499                             || PL_last_lop_op == OP_SYSTEM
7500                             || PL_last_lop_op == OP_EXEC)
7501                             && (PL_hints & HINT_STRICT_SUBS))
7502                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7503                     if (lastchar != '-') {
7504                         if (ckWARN(WARN_RESERVED)) {
7505                             d = PL_tokenbuf;
7506                             while (isLOWER(*d))
7507                                 d++;
7508                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7509                             {
7510                                 /* PL_warn_reserved is constant */
7511                                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7512                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7513                                        PL_tokenbuf);
7514                                 GCC_DIAG_RESTORE;
7515                             }
7516                         }
7517                     }
7518                 }
7519                 op_free(rv2cv_op);
7520
7521             safe_bareword:
7522                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7523                  && saw_infix_sigil) {
7524                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7525                                      "Operator or semicolon missing before %c%" UTF8f,
7526                                      lastchar,
7527                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
7528                                               PL_tokenbuf));
7529                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7530                                      "Ambiguous use of %c resolved as operator %c",
7531                                      lastchar, lastchar);
7532                 }
7533                 TOKEN(BAREWORD);
7534             }
7535
7536         case KEY___FILE__:
7537             FUN0OP(
7538                 newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7539             );
7540
7541         case KEY___LINE__:
7542             FUN0OP(
7543                 newSVOP(OP_CONST, 0,
7544                     Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7545             );
7546
7547         case KEY___PACKAGE__:
7548             FUN0OP(
7549                 newSVOP(OP_CONST, 0,
7550                                         (PL_curstash
7551                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7552                                          : &PL_sv_undef))
7553             );
7554
7555         case KEY___DATA__:
7556         case KEY___END__: {
7557             GV *gv;
7558             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7559                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7560                                         ? PL_curstash
7561                                         : PL_defstash;
7562                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7563                 if (!isGV(gv))
7564                     gv_init(gv,stash,"DATA",4,0);
7565                 GvMULTI_on(gv);
7566                 if (!GvIO(gv))
7567                     GvIOp(gv) = newIO();
7568                 IoIFP(GvIOp(gv)) = PL_rsfp;
7569 #if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
7570                 {
7571                     const int fd = PerlIO_fileno(PL_rsfp);
7572                     if (fd >= 3) {
7573                         fcntl(fd,F_SETFD, FD_CLOEXEC);
7574                     }
7575                 }
7576 #endif
7577                 /* Mark this internal pseudo-handle as clean */
7578                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7579                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7580                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7581                 else
7582                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7583 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7584                 /* if the script was opened in binmode, we need to revert
7585                  * it to text mode for compatibility; but only iff it has CRs
7586                  * XXX this is a questionable hack at best. */
7587                 if (PL_bufend-PL_bufptr > 2
7588                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7589                 {
7590                     Off_t loc = 0;
7591                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7592                         loc = PerlIO_tell(PL_rsfp);
7593                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7594                     }
7595 #ifdef NETWARE
7596                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7597 #else
7598                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7599 #endif  /* NETWARE */
7600                         if (loc > 0)
7601                             PerlIO_seek(PL_rsfp, loc, 0);
7602                     }
7603                 }
7604 #endif
7605 #ifdef PERLIO_LAYERS
7606                 if (!IN_BYTES) {
7607                     if (UTF)
7608                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7609                 }
7610 #endif
7611                 PL_rsfp = NULL;
7612             }
7613             goto fake_eof;
7614         }
7615
7616         case KEY___SUB__:
7617             FUN0OP(CvCLONE(PL_compcv)
7618                         ? newOP(OP_RUNCV, 0)
7619                         : newPVOP(OP_RUNCV,0,NULL));
7620
7621         case KEY_AUTOLOAD:
7622         case KEY_DESTROY:
7623         case KEY_BEGIN:
7624         case KEY_UNITCHECK:
7625         case KEY_CHECK:
7626         case KEY_INIT:
7627         case KEY_END:
7628             if (PL_expect == XSTATE) {
7629                 s = PL_bufptr;
7630                 goto really_sub;
7631             }
7632             goto just_a_word;
7633
7634         case_KEY_CORE:
7635             {
7636                 STRLEN olen = len;
7637                 d = s;
7638                 s += 2;
7639                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7640                 if ((*s == ':' && s[1] == ':')
7641                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7642                 {
7643                     s = d;
7644                     len = olen;
7645                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7646                     goto just_a_word;
7647                 }
7648                 if (!tmp)
7649                     Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
7650                                       UTF8fARG(UTF, len, PL_tokenbuf));
7651                 if (tmp < 0)
7652                     tmp = -tmp;
7653                 else if (tmp == KEY_require || tmp == KEY_do
7654                       || tmp == KEY_glob)
7655                     /* that's a way to remember we saw "CORE::" */
7656                     orig_keyword = tmp;
7657                 goto reserved_word;
7658             }
7659
7660         case KEY_abs:
7661             UNI(OP_ABS);
7662
7663         case KEY_alarm:
7664             UNI(OP_ALARM);
7665
7666         case KEY_accept:
7667             LOP(OP_ACCEPT,XTERM);
7668
7669         case KEY_and:
7670             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7671                 return REPORT(0);
7672             OPERATOR(ANDOP);
7673
7674         case KEY_atan2:
7675             LOP(OP_ATAN2,XTERM);
7676
7677         case KEY_bind:
7678             LOP(OP_BIND,XTERM);
7679
7680         case KEY_binmode:
7681             LOP(OP_BINMODE,XTERM);
7682
7683         case KEY_bless:
7684             LOP(OP_BLESS,XTERM);
7685
7686         case KEY_break:
7687             FUN0(OP_BREAK);
7688
7689         case KEY_chop:
7690             UNI(OP_CHOP);
7691
7692         case KEY_continue:
7693                     /* We have to disambiguate the two senses of
7694                       "continue". If the next token is a '{' then
7695                       treat it as the start of a continue block;
7696                       otherwise treat it as a control operator.
7697                      */
7698                     s = skipspace(s);
7699                     if (*s == '{')
7700             PREBLOCK(CONTINUE);
7701                     else
7702                         FUN0(OP_CONTINUE);
7703
7704         case KEY_chdir:
7705             /* may use HOME */
7706             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7707             UNI(OP_CHDIR);
7708
7709         case KEY_close:
7710             UNI(OP_CLOSE);
7711
7712         case KEY_closedir:
7713             UNI(OP_CLOSEDIR);
7714
7715         case KEY_cmp:
7716             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7717                 return REPORT(0);
7718             Eop(OP_SCMP);
7719
7720         case KEY_caller:
7721             UNI(OP_CALLER);
7722
7723         case KEY_crypt:
7724 #ifdef FCRYPT
7725             if (!PL_cryptseen) {
7726                 PL_cryptseen = TRUE;
7727                 init_des();
7728             }
7729 #endif
7730             LOP(OP_CRYPT,XTERM);
7731
7732         case KEY_chmod:
7733             LOP(OP_CHMOD,XTERM);
7734
7735         case KEY_chown:
7736             LOP(OP_CHOWN,XTERM);
7737
7738         case KEY_connect:
7739             LOP(OP_CONNECT,XTERM);
7740
7741         case KEY_chr:
7742             UNI(OP_CHR);
7743
7744         case KEY_cos:
7745             UNI(OP_COS);
7746
7747         case KEY_chroot:
7748             UNI(OP_CHROOT);
7749
7750         case KEY_default:
7751             PREBLOCK(DEFAULT);
7752
7753         case KEY_do:
7754             s = skipspace(s);
7755             if (*s == '{')
7756                 PRETERMBLOCK(DO);
7757             if (*s != '\'') {
7758                 *PL_tokenbuf = '&';
7759                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7760                               1, &len);
7761                 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7762                  && !keyword(PL_tokenbuf + 1, len, 0)) {
7763                     SSize_t off = s-SvPVX(PL_linestr);
7764                     d = skipspace(d);
7765                     s = SvPVX(PL_linestr)+off;
7766                     if (*d == '(') {
7767                         force_ident_maybe_lex('&');
7768                         s = d;
7769                     }
7770                 }
7771             }
7772             if (orig_keyword == KEY_do) {
7773                 orig_keyword = 0;
7774                 pl_yylval.ival = 1;
7775             }
7776             else
7777                 pl_yylval.ival = 0;
7778             OPERATOR(DO);
7779
7780         case KEY_die:
7781             PL_hints |= HINT_BLOCK_SCOPE;
7782             LOP(OP_DIE,XTERM);
7783
7784         case KEY_defined:
7785             UNI(OP_DEFINED);
7786
7787         case KEY_delete:
7788             UNI(OP_DELETE);
7789
7790         case KEY_dbmopen:
7791             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7792                               STR_WITH_LEN("NDBM_File::"),
7793                               STR_WITH_LEN("DB_File::"),
7794                               STR_WITH_LEN("GDBM_File::"),
7795                               STR_WITH_LEN("SDBM_File::"),
7796                               STR_WITH_LEN("ODBM_File::"),
7797                               NULL);
7798             LOP(OP_DBMOPEN,XTERM);
7799
7800         case KEY_dbmclose:
7801             UNI(OP_DBMCLOSE);
7802
7803         case KEY_dump:
7804             LOOPX(OP_DUMP);
7805
7806         case KEY_else:
7807             PREBLOCK(ELSE);
7808
7809         case KEY_elsif:
7810             pl_yylval.ival = CopLINE(PL_curcop);
7811             OPERATOR(ELSIF);
7812
7813         case KEY_eq:
7814             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7815                 return REPORT(0);
7816             Eop(OP_SEQ);
7817
7818         case KEY_exists:
7819             UNI(OP_EXISTS);
7820
7821         case KEY_exit:
7822             UNI(OP_EXIT);
7823
7824         case KEY_eval:
7825             s = skipspace(s);
7826             if (*s == '{') { /* block eval */
7827                 PL_expect = XTERMBLOCK;
7828                 UNIBRACK(OP_ENTERTRY);
7829             }
7830             else { /* string eval */
7831                 PL_expect = XTERM;
7832                 UNIBRACK(OP_ENTEREVAL);
7833             }
7834
7835         case KEY_evalbytes:
7836             PL_expect = XTERM;
7837             UNIBRACK(-OP_ENTEREVAL);
7838
7839         case KEY_eof:
7840             UNI(OP_EOF);
7841
7842         case KEY_exp:
7843             UNI(OP_EXP);
7844
7845         case KEY_each:
7846             UNI(OP_EACH);
7847
7848         case KEY_exec:
7849             LOP(OP_EXEC,XREF);
7850
7851         case KEY_endhostent:
7852             FUN0(OP_EHOSTENT);
7853
7854         case KEY_endnetent:
7855             FUN0(OP_ENETENT);
7856
7857         case KEY_endservent:
7858             FUN0(OP_ESERVENT);
7859
7860         case KEY_endprotoent:
7861             FUN0(OP_EPROTOENT);
7862
7863         case KEY_endpwent:
7864             FUN0(OP_EPWENT);
7865
7866         case KEY_endgrent:
7867             FUN0(OP_EGRENT);
7868
7869         case KEY_for:
7870         case KEY_foreach:
7871             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7872                 return REPORT(0);
7873             pl_yylval.ival = CopLINE(PL_curcop);
7874             s = skipspace(s);
7875             if (   PL_expect == XSTATE
7876                 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
7877             {
7878                 char *p = s;
7879                 SSize_t s_off = s - SvPVX(PL_linestr);
7880
7881                 if ((PL_bufend - p) >= 3
7882                     && strEQs(p, "my") && isSPACE(*(p + 2)))
7883                 {
7884                     p += 2;
7885                 }
7886                 else if ((PL_bufend - p) >= 4
7887                          && strEQs(p, "our") && isSPACE(*(p + 3)))
7888                     p += 3;
7889                 p = skipspace(p);
7890                 /* skip optional package name, as in "for my abc $x (..)" */
7891                 if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
7892                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7893                     p = skipspace(p);
7894                 }
7895                 if (*p != '$' && *p != '\\')
7896                     Perl_croak(aTHX_ "Missing $ on loop variable");
7897
7898                 /* The buffer may have been reallocated, update s */
7899                 s = SvPVX(PL_linestr) + s_off;
7900             }
7901             OPERATOR(FOR);
7902
7903         case KEY_formline:
7904             LOP(OP_FORMLINE,XTERM);
7905
7906         case KEY_fork:
7907             FUN0(OP_FORK);
7908
7909         case KEY_fc:
7910             UNI(OP_FC);
7911
7912         case KEY_fcntl:
7913             LOP(OP_FCNTL,XTERM);
7914
7915         case KEY_fileno:
7916             UNI(OP_FILENO);
7917
7918         case KEY_flock:
7919             LOP(OP_FLOCK,XTERM);
7920
7921         case KEY_gt:
7922             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7923                 return REPORT(0);
7924             Rop(OP_SGT);
7925
7926         case KEY_ge:
7927             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7928                 return REPORT(0);
7929             Rop(OP_SGE);
7930
7931         case KEY_grep:
7932             LOP(OP_GREPSTART, XREF);
7933
7934         case KEY_goto:
7935             LOOPX(OP_GOTO);
7936
7937         case KEY_gmtime:
7938             UNI(OP_GMTIME);
7939
7940         case KEY_getc:
7941             UNIDOR(OP_GETC);
7942
7943         case KEY_getppid:
7944             FUN0(OP_GETPPID);
7945
7946         case KEY_getpgrp:
7947             UNI(OP_GETPGRP);
7948
7949         case KEY_getpriority:
7950             LOP(OP_GETPRIORITY,XTERM);
7951
7952         case KEY_getprotobyname:
7953             UNI(OP_GPBYNAME);
7954
7955         case KEY_getprotobynumber:
7956             LOP(OP_GPBYNUMBER,XTERM);
7957
7958         case KEY_getprotoent:
7959             FUN0(OP_GPROTOENT);
7960
7961         case KEY_getpwent:
7962             FUN0(OP_GPWENT);
7963
7964         case KEY_getpwnam:
7965             UNI(OP_GPWNAM);
7966
7967         case KEY_getpwuid:
7968             UNI(OP_GPWUID);
7969
7970         case KEY_getpeername:
7971             UNI(OP_GETPEERNAME);
7972
7973         case KEY_gethostbyname:
7974             UNI(OP_GHBYNAME);
7975
7976         case KEY_gethostbyaddr:
7977             LOP(OP_GHBYADDR,XTERM);
7978
7979         case KEY_gethostent:
7980             FUN0(OP_GHOSTENT);
7981
7982         case KEY_getnetbyname:
7983             UNI(OP_GNBYNAME);
7984
7985         case KEY_getnetbyaddr:
7986             LOP(OP_GNBYADDR,XTERM);
7987
7988         case KEY_getnetent:
7989             FUN0(OP_GNETENT);
7990
7991         case KEY_getservbyname:
7992             LOP(OP_GSBYNAME,XTERM);
7993
7994         case KEY_getservbyport:
7995             LOP(OP_GSBYPORT,XTERM);
7996
7997         case KEY_getservent:
7998             FUN0(OP_GSERVENT);
7999
8000         case KEY_getsockname:
8001             UNI(OP_GETSOCKNAME);
8002
8003         case KEY_getsockopt:
8004             LOP(OP_GSOCKOPT,XTERM);
8005
8006         case KEY_getgrent:
8007             FUN0(OP_GGRENT);
8008
8009         case KEY_getgrnam:
8010             UNI(OP_GGRNAM);
8011
8012         case KEY_getgrgid:
8013             UNI(OP_GGRGID);
8014
8015         case KEY_getlogin:
8016             FUN0(OP_GETLOGIN);
8017
8018         case KEY_given:
8019             pl_yylval.ival = CopLINE(PL_curcop);
8020             Perl_ck_warner_d(aTHX_
8021                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8022                 "given is experimental");
8023             OPERATOR(GIVEN);
8024
8025         case KEY_glob:
8026             LOP(
8027              orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
8028              XTERM
8029             );
8030
8031         case KEY_hex:
8032             UNI(OP_HEX);
8033
8034         case KEY_if:
8035             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8036                 return REPORT(0);
8037             pl_yylval.ival = CopLINE(PL_curcop);
8038             OPERATOR(IF);
8039
8040         case KEY_index:
8041             LOP(OP_INDEX,XTERM);
8042
8043         case KEY_int:
8044             UNI(OP_INT);
8045
8046         case KEY_ioctl:
8047             LOP(OP_IOCTL,XTERM);
8048
8049         case KEY_join:
8050             LOP(OP_JOIN,XTERM);
8051
8052         case KEY_keys:
8053             UNI(OP_KEYS);
8054
8055         case KEY_kill:
8056             LOP(OP_KILL,XTERM);
8057
8058         case KEY_last:
8059             LOOPX(OP_LAST);
8060
8061         case KEY_lc:
8062             UNI(OP_LC);
8063
8064         case KEY_lcfirst:
8065             UNI(OP_LCFIRST);
8066
8067         case KEY_local:
8068             OPERATOR(LOCAL);
8069
8070         case KEY_length:
8071             UNI(OP_LENGTH);
8072
8073         case KEY_lt:
8074             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8075                 return REPORT(0);
8076             Rop(OP_SLT);
8077
8078         case KEY_le:
8079             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8080                 return REPORT(0);
8081             Rop(OP_SLE);
8082
8083         case KEY_localtime:
8084             UNI(OP_LOCALTIME);
8085
8086         case KEY_log:
8087             UNI(OP_LOG);
8088
8089         case KEY_link:
8090             LOP(OP_LINK,XTERM);
8091
8092         case KEY_listen:
8093             LOP(OP_LISTEN,XTERM);
8094
8095         case KEY_lock:
8096             UNI(OP_LOCK);
8097
8098         case KEY_lstat:
8099             UNI(OP_LSTAT);
8100
8101         case KEY_m:
8102             s = scan_pat(s,OP_MATCH);
8103             TERM(sublex_start());
8104
8105         case KEY_map:
8106             LOP(OP_MAPSTART, XREF);
8107
8108         case KEY_mkdir:
8109             LOP(OP_MKDIR,XTERM);
8110
8111         case KEY_msgctl:
8112             LOP(OP_MSGCTL,XTERM);
8113
8114         case KEY_msgget:
8115             LOP(OP_MSGGET,XTERM);
8116
8117         case KEY_msgrcv:
8118             LOP(OP_MSGRCV,XTERM);
8119
8120         case KEY_msgsnd:
8121             LOP(OP_MSGSND,XTERM);
8122
8123         case KEY_our:
8124         case KEY_my:
8125         case KEY_state:
8126             if (PL_in_my) {
8127                 PL_bufptr = s;
8128                 yyerror(Perl_form(aTHX_
8129                                   "Can't redeclare \"%s\" in \"%s\"",
8130                                    tmp      == KEY_my    ? "my" :
8131                                    tmp      == KEY_state ? "state" : "our",
8132                                    PL_in_my == KEY_my    ? "my" :
8133                                    PL_in_my == KEY_state ? "state" : "our"));
8134             }
8135             PL_in_my = (U16)tmp;
8136             s = skipspace(s);
8137             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8138                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8139                 if (len == 3 && strEQs(PL_tokenbuf, "sub"))
8140                     goto really_sub;
8141                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8142                 if (!PL_in_my_stash) {
8143                     char tmpbuf[1024];
8144                     int len;
8145                     PL_bufptr = s;
8146                     len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8147                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
8148                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8149                 }
8150             }
8151             else if (*s == '\\') {
8152                 if (!FEATURE_MYREF_IS_ENABLED)
8153                     Perl_croak(aTHX_ "The experimental declared_refs "
8154                                      "feature is not enabled");
8155                 Perl_ck_warner_d(aTHX_
8156                      packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
8157                     "Declaring references is experimental");
8158             }
8159             OPERATOR(MY);
8160
8161         case KEY_next:
8162             LOOPX(OP_NEXT);
8163
8164         case KEY_ne:
8165             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8166                 return REPORT(0);
8167             Eop(OP_SNE);
8168
8169         case KEY_no:
8170             s = tokenize_use(0, s);
8171             TOKEN(USE);
8172
8173         case KEY_not:
8174             if (*s == '(' || (s = skipspace(s), *s == '('))
8175                 FUN1(OP_NOT);
8176             else {
8177                 if (!PL_lex_allbrackets
8178                     && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8179                 {
8180                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8181                 }
8182                 OPERATOR(NOTOP);
8183             }
8184
8185         case KEY_open:
8186             s = skipspace(s);
8187             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8188                 const char *t;
8189                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8190                               &len);
8191                 for (t=d; isSPACE(*t);)
8192                     t++;
8193                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8194                     /* [perl #16184] */
8195                     && !(t[0] == '=' && t[1] == '>')
8196                     && !(t[0] == ':' && t[1] == ':')
8197                     && !keyword(s, d-s, 0)
8198                 ) {
8199                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8200                        "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8201                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8202                 }
8203             }
8204             LOP(OP_OPEN,XTERM);
8205
8206         case KEY_or:
8207             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8208                 return REPORT(0);
8209             pl_yylval.ival = OP_OR;
8210             OPERATOR(OROP);
8211
8212         case KEY_ord:
8213             UNI(OP_ORD);
8214
8215         case KEY_oct:
8216             UNI(OP_OCT);
8217
8218         case KEY_opendir:
8219             LOP(OP_OPEN_DIR,XTERM);
8220
8221         case KEY_print:
8222             checkcomma(s,PL_tokenbuf,"filehandle");
8223             LOP(OP_PRINT,XREF);
8224
8225         case KEY_printf:
8226             checkcomma(s,PL_tokenbuf,"filehandle");
8227             LOP(OP_PRTF,XREF);
8228
8229         case KEY_prototype:
8230             UNI(OP_PROTOTYPE);
8231
8232         case KEY_push:
8233             LOP(OP_PUSH,XTERM);
8234
8235         case KEY_pop:
8236             UNIDOR(OP_POP);
8237
8238         case KEY_pos:
8239             UNIDOR(OP_POS);
8240
8241         case KEY_pack:
8242             LOP(OP_PACK,XTERM);
8243
8244         case KEY_package:
8245             s = force_word(s,BAREWORD,FALSE,TRUE);
8246             s = skipspace(s);
8247             s = force_strict_version(s);
8248             PREBLOCK(PACKAGE);
8249
8250         case KEY_pipe:
8251             LOP(OP_PIPE_OP,XTERM);
8252
8253         case KEY_q:
8254             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8255             if (!s)
8256                 missingterm(NULL);
8257             COPLINE_SET_FROM_MULTI_END;
8258             pl_yylval.ival = OP_CONST;
8259             TERM(sublex_start());
8260
8261         case KEY_quotemeta:
8262             UNI(OP_QUOTEMETA);
8263
8264         case KEY_qw: {
8265             OP *words = NULL;
8266             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8267             if (!s)
8268                 missingterm(NULL);
8269             COPLINE_SET_FROM_MULTI_END;
8270             PL_expect = XOPERATOR;
8271             if (SvCUR(PL_lex_stuff)) {
8272                 int warned_comma = !ckWARN(WARN_QW);
8273                 int warned_comment = warned_comma;
8274                 d = SvPV_force(PL_lex_stuff, len);
8275                 while (len) {
8276                     for (; isSPACE(*d) && len; --len, ++d)
8277                         /**/;
8278                     if (len) {
8279                         SV *sv;
8280                         const char *b = d;
8281                         if (!warned_comma || !warned_comment) {
8282                             for (; !isSPACE(*d) && len; --len, ++d) {
8283                                 if (!warned_comma && *d == ',') {
8284                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8285                                         "Possible attempt to separate words with commas");
8286                                     ++warned_comma;
8287                                 }
8288                                 else if (!warned_comment && *d == '#') {
8289                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8290                                         "Possible attempt to put comments in qw() list");
8291                                     ++warned_comment;
8292                                 }
8293                             }
8294                         }
8295                         else {
8296                             for (; !isSPACE(*d) && len; --len, ++d)
8297                                 /**/;
8298                         }
8299                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8300                         words = op_append_elem(OP_LIST, words,
8301                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8302                     }
8303                 }
8304             }
8305             if (!words)
8306                 words = newNULLLIST();
8307             SvREFCNT_dec_NN(PL_lex_stuff);
8308             PL_lex_stuff = NULL;
8309             PL_expect = XOPERATOR;
8310             pl_yylval.opval = sawparens(words);
8311             TOKEN(QWLIST);
8312         }
8313
8314         case KEY_qq:
8315             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8316             if (!s)
8317                 missingterm(NULL);
8318             pl_yylval.ival = OP_STRINGIFY;
8319             if (SvIVX(PL_lex_stuff) == '\'')
8320                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8321             TERM(sublex_start());
8322
8323         case KEY_qr:
8324             s = scan_pat(s,OP_QR);
8325             TERM(sublex_start());
8326
8327         case KEY_qx:
8328             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8329             if (!s)
8330                 missingterm(NULL);
8331             pl_yylval.ival = OP_BACKTICK;
8332             TERM(sublex_start());
8333
8334         case KEY_return:
8335             OLDLOP(OP_RETURN);
8336
8337         case KEY_require:
8338             s = skipspace(s);
8339             if (isDIGIT(*s)) {
8340                 s = force_version(s, FALSE);
8341             }
8342             else if (*s != 'v' || !isDIGIT(s[1])
8343                     || (s = force_version(s, TRUE), *s == 'v'))
8344             {
8345                 *PL_tokenbuf = '\0';
8346                 s = force_word(s,BAREWORD,TRUE,TRUE);
8347                 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
8348                                            PL_tokenbuf + sizeof(PL_tokenbuf),
8349                                            UTF))
8350                 {
8351                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8352                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8353                 }
8354                 else if (*s == '<')
8355                     yyerror("<> at require-statement should be quotes");
8356             }
8357             if (orig_keyword == KEY_require) {
8358                 orig_keyword = 0;
8359                 pl_yylval.ival = 1;
8360             }
8361             else
8362                 pl_yylval.ival = 0;
8363             PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
8364             PL_bufptr = s;
8365             PL_last_uni = PL_oldbufptr;
8366             PL_last_lop_op = OP_REQUIRE;
8367             s = skipspace(s);
8368             return REPORT( (int)REQUIRE );
8369
8370         case KEY_reset:
8371             UNI(OP_RESET);
8372
8373         case KEY_redo:
8374             LOOPX(OP_REDO);
8375
8376         case KEY_rename:
8377             LOP(OP_RENAME,XTERM);
8378
8379         case KEY_rand:
8380             UNI(OP_RAND);
8381
8382         case KEY_rmdir:
8383             UNI(OP_RMDIR);
8384
8385         case KEY_rindex:
8386             LOP(OP_RINDEX,XTERM);
8387
8388         case KEY_read:
8389             LOP(OP_READ,XTERM);
8390
8391         case KEY_readdir:
8392             UNI(OP_READDIR);
8393
8394         case KEY_readline:
8395             UNIDOR(OP_READLINE);
8396
8397         case KEY_readpipe:
8398             UNIDOR(OP_BACKTICK);
8399
8400         case KEY_rewinddir:
8401             UNI(OP_REWINDDIR);
8402
8403         case KEY_recv:
8404             LOP(OP_RECV,XTERM);
8405
8406         case KEY_reverse:
8407             LOP(OP_REVERSE,XTERM);
8408
8409         case KEY_readlink:
8410             UNIDOR(OP_READLINK);
8411
8412         case KEY_ref:
8413             UNI(OP_REF);
8414
8415         case KEY_s:
8416             s = scan_subst(s);
8417             if (pl_yylval.opval)
8418                 TERM(sublex_start());
8419             else
8420                 TOKEN(1);       /* force error */
8421
8422         case KEY_say:
8423             checkcomma(s,PL_tokenbuf,"filehandle");
8424             LOP(OP_SAY,XREF);
8425
8426         case KEY_chomp:
8427             UNI(OP_CHOMP);
8428
8429         case KEY_scalar:
8430             UNI(OP_SCALAR);
8431
8432         case KEY_select:
8433             LOP(OP_SELECT,XTERM);
8434
8435         case KEY_seek:
8436             LOP(OP_SEEK,XTERM);
8437
8438         case KEY_semctl:
8439             LOP(OP_SEMCTL,XTERM);
8440
8441         case KEY_semget:
8442             LOP(OP_SEMGET,XTERM);
8443
8444         case KEY_semop:
8445             LOP(OP_SEMOP,XTERM);
8446
8447         case KEY_send:
8448             LOP(OP_SEND,XTERM);
8449
8450         case KEY_setpgrp:
8451             LOP(OP_SETPGRP,XTERM);
8452
8453         case KEY_setpriority:
8454             LOP(OP_SETPRIORITY,XTERM);
8455
8456         case KEY_sethostent:
8457             UNI(OP_SHOSTENT);
8458
8459         case KEY_setnetent:
8460             UNI(OP_SNETENT);
8461
8462         case KEY_setservent:
8463             UNI(OP_SSERVENT);
8464
8465         case KEY_setprotoent:
8466             UNI(OP_SPROTOENT);
8467
8468         case KEY_setpwent:
8469             FUN0(OP_SPWENT);
8470
8471         case KEY_setgrent:
8472             FUN0(OP_SGRENT);
8473
8474         case KEY_seekdir:
8475             LOP(OP_SEEKDIR,XTERM);
8476
8477         case KEY_setsockopt:
8478             LOP(OP_SSOCKOPT,XTERM);
8479
8480         case KEY_shift:
8481             UNIDOR(OP_SHIFT);
8482
8483         case KEY_shmctl:
8484             LOP(OP_SHMCTL,XTERM);
8485
8486         case KEY_shmget:
8487             LOP(OP_SHMGET,XTERM);
8488
8489         case KEY_shmread:
8490             LOP(OP_SHMREAD,XTERM);
8491
8492         case KEY_shmwrite:
8493             LOP(OP_SHMWRITE,XTERM);
8494
8495         case KEY_shutdown:
8496             LOP(OP_SHUTDOWN,XTERM);
8497
8498         case KEY_sin:
8499             UNI(OP_SIN);
8500
8501         case KEY_sleep:
8502             UNI(OP_SLEEP);
8503
8504         case KEY_socket:
8505             LOP(OP_SOCKET,XTERM);
8506
8507         case KEY_socketpair:
8508             LOP(OP_SOCKPAIR,XTERM);
8509
8510         case KEY_sort:
8511             checkcomma(s,PL_tokenbuf,"subroutine name");
8512             s = skipspace(s);
8513             PL_expect = XTERM;
8514             s = force_word(s,BAREWORD,TRUE,TRUE);
8515             LOP(OP_SORT,XREF);
8516
8517         case KEY_split:
8518             LOP(OP_SPLIT,XTERM);
8519
8520         case KEY_sprintf:
8521             LOP(OP_SPRINTF,XTERM);
8522
8523         case KEY_splice:
8524             LOP(OP_SPLICE,XTERM);
8525
8526         case KEY_sqrt:
8527             UNI(OP_SQRT);
8528
8529         case KEY_srand:
8530             UNI(OP_SRAND);
8531
8532         case KEY_stat:
8533             UNI(OP_STAT);
8534
8535         case KEY_study:
8536             UNI(OP_STUDY);
8537
8538         case KEY_substr:
8539             LOP(OP_SUBSTR,XTERM);
8540
8541         case KEY_format:
8542         case KEY_sub:
8543           really_sub:
8544             {
8545                 char * const tmpbuf = PL_tokenbuf + 1;
8546                 expectation attrful;
8547                 bool have_name, have_proto;
8548                 const int key = tmp;
8549                 SV *format_name = NULL;
8550
8551                 SSize_t off = s-SvPVX(PL_linestr);
8552                 s = skipspace(s);
8553                 d = SvPVX(PL_linestr)+off;
8554
8555                 if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
8556                     || *s == '\''
8557                     || (*s == ':' && s[1] == ':'))
8558                 {
8559
8560                     PL_expect = XBLOCK;
8561                     attrful = XATTRBLOCK;
8562                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8563                                   &len);
8564                     if (key == KEY_format)
8565                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8566                     *PL_tokenbuf = '&';
8567                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8568                      || pad_findmy_pvn(
8569                             PL_tokenbuf, len + 1, 0
8570                         ) != NOT_IN_PAD)
8571                         sv_setpvn(PL_subname, tmpbuf, len);
8572                     else {
8573                         sv_setsv(PL_subname,PL_curstname);
8574                         sv_catpvs(PL_subname,"::");
8575                         sv_catpvn(PL_subname,tmpbuf,len);
8576                     }
8577                     if (SvUTF8(PL_linestr))
8578                         SvUTF8_on(PL_subname);
8579                     have_name = TRUE;
8580
8581
8582                     s = skipspace(d);
8583                 }
8584                 else {
8585                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8586                     {
8587                         *d = '\0';
8588                         /* diag_listed_as: Missing name in "%s sub" */
8589                         Perl_croak(aTHX_
8590                                   "Missing name in \"%s\"", PL_bufptr);
8591                     }
8592                     PL_expect = XTERMBLOCK;
8593                     attrful = XATTRTERM;
8594                     sv_setpvs(PL_subname,"?");
8595                     have_name = FALSE;
8596                 }
8597
8598                 if (key == KEY_format) {
8599                     if (format_name) {
8600                         NEXTVAL_NEXTTOKE.opval
8601                             = newSVOP(OP_CONST,0, format_name);
8602                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8603                         force_next(BAREWORD);
8604                     }
8605                     PREBLOCK(FORMAT);
8606                 }
8607
8608                 /* Look for a prototype */
8609                 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8610                     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8611                     COPLINE_SET_FROM_MULTI_END;
8612                     if (!s)
8613                         Perl_croak(aTHX_ "Prototype not terminated");
8614                     (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8615                     have_proto = TRUE;
8616
8617                     s = skipspace(s);
8618                 }
8619                 else
8620                     have_proto = FALSE;
8621
8622                 if (*s == ':' && s[1] != ':')
8623                     PL_expect = attrful;
8624                 else if ((*s != '{' && *s != '(') && key != KEY_format) {
8625                     assert(key == KEY_sub || key == KEY_AUTOLOAD ||
8626                            key == KEY_DESTROY || key == KEY_BEGIN ||
8627                            key == KEY_UNITCHECK || key == KEY_CHECK ||
8628                            key == KEY_INIT || key == KEY_END ||
8629                            key == KEY_my || key == KEY_state ||
8630                            key == KEY_our);
8631                     if (!have_name)
8632                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8633                     else if (*s != ';' && *s != '}')
8634                         Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
8635                 }
8636
8637                 if (have_proto) {
8638                     NEXTVAL_NEXTTOKE.opval =
8639                         newSVOP(OP_CONST, 0, PL_lex_stuff);
8640                     PL_lex_stuff = NULL;
8641                     force_next(THING);
8642                 }
8643                 if (!have_name) {
8644                     if (PL_curstash)
8645                         sv_setpvs(PL_subname, "__ANON__");
8646                     else
8647                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8648                     TOKEN(ANONSUB);
8649                 }
8650                 force_ident_maybe_lex('&');
8651                 TOKEN(SUB);
8652             }
8653
8654         case KEY_system:
8655             LOP(OP_SYSTEM,XREF);
8656
8657         case KEY_symlink:
8658             LOP(OP_SYMLINK,XTERM);
8659
8660         case KEY_syscall:
8661             LOP(OP_SYSCALL,XTERM);
8662
8663         case KEY_sysopen:
8664             LOP(OP_SYSOPEN,XTERM);
8665
8666         case KEY_sysseek:
8667             LOP(OP_SYSSEEK,XTERM);
8668
8669         case KEY_sysread:
8670             LOP(OP_SYSREAD,XTERM);
8671
8672         case KEY_syswrite:
8673             LOP(OP_SYSWRITE,XTERM);
8674
8675         case KEY_tr:
8676         case KEY_y:
8677             s = scan_trans(s);
8678             TERM(sublex_start());
8679
8680         case KEY_tell:
8681             UNI(OP_TELL);
8682
8683         case KEY_telldir:
8684             UNI(OP_TELLDIR);
8685
8686         case KEY_tie:
8687             LOP(OP_TIE,XTERM);
8688
8689         case KEY_tied:
8690             UNI(OP_TIED);
8691
8692         case KEY_time:
8693             FUN0(OP_TIME);
8694
8695         case KEY_times:
8696             FUN0(OP_TMS);
8697
8698         case KEY_truncate:
8699             LOP(OP_TRUNCATE,XTERM);
8700
8701         case KEY_uc:
8702             UNI(OP_UC);
8703
8704         case KEY_ucfirst:
8705             UNI(OP_UCFIRST);
8706
8707         case KEY_untie:
8708             UNI(OP_UNTIE);
8709
8710         case KEY_until:
8711             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8712                 return REPORT(0);
8713             pl_yylval.ival = CopLINE(PL_curcop);
8714             OPERATOR(UNTIL);
8715
8716         case KEY_unless:
8717             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8718                 return REPORT(0);
8719             pl_yylval.ival = CopLINE(PL_curcop);
8720             OPERATOR(UNLESS);
8721
8722         case KEY_unlink:
8723             LOP(OP_UNLINK,XTERM);
8724
8725         case KEY_undef:
8726             UNIDOR(OP_UNDEF);
8727
8728         case KEY_unpack:
8729             LOP(OP_UNPACK,XTERM);
8730
8731         case KEY_utime:
8732             LOP(OP_UTIME,XTERM);
8733
8734         case KEY_umask:
8735             UNIDOR(OP_UMASK);
8736
8737         case KEY_unshift:
8738             LOP(OP_UNSHIFT,XTERM);
8739
8740         case KEY_use:
8741             s = tokenize_use(1, s);
8742             TOKEN(USE);
8743
8744         case KEY_values:
8745             UNI(OP_VALUES);
8746
8747         case KEY_vec:
8748             LOP(OP_VEC,XTERM);
8749
8750         case KEY_when:
8751             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8752                 return REPORT(0);
8753             pl_yylval.ival = CopLINE(PL_curcop);
8754             Perl_ck_warner_d(aTHX_
8755                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8756                 "when is experimental");
8757             OPERATOR(WHEN);
8758
8759         case KEY_while:
8760             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8761                 return REPORT(0);
8762             pl_yylval.ival = CopLINE(PL_curcop);
8763             OPERATOR(WHILE);
8764
8765         case KEY_warn:
8766             PL_hints |= HINT_BLOCK_SCOPE;
8767             LOP(OP_WARN,XTERM);
8768
8769         case KEY_wait:
8770             FUN0(OP_WAIT);
8771
8772         case KEY_waitpid:
8773             LOP(OP_WAITPID,XTERM);
8774
8775         case KEY_wantarray:
8776             FUN0(OP_WANTARRAY);
8777
8778         case KEY_write:
8779             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8780              * we use the same number on EBCDIC */
8781             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8782             UNI(OP_ENTERWRITE);
8783
8784         case KEY_x:
8785             if (PL_expect == XOPERATOR) {
8786                 if (*s == '=' && !PL_lex_allbrackets
8787                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8788                 {
8789                     return REPORT(0);
8790                 }
8791                 Mop(OP_REPEAT);
8792             }
8793             check_uni();
8794             goto just_a_word;
8795
8796         case KEY_xor:
8797             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8798                 return REPORT(0);
8799             pl_yylval.ival = OP_XOR;
8800             OPERATOR(OROP);
8801         }
8802     }}
8803 }
8804
8805 /*
8806   S_pending_ident
8807
8808   Looks up an identifier in the pad or in a package
8809
8810   is_sig indicates that this is a subroutine signature variable
8811   rather than a plain pad var.
8812
8813   Returns:
8814     PRIVATEREF if this is a lexical name.
8815     BAREWORD   if this belongs to a package.
8816
8817   Structure:
8818       if we're in a my declaration
8819           croak if they tried to say my($foo::bar)
8820           build the ops for a my() declaration
8821       if it's an access to a my() variable
8822           build ops for access to a my() variable
8823       if in a dq string, and they've said @foo and we can't find @foo
8824           warn
8825       build ops for a bareword
8826 */
8827
8828 static int
8829 S_pending_ident(pTHX)
8830 {
8831     PADOFFSET tmp = 0;
8832     const char pit = (char)pl_yylval.ival;
8833     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8834     /* All routes through this function want to know if there is a colon.  */
8835     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8836
8837     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8838           "### Pending identifier '%s'\n", PL_tokenbuf); });
8839
8840     /* if we're in a my(), we can't allow dynamics here.
8841        $foo'bar has already been turned into $foo::bar, so
8842        just check for colons.
8843
8844        if it's a legal name, the OP is a PADANY.
8845     */
8846     if (PL_in_my) {
8847         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8848             if (has_colon)
8849                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8850                                   "variable %s in \"our\"",
8851                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8852             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8853         }
8854         else {
8855             OP *o;
8856             if (has_colon) {
8857                 /* "my" variable %s can't be in a package */
8858                 /* PL_no_myglob is constant */
8859                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8860                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8861                             PL_in_my == KEY_my ? "my" : "state",
8862                             *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8863                             PL_tokenbuf),
8864                             UTF ? SVf_UTF8 : 0);
8865                 GCC_DIAG_RESTORE;
8866             }
8867
8868             if (PL_in_my == KEY_sigvar) {
8869                 /* A signature 'padop' needs in addition, an op_first to
8870                  * point to a child sigdefelem, and an extra field to hold
8871                  * the signature index. We can achieve both by using an
8872                  * UNOP_AUX and (ab)using the op_aux field to hold the
8873                  * index. If we ever need more fields, use a real malloced
8874                  * aux strut instead.
8875                  */
8876                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
8877                                     INT2PTR(UNOP_AUX_item *,
8878                                         (PL_parser->sig_elems)));
8879                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
8880                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
8881                                   :                         OPpARGELEM_HV);
8882             }
8883             else
8884                 o = newOP(OP_PADANY, 0);
8885             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8886                                                         UTF ? SVf_UTF8 : 0);
8887             if (PL_in_my == KEY_sigvar)
8888                 PL_in_my = 0;
8889
8890             pl_yylval.opval = o;
8891             return PRIVATEREF;
8892         }
8893     }
8894
8895     /*
8896        build the ops for accesses to a my() variable.
8897     */
8898
8899     if (!has_colon) {
8900         if (!PL_in_my)
8901             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8902                                  0);
8903         if (tmp != NOT_IN_PAD) {
8904             /* might be an "our" variable" */
8905             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8906                 /* build ops for a bareword */
8907                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8908                 HEK * const stashname = HvNAME_HEK(stash);
8909                 SV *  const sym = newSVhek(stashname);
8910                 sv_catpvs(sym, "::");
8911                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8912                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
8913                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8914                 if (pit != '&')
8915                   gv_fetchsv(sym,
8916                     GV_ADDMULTI,
8917                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8918                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8919                      : SVt_PVHV));
8920                 return BAREWORD;
8921             }
8922
8923             pl_yylval.opval = newOP(OP_PADANY, 0);
8924             pl_yylval.opval->op_targ = tmp;
8925             return PRIVATEREF;
8926         }
8927     }
8928
8929     /*
8930        Whine if they've said @foo or @foo{key} in a doublequoted string,
8931        and @foo (or %foo) isn't a variable we can find in the symbol
8932        table.
8933     */
8934     if (ckWARN(WARN_AMBIGUOUS)
8935         && pit == '@'
8936         && PL_lex_state != LEX_NORMAL
8937         && !PL_lex_brackets)
8938     {
8939         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8940                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
8941                                          SVt_PVAV);
8942         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8943            )
8944         {
8945             /* Downgraded from fatal to warning 20000522 mjd */
8946             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8947                         "Possible unintended interpolation of %" UTF8f
8948                         " in string",
8949                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8950         }
8951     }
8952
8953     /* build ops for a bareword */
8954     pl_yylval.opval = newSVOP(OP_CONST, 0,
8955                                    newSVpvn_flags(PL_tokenbuf + 1,
8956                                                       tokenbuf_len - 1,
8957                                                       UTF ? SVf_UTF8 : 0 ));
8958     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8959     if (pit != '&')
8960         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8961                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
8962                      | ( UTF ? SVf_UTF8 : 0 ),
8963                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8964                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8965                       : SVt_PVHV));
8966     return BAREWORD;
8967 }
8968
8969 STATIC void
8970 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8971 {
8972     PERL_ARGS_ASSERT_CHECKCOMMA;
8973
8974     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8975         if (ckWARN(WARN_SYNTAX)) {
8976             int level = 1;
8977             const char *w;
8978             for (w = s+2; *w && level; w++) {
8979                 if (*w == '(')
8980                     ++level;
8981                 else if (*w == ')')
8982                     --level;
8983             }
8984             while (isSPACE(*w))
8985                 ++w;
8986             /* the list of chars below is for end of statements or
8987              * block / parens, boolean operators (&&, ||, //) and branch
8988              * constructs (or, and, if, until, unless, while, err, for).
8989              * Not a very solid hack... */
8990             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8991                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8992                             "%s (...) interpreted as function",name);
8993         }
8994     }
8995     while (s < PL_bufend && isSPACE(*s))
8996         s++;
8997     if (*s == '(')
8998         s++;
8999     while (s < PL_bufend && isSPACE(*s))
9000         s++;
9001     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9002         const char * const w = s;
9003         s += UTF ? UTF8SKIP(s) : 1;
9004         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9005             s += UTF ? UTF8SKIP(s) : 1;
9006         while (s < PL_bufend && isSPACE(*s))
9007             s++;
9008         if (*s == ',') {
9009             GV* gv;
9010             if (keyword(w, s - w, 0))
9011                 return;
9012
9013             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9014             if (gv && GvCVu(gv))
9015                 return;
9016             if (s - w <= 254) {
9017                 PADOFFSET off;
9018                 char tmpbuf[256];
9019                 Copy(w, tmpbuf+1, s - w, char);
9020                 *tmpbuf = '&';
9021                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9022                 if (off != NOT_IN_PAD) return;
9023             }
9024             Perl_croak(aTHX_ "No comma allowed after %s", what);
9025         }
9026     }
9027 }
9028
9029 /* S_new_constant(): do any overload::constant lookup.
9030
9031    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9032    Best used as sv=new_constant(..., sv, ...).
9033    If s, pv are NULL, calls subroutine with one argument,
9034    and <type> is used with error messages only.
9035    <type> is assumed to be well formed UTF-8 */
9036
9037 STATIC SV *
9038 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9039                SV *sv, SV *pv, const char *type, STRLEN typelen)
9040 {
9041     dSP;
9042     HV * table = GvHV(PL_hintgv);                /* ^H */
9043     SV *res;
9044     SV *errsv = NULL;
9045     SV **cvp;
9046     SV *cv, *typesv;
9047     const char *why1 = "", *why2 = "", *why3 = "";
9048
9049     PERL_ARGS_ASSERT_NEW_CONSTANT;
9050     /* We assume that this is true: */
9051     if (*key == 'c') { assert (strEQ(key, "charnames")); }
9052     assert(type || s);
9053
9054     /* charnames doesn't work well if there have been errors found */
9055     if (PL_error_count > 0 && *key == 'c')
9056     {
9057         SvREFCNT_dec_NN(sv);
9058         return &PL_sv_undef;
9059     }
9060
9061     sv_2mortal(sv);                     /* Parent created it permanently */
9062     if (!table
9063         || ! (PL_hints & HINT_LOCALIZE_HH)
9064         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9065         || ! SvOK(*cvp))
9066     {
9067         char *msg;
9068
9069         /* Here haven't found what we're looking for.  If it is charnames,
9070          * perhaps it needs to be loaded.  Try doing that before giving up */
9071         if (*key == 'c') {
9072             Perl_load_module(aTHX_
9073                             0,
9074                             newSVpvs("_charnames"),
9075                              /* version parameter; no need to specify it, as if
9076                               * we get too early a version, will fail anyway,
9077                               * not being able to find '_charnames' */
9078                             NULL,
9079                             newSVpvs(":full"),
9080                             newSVpvs(":short"),
9081                             NULL);
9082             assert(sp == PL_stack_sp);
9083             table = GvHV(PL_hintgv);
9084             if (table
9085                 && (PL_hints & HINT_LOCALIZE_HH)
9086                 && (cvp = hv_fetch(table, key, keylen, FALSE))
9087                 && SvOK(*cvp))
9088             {
9089                 goto now_ok;
9090             }
9091         }
9092         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9093             msg = Perl_form(aTHX_
9094                                "Constant(%.*s) unknown",
9095                                 (int)(type ? typelen : len),
9096                                 (type ? type: s));
9097         }
9098         else {
9099             why1 = "$^H{";
9100             why2 = key;
9101             why3 = "} is not defined";
9102         report:
9103             if (*key == 'c') {
9104                 msg = Perl_form(aTHX_
9105                             /* The +3 is for '\N{'; -4 for that, plus '}' */
9106                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9107                       );
9108             }
9109             else {
9110                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9111                                     (int)(type ? typelen : len),
9112                                     (type ? type: s), why1, why2, why3);
9113             }
9114         }
9115         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9116         return SvREFCNT_inc_simple_NN(sv);
9117     }
9118   now_ok:
9119     cv = *cvp;
9120     if (!pv && s)
9121         pv = newSVpvn_flags(s, len, SVs_TEMP);
9122     if (type && pv)
9123         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9124     else
9125         typesv = &PL_sv_undef;
9126
9127     PUSHSTACKi(PERLSI_OVERLOAD);
9128     ENTER ;
9129     SAVETMPS;
9130
9131     PUSHMARK(SP) ;
9132     EXTEND(sp, 3);
9133     if (pv)
9134         PUSHs(pv);
9135     PUSHs(sv);
9136     if (pv)
9137         PUSHs(typesv);
9138     PUTBACK;
9139     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9140
9141     SPAGAIN ;
9142
9143     /* Check the eval first */
9144     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9145         STRLEN errlen;
9146         const char * errstr;
9147         sv_catpvs(errsv, "Propagated");
9148         errstr = SvPV_const(errsv, errlen);
9149         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9150         (void)POPs;
9151         res = SvREFCNT_inc_simple_NN(sv);
9152     }
9153     else {
9154         res = POPs;
9155         SvREFCNT_inc_simple_void_NN(res);
9156     }
9157
9158     PUTBACK ;
9159     FREETMPS ;
9160     LEAVE ;
9161     POPSTACK;
9162
9163     if (!SvOK(res)) {
9164         why1 = "Call to &{$^H{";
9165         why2 = key;
9166         why3 = "}} did not return a defined value";
9167         sv = res;
9168         (void)sv_2mortal(sv);
9169         goto report;
9170     }
9171
9172     return res;
9173 }
9174
9175 PERL_STATIC_INLINE void
9176 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9177                     bool is_utf8, bool check_dollar)
9178 {
9179     PERL_ARGS_ASSERT_PARSE_IDENT;
9180
9181     while (*s < PL_bufend) {
9182         if (*d >= e)
9183             Perl_croak(aTHX_ "%s", ident_too_long);
9184         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9185              /* The UTF-8 case must come first, otherwise things
9186              * like c\N{COMBINING TILDE} would start failing, as the
9187              * isWORDCHAR_A case below would gobble the 'c' up.
9188              */
9189
9190             char *t = *s + UTF8SKIP(*s);
9191             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9192                 t += UTF8SKIP(t);
9193             }
9194             if (*d + (t - *s) > e)
9195                 Perl_croak(aTHX_ "%s", ident_too_long);
9196             Copy(*s, *d, t - *s, char);
9197             *d += t - *s;
9198             *s = t;
9199         }
9200         else if ( isWORDCHAR_A(**s) ) {
9201             do {
9202                 *(*d)++ = *(*s)++;
9203             } while (isWORDCHAR_A(**s) && *d < e);
9204         }
9205         else if (   allow_package
9206                  && **s == '\''
9207                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9208         {
9209             *(*d)++ = ':';
9210             *(*d)++ = ':';
9211             (*s)++;
9212         }
9213         else if (allow_package && **s == ':' && (*s)[1] == ':'
9214            /* Disallow things like Foo::$bar. For the curious, this is
9215             * the code path that triggers the "Bad name after" warning
9216             * when looking for barewords.
9217             */
9218            && !(check_dollar && (*s)[2] == '$')) {
9219             *(*d)++ = *(*s)++;
9220             *(*d)++ = *(*s)++;
9221         }
9222         else
9223             break;
9224     }
9225     return;
9226 }
9227
9228 /* Returns a NUL terminated string, with the length of the string written to
9229    *slp
9230    */
9231 STATIC char *
9232 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9233 {
9234     char *d = dest;
9235     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9236     bool is_utf8 = cBOOL(UTF);
9237
9238     PERL_ARGS_ASSERT_SCAN_WORD;
9239
9240     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE);
9241     *d = '\0';
9242     *slp = d - dest;
9243     return s;
9244 }
9245
9246 /* Is the byte 'd' a legal single character identifier name?  'u' is true
9247  * iff Unicode semantics are to be used.  The legal ones are any of:
9248  *  a) all ASCII characters except:
9249  *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9250  *          2) '{'
9251  *     The final case currently doesn't get this far in the program, so we
9252  *     don't test for it.  If that were to change, it would be ok to allow it.
9253  *  b) When not under Unicode rules, any upper Latin1 character
9254  *  c) Otherwise, when unicode rules are used, all XIDS characters.
9255  *
9256  *      Because all ASCII characters have the same representation whether
9257  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9258  *      '{' without knowing if is UTF-8 or not. */
9259 #define VALID_LEN_ONE_IDENT(s, e, is_utf8)                                  \
9260     (isGRAPH_A(*(s)) || ((is_utf8)                                          \
9261                          ? isIDFIRST_utf8_safe(s, e)                        \
9262                          : (isGRAPH_L1(*s)                                  \
9263                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9264
9265 STATIC char *
9266 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9267 {
9268     I32 herelines = PL_parser->herelines;
9269     SSize_t bracket = -1;
9270     char funny = *s++;
9271     char *d = dest;
9272     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9273     bool is_utf8 = cBOOL(UTF);
9274     I32 orig_copline = 0, tmp_copline = 0;
9275
9276     PERL_ARGS_ASSERT_SCAN_IDENT;
9277
9278     if (isSPACE(*s) || !*s)
9279         s = skipspace(s);
9280     if (isDIGIT(*s)) {
9281         while (isDIGIT(*s)) {
9282             if (d >= e)
9283                 Perl_croak(aTHX_ "%s", ident_too_long);
9284             *d++ = *s++;
9285         }
9286     }
9287     else {  /* See if it is a "normal" identifier */
9288         parse_ident(&s, &d, e, 1, is_utf8, FALSE);
9289     }
9290     *d = '\0';
9291     d = dest;
9292     if (*d) {
9293         /* Either a digit variable, or parse_ident() found an identifier
9294            (anything valid as a bareword), so job done and return.  */
9295         if (PL_lex_state != LEX_NORMAL)
9296             PL_lex_state = LEX_INTERPENDMAYBE;
9297         return s;
9298     }
9299
9300     /* Here, it is not a run-of-the-mill identifier name */
9301
9302     if (*s == '$' && s[1]
9303         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9304             || isDIGIT_A((U8)s[1])
9305             || s[1] == '$'
9306             || s[1] == '{'
9307             || strEQs(s+1,"::")) )
9308     {
9309         /* Dereferencing a value in a scalar variable.
9310            The alternatives are different syntaxes for a scalar variable.
9311            Using ' as a leading package separator isn't allowed. :: is.   */
9312         return s;
9313     }
9314     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9315     if (*s == '{') {
9316         bracket = s - SvPVX(PL_linestr);
9317         s++;
9318         orig_copline = CopLINE(PL_curcop);
9319         if (s < PL_bufend && isSPACE(*s)) {
9320             s = skipspace(s);
9321         }
9322     }
9323     if ((s <= PL_bufend - (is_utf8)
9324                           ? UTF8SKIP(s)
9325                           : 1)
9326         && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
9327     {
9328         if (is_utf8) {
9329             const STRLEN skip = UTF8SKIP(s);
9330             STRLEN i;
9331             d[skip] = '\0';
9332             for ( i = 0; i < skip; i++ )
9333                 d[i] = *s++;
9334         }
9335         else {
9336             *d = *s++;
9337             d[1] = '\0';
9338         }
9339     }
9340     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9341     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9342         *d = toCTRL(*s);
9343         s++;
9344     }
9345     /* Warn about ambiguous code after unary operators if {...} notation isn't
9346        used.  There's no difference in ambiguity; it's merely a heuristic
9347        about when not to warn.  */
9348     else if (ck_uni && bracket == -1)
9349         check_uni();
9350     if (bracket != -1) {
9351         bool skip;
9352         char *s2;
9353         /* If we were processing {...} notation then...  */
9354         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)) {
9355             /* if it starts as a valid identifier, assume that it is one.
9356                (the later check for } being at the expected point will trap
9357                cases where this doesn't pan out.)  */
9358             d += is_utf8 ? UTF8SKIP(d) : 1;
9359             parse_ident(&s, &d, e, 1, is_utf8, TRUE);
9360             *d = '\0';
9361             tmp_copline = CopLINE(PL_curcop);
9362             if (s < PL_bufend && isSPACE(*s)) {
9363                 s = skipspace(s);
9364             }
9365             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9366                 /* ${foo[0]} and ${foo{bar}} notation.  */
9367                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9368                     const char * const brack =
9369                         (const char *)
9370                         ((*s == '[') ? "[...]" : "{...}");
9371                     orig_copline = CopLINE(PL_curcop);
9372                     CopLINE_set(PL_curcop, tmp_copline);
9373    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9374                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9375                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9376                         funny, dest, brack, funny, dest, brack);
9377                     CopLINE_set(PL_curcop, orig_copline);
9378                 }
9379                 bracket++;
9380                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9381                 PL_lex_allbrackets++;
9382                 return s;
9383             }
9384         }
9385         /* Handle extended ${^Foo} variables
9386          * 1999-02-27 mjd-perl-patch@plover.com */
9387         else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9388                  && isWORDCHAR(*s))
9389         {
9390             d++;
9391             while (isWORDCHAR(*s) && d < e) {
9392                 *d++ = *s++;
9393             }
9394             if (d >= e)
9395                 Perl_croak(aTHX_ "%s", ident_too_long);
9396             *d = '\0';
9397         }
9398
9399         if ( !tmp_copline )
9400             tmp_copline = CopLINE(PL_curcop);
9401         if ((skip = s < PL_bufend && isSPACE(*s)))
9402             /* Avoid incrementing line numbers or resetting PL_linestart,
9403                in case we have to back up.  */
9404             s2 = peekspace(s);
9405         else
9406             s2 = s;
9407
9408         /* Expect to find a closing } after consuming any trailing whitespace.
9409          */
9410         if (*s2 == '}') {
9411             /* Now increment line numbers if applicable.  */
9412             if (skip)
9413                 s = skipspace(s);
9414             s++;
9415             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9416                 PL_lex_state = LEX_INTERPEND;
9417                 PL_expect = XREF;
9418             }
9419             if (PL_lex_state == LEX_NORMAL) {
9420                 if (ckWARN(WARN_AMBIGUOUS)
9421                     && (keyword(dest, d - dest, 0)
9422                         || get_cvn_flags(dest, d - dest, is_utf8
9423                            ? SVf_UTF8
9424                            : 0)))
9425                 {
9426                     SV *tmp = newSVpvn_flags( dest, d - dest,
9427                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9428                     if (funny == '#')
9429                         funny = '@';
9430                     orig_copline = CopLINE(PL_curcop);
9431                     CopLINE_set(PL_curcop, tmp_copline);
9432                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9433                         "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
9434                         funny, SVfARG(tmp), funny, SVfARG(tmp));
9435                     CopLINE_set(PL_curcop, orig_copline);
9436                 }
9437             }
9438         }
9439         else {
9440             /* Didn't find the closing } at the point we expected, so restore
9441                state such that the next thing to process is the opening { and */
9442             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9443             CopLINE_set(PL_curcop, orig_copline);
9444             PL_parser->herelines = herelines;
9445             *dest = '\0';
9446         }
9447     }
9448     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9449         PL_lex_state = LEX_INTERPEND;
9450     return s;
9451 }
9452
9453 static bool
9454 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9455
9456     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9457      * found in the parse starting at 's', based on the subset that are valid
9458      * in this context input to this routine in 'valid_flags'. Advances s.
9459      * Returns TRUE if the input should be treated as a valid flag, so the next
9460      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9461      * upon first call on the current regex.  This routine will set it to any
9462      * charset modifier found.  The caller shouldn't change it.  This way,
9463      * another charset modifier encountered in the parse can be detected as an
9464      * error, as we have decided to allow only one */
9465
9466     const char c = **s;
9467     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9468
9469     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9470         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
9471             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9472                        UTF ? SVf_UTF8 : 0);
9473             (*s) += charlen;
9474             /* Pretend that it worked, so will continue processing before
9475              * dieing */
9476             return TRUE;
9477         }
9478         return FALSE;
9479     }
9480
9481     switch (c) {
9482
9483         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
9484         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9485         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9486         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9487         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9488         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9489         case LOCALE_PAT_MOD:
9490             if (*charset) {
9491                 goto multiple_charsets;
9492             }
9493             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9494             *charset = c;
9495             break;
9496         case UNICODE_PAT_MOD:
9497             if (*charset) {
9498                 goto multiple_charsets;
9499             }
9500             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9501             *charset = c;
9502             break;
9503         case ASCII_RESTRICT_PAT_MOD:
9504             if (! *charset) {
9505                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9506             }
9507             else {
9508
9509                 /* Error if previous modifier wasn't an 'a', but if it was, see
9510                  * if, and accept, a second occurrence (only) */
9511                 if (*charset != 'a'
9512                     || get_regex_charset(*pmfl)
9513                         != REGEX_ASCII_RESTRICTED_CHARSET)
9514                 {
9515                         goto multiple_charsets;
9516                 }
9517                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9518             }
9519             *charset = c;
9520             break;
9521         case DEPENDS_PAT_MOD:
9522             if (*charset) {
9523                 goto multiple_charsets;
9524             }
9525             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9526             *charset = c;
9527             break;
9528     }
9529
9530     (*s)++;
9531     return TRUE;
9532
9533     multiple_charsets:
9534         if (*charset != c) {
9535             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9536         }
9537         else if (c == 'a') {
9538   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9539             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9540         }
9541         else {
9542             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9543         }
9544
9545         /* Pretend that it worked, so will continue processing before dieing */
9546         (*s)++;
9547         return TRUE;
9548 }
9549
9550 STATIC char *
9551 S_scan_pat(pTHX_ char *start, I32 type)
9552 {
9553     PMOP *pm;
9554     char *s;
9555     const char * const valid_flags =
9556         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9557     char charset = '\0';    /* character set modifier */
9558     unsigned int x_mod_count = 0;
9559
9560     PERL_ARGS_ASSERT_SCAN_PAT;
9561
9562     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
9563     if (!s)
9564         Perl_croak(aTHX_ "Search pattern not terminated");
9565
9566     pm = (PMOP*)newPMOP(type, 0);
9567     if (PL_multi_open == '?') {
9568         /* This is the only point in the code that sets PMf_ONCE:  */
9569         pm->op_pmflags |= PMf_ONCE;
9570
9571         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9572            allows us to restrict the list needed by reset to just the ??
9573            matches.  */
9574         assert(type != OP_TRANS);
9575         if (PL_curstash) {
9576             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9577             U32 elements;
9578             if (!mg) {
9579                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9580                                  0);
9581             }
9582             elements = mg->mg_len / sizeof(PMOP**);
9583             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9584             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9585             mg->mg_len = elements * sizeof(PMOP**);
9586             PmopSTASH_set(pm,PL_curstash);
9587         }
9588     }
9589
9590     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9591      * anon CV. False positives like qr/[(?{]/ are harmless */
9592
9593     if (type == OP_QR) {
9594         STRLEN len;
9595         char *e, *p = SvPV(PL_lex_stuff, len);
9596         e = p + len;
9597         for (; p < e; p++) {
9598             if (p[0] == '(' && p[1] == '?'
9599                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9600             {
9601                 pm->op_pmflags |= PMf_HAS_CV;
9602                 break;
9603             }
9604         }
9605         pm->op_pmflags |= PMf_IS_QR;
9606     }
9607
9608     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
9609                                 &s, &charset, &x_mod_count))
9610     {};
9611     /* issue a warning if /c is specified,but /g is not */
9612     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9613     {
9614         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9615                        "Use of /c modifier is meaningless without /g" );
9616     }
9617
9618     PL_lex_op = (OP*)pm;
9619     pl_yylval.ival = OP_MATCH;
9620     return s;
9621 }
9622
9623 STATIC char *
9624 S_scan_subst(pTHX_ char *start)
9625 {
9626     char *s;
9627     PMOP *pm;
9628     I32 first_start;
9629     line_t first_line;
9630     line_t linediff = 0;
9631     I32 es = 0;
9632     char charset = '\0';    /* character set modifier */
9633     unsigned int x_mod_count = 0;
9634     char *t;
9635
9636     PERL_ARGS_ASSERT_SCAN_SUBST;
9637
9638     pl_yylval.ival = OP_NULL;
9639
9640     s = scan_str(start, TRUE, FALSE, FALSE, &t);
9641
9642     if (!s)
9643         Perl_croak(aTHX_ "Substitution pattern not terminated");
9644
9645     s = t;
9646
9647     first_start = PL_multi_start;
9648     first_line = CopLINE(PL_curcop);
9649     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9650     if (!s) {
9651         SvREFCNT_dec_NN(PL_lex_stuff);
9652         PL_lex_stuff = NULL;
9653         Perl_croak(aTHX_ "Substitution replacement not terminated");
9654     }
9655     PL_multi_start = first_start;       /* so whole substitution is taken together */
9656
9657     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9658
9659
9660     while (*s) {
9661         if (*s == EXEC_PAT_MOD) {
9662             s++;
9663             es++;
9664         }
9665         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9666                                   &s, &charset, &x_mod_count))
9667         {
9668             break;
9669         }
9670     }
9671
9672     if ((pm->op_pmflags & PMf_CONTINUE)) {
9673         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9674     }
9675
9676     if (es) {
9677         SV * const repl = newSVpvs("");
9678
9679         PL_multi_end = 0;
9680         pm->op_pmflags |= PMf_EVAL;
9681         while (es-- > 0) {
9682             if (es)
9683                 sv_catpvs(repl, "eval ");
9684             else
9685                 sv_catpvs(repl, "do ");
9686         }
9687         sv_catpvs(repl, "{");
9688         sv_catsv(repl, PL_parser->lex_sub_repl);
9689         sv_catpvs(repl, "}");
9690         SvREFCNT_dec(PL_parser->lex_sub_repl);
9691         PL_parser->lex_sub_repl = repl;
9692         es = 1;
9693     }
9694
9695
9696     linediff = CopLINE(PL_curcop) - first_line;
9697     if (linediff)
9698         CopLINE_set(PL_curcop, first_line);
9699
9700     if (linediff || es) {
9701         /* the IVX field indicates that the replacement string is a s///e;
9702          * the NVX field indicates how many src code lines the replacement
9703          * spreads over */
9704         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
9705         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = 0;
9706         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
9707                                                                     cBOOL(es);
9708     }
9709
9710     PL_lex_op = (OP*)pm;
9711     pl_yylval.ival = OP_SUBST;
9712     return s;
9713 }
9714
9715 STATIC char *
9716 S_scan_trans(pTHX_ char *start)
9717 {
9718     char* s;
9719     OP *o;
9720     U8 squash;
9721     U8 del;
9722     U8 complement;
9723     bool nondestruct = 0;
9724     char *t;
9725
9726     PERL_ARGS_ASSERT_SCAN_TRANS;
9727
9728     pl_yylval.ival = OP_NULL;
9729
9730     s = scan_str(start,FALSE,FALSE,FALSE,&t);
9731     if (!s)
9732         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9733
9734     s = t;
9735
9736     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9737     if (!s) {
9738         SvREFCNT_dec_NN(PL_lex_stuff);
9739         PL_lex_stuff = NULL;
9740         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9741     }
9742
9743     complement = del = squash = 0;
9744     while (1) {
9745         switch (*s) {
9746         case 'c':
9747             complement = OPpTRANS_COMPLEMENT;
9748             break;
9749         case 'd':
9750             del = OPpTRANS_DELETE;
9751             break;
9752         case 's':
9753             squash = OPpTRANS_SQUASH;
9754             break;
9755         case 'r':
9756             nondestruct = 1;
9757             break;
9758         default:
9759             goto no_more;
9760         }
9761         s++;
9762     }
9763   no_more:
9764
9765     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9766     o->op_private &= ~OPpTRANS_ALL;
9767     o->op_private |= del|squash|complement|
9768       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9769       (DO_UTF8(PL_parser->lex_sub_repl) ? OPpTRANS_TO_UTF   : 0);
9770
9771     PL_lex_op = o;
9772     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9773
9774
9775     return s;
9776 }
9777
9778 /* scan_heredoc
9779    Takes a pointer to the first < in <<FOO.
9780    Returns a pointer to the byte following <<FOO.
9781
9782    This function scans a heredoc, which involves different methods
9783    depending on whether we are in a string eval, quoted construct, etc.
9784    This is because PL_linestr could containing a single line of input, or
9785    a whole string being evalled, or the contents of the current quote-
9786    like operator.
9787
9788    The two basic methods are:
9789     - Steal lines from the input stream
9790     - Scan the heredoc in PL_linestr and remove it therefrom
9791
9792    In a file scope or filtered eval, the first method is used; in a
9793    string eval, the second.
9794
9795    In a quote-like operator, we have to choose between the two,
9796    depending on where we can find a newline.  We peek into outer lex-
9797    ing scopes until we find one with a newline in it.  If we reach the
9798    outermost lexing scope and it is a file, we use the stream method.
9799    Otherwise it is treated as an eval.
9800 */
9801
9802 STATIC char *
9803 S_scan_heredoc(pTHX_ char *s)
9804 {
9805     I32 op_type = OP_SCALAR;
9806     I32 len;
9807     SV *tmpstr;
9808     char term;
9809     char *d;
9810     char *e;
9811     char *peek;
9812     char *indent = 0;
9813     I32 indent_len = 0;
9814     bool indented = FALSE;
9815     const bool infile = PL_rsfp || PL_parser->filtered;
9816     const line_t origline = CopLINE(PL_curcop);
9817     LEXSHARED *shared = PL_parser->lex_shared;
9818
9819     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9820
9821     s += 2;
9822     d = PL_tokenbuf + 1;
9823     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9824     *PL_tokenbuf = '\n';
9825     peek = s;
9826     if (*peek == '~') {
9827         indented = TRUE;
9828         peek++; s++;
9829     }
9830     while (SPACE_OR_TAB(*peek))
9831         peek++;
9832     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9833         s = peek;
9834         term = *s++;
9835         s = delimcpy(d, e, s, PL_bufend, term, &len);
9836         if (s == PL_bufend)
9837             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9838         d += len;
9839         s++;
9840     }
9841     else {
9842         if (*s == '\\')
9843             /* <<\FOO is equivalent to <<'FOO' */
9844             s++, term = '\'';
9845         else
9846             term = '"';
9847         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9848             deprecate_fatal_in("5.28", "Use of bare << to mean <<\"\" is deprecated");
9849         peek = s;
9850         while (
9851                isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF))
9852         {
9853             peek += UTF ? UTF8SKIP(peek) : 1;
9854         }
9855         len = (peek - s >= e - d) ? (e - d) : (peek - s);
9856         Copy(s, d, len, char);
9857         s += len;
9858         d += len;
9859     }
9860     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9861         Perl_croak(aTHX_ "Delimiter for here document is too long");
9862     *d++ = '\n';
9863     *d = '\0';
9864     len = d - PL_tokenbuf;
9865
9866 #ifndef PERL_STRICT_CR
9867     d = strchr(s, '\r');
9868     if (d) {
9869         char * const olds = s;
9870         s = d;
9871         while (s < PL_bufend) {
9872             if (*s == '\r') {
9873                 *d++ = '\n';
9874                 if (*++s == '\n')
9875                     s++;
9876             }
9877             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9878                 *d++ = *s++;
9879                 s++;
9880             }
9881             else
9882                 *d++ = *s++;
9883         }
9884         *d = '\0';
9885         PL_bufend = d;
9886         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9887         s = olds;
9888     }
9889 #endif
9890
9891     tmpstr = newSV_type(SVt_PVIV);
9892     SvGROW(tmpstr, 80);
9893     if (term == '\'') {
9894         op_type = OP_CONST;
9895         SvIV_set(tmpstr, -1);
9896     }
9897     else if (term == '`') {
9898         op_type = OP_BACKTICK;
9899         SvIV_set(tmpstr, '\\');
9900     }
9901
9902     PL_multi_start = origline + 1 + PL_parser->herelines;
9903     PL_multi_open = PL_multi_close = '<';
9904     /* inside a string eval or quote-like operator */
9905     if (!infile || PL_lex_inwhat) {
9906         SV *linestr;
9907         char *bufend;
9908         char * const olds = s;
9909         PERL_CONTEXT * const cx = CX_CUR();
9910         /* These two fields are not set until an inner lexing scope is
9911            entered.  But we need them set here. */
9912         shared->ls_bufptr  = s;
9913         shared->ls_linestr = PL_linestr;
9914         if (PL_lex_inwhat)
9915           /* Look for a newline.  If the current buffer does not have one,
9916              peek into the line buffer of the parent lexing scope, going
9917              up as many levels as necessary to find one with a newline
9918              after bufptr.
9919            */
9920           while (!(s = (char *)memchr(
9921                     (void *)shared->ls_bufptr, '\n',
9922                     SvEND(shared->ls_linestr)-shared->ls_bufptr
9923                 ))) {
9924             shared = shared->ls_prev;
9925             /* shared is only null if we have gone beyond the outermost
9926                lexing scope.  In a file, we will have broken out of the
9927                loop in the previous iteration.  In an eval, the string buf-
9928                fer ends with "\n;", so the while condition above will have
9929                evaluated to false.  So shared can never be null.  Or so you
9930                might think.  Odd syntax errors like s;@{<<; can gobble up
9931                the implicit semicolon at the end of a flie, causing the
9932                file handle to be closed even when we are not in a string
9933                eval.  So shared may be null in that case.
9934                (Closing '}' here to balance the earlier open brace for
9935                editors that look for matched pairs.) */
9936             if (UNLIKELY(!shared))
9937                 goto interminable;
9938             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9939                most lexing scope.  In a file, shared->ls_linestr at that
9940                level is just one line, so there is no body to steal. */
9941             if (infile && !shared->ls_prev) {
9942                 s = olds;
9943                 goto streaming;
9944             }
9945           }
9946         else {  /* eval or we've already hit EOF */
9947             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9948             if (!s)
9949                 goto interminable;
9950         }
9951         linestr = shared->ls_linestr;
9952         bufend = SvEND(linestr);
9953         d = s;
9954         if (indented) {
9955             char *myolds = s;
9956
9957             while (s < bufend - len + 1) {
9958                 if (*s++ == '\n')
9959                     ++PL_parser->herelines;
9960
9961                 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
9962                     char *backup = s;
9963                     indent_len = 0;
9964
9965                     /* Only valid if it's preceded by whitespace only */
9966                     while (backup != myolds && --backup >= myolds) {
9967                         if (! SPACE_OR_TAB(*backup)) {
9968                             break;
9969                         }
9970
9971                         indent_len++;
9972                     }
9973
9974                     /* No whitespace or all! */
9975                     if (backup == s || *backup == '\n') {
9976                         Newxz(indent, indent_len + 1, char);
9977                         memcpy(indent, backup + 1, indent_len);
9978                         s--; /* before our delimiter */
9979                         PL_parser->herelines--; /* this line doesn't count */
9980                         break;
9981                     }
9982                 }
9983             }
9984         } else {
9985             while (s < bufend - len + 1
9986                    && memNE(s,PL_tokenbuf,len) )
9987             {
9988                 if (*s++ == '\n')
9989                     ++PL_parser->herelines;
9990             }
9991         }
9992
9993         if (s >= bufend - len + 1) {
9994             goto interminable;
9995         }
9996         sv_setpvn(tmpstr,d+1,s-d);
9997         s += len - 1;
9998         /* the preceding stmt passes a newline */
9999         PL_parser->herelines++;
10000
10001         /* s now points to the newline after the heredoc terminator.
10002            d points to the newline before the body of the heredoc.
10003          */
10004
10005         /* We are going to modify linestr in place here, so set
10006            aside copies of the string if necessary for re-evals or
10007            (caller $n)[6]. */
10008         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10009            check shared->re_eval_str. */
10010         if (shared->re_eval_start || shared->re_eval_str) {
10011             /* Set aside the rest of the regexp */
10012             if (!shared->re_eval_str)
10013                 shared->re_eval_str =
10014                        newSVpvn(shared->re_eval_start,
10015                                 bufend - shared->re_eval_start);
10016             shared->re_eval_start -= s-d;
10017         }
10018         if (cxstack_ix >= 0
10019             && CxTYPE(cx) == CXt_EVAL
10020             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10021             && cx->blk_eval.cur_text == linestr)
10022         {
10023             cx->blk_eval.cur_text = newSVsv(linestr);
10024             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10025         }
10026         /* Copy everything from s onwards back to d. */
10027         Move(s,d,bufend-s + 1,char);
10028         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10029         /* Setting PL_bufend only applies when we have not dug deeper
10030            into other scopes, because sublex_done sets PL_bufend to
10031            SvEND(PL_linestr). */
10032         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10033         s = olds;
10034     }
10035     else
10036     {
10037       SV *linestr_save;
10038       char *oldbufptr_save;
10039       char *oldoldbufptr_save;
10040      streaming:
10041       SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
10042       term = PL_tokenbuf[1];
10043       len--;
10044       linestr_save = PL_linestr; /* must restore this afterwards */
10045       d = s;                     /* and this */
10046       oldbufptr_save = PL_oldbufptr;
10047       oldoldbufptr_save = PL_oldoldbufptr;
10048       PL_linestr = newSVpvs("");
10049       PL_bufend = SvPVX(PL_linestr);
10050       while (1) {
10051         PL_bufptr = PL_bufend;
10052         CopLINE_set(PL_curcop,
10053                     origline + 1 + PL_parser->herelines);
10054         if (!lex_next_chunk(LEX_NO_TERM)
10055          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10056             /* Simply freeing linestr_save might seem simpler here, as it
10057                does not matter what PL_linestr points to, since we are
10058                about to croak; but in a quote-like op, linestr_save
10059                will have been prospectively freed already, via
10060                SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10061                restore PL_linestr. */
10062             SvREFCNT_dec_NN(PL_linestr);
10063             PL_linestr = linestr_save;
10064             PL_oldbufptr = oldbufptr_save;
10065             PL_oldoldbufptr = oldoldbufptr_save;
10066             goto interminable;
10067         }
10068         CopLINE_set(PL_curcop, origline);
10069         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10070             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10071             /* ^That should be enough to avoid this needing to grow:  */
10072             sv_catpvs(PL_linestr, "\n\0");
10073             assert(s == SvPVX(PL_linestr));
10074             PL_bufend = SvEND(PL_linestr);
10075         }
10076         s = PL_bufptr;
10077         PL_parser->herelines++;
10078         PL_last_lop = PL_last_uni = NULL;
10079 #ifndef PERL_STRICT_CR
10080         if (PL_bufend - PL_linestart >= 2) {
10081             if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10082                 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10083             {
10084                 PL_bufend[-2] = '\n';
10085                 PL_bufend--;
10086                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10087             }
10088             else if (PL_bufend[-1] == '\r')
10089                 PL_bufend[-1] = '\n';
10090         }
10091         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10092             PL_bufend[-1] = '\n';
10093 #endif
10094         if (indented && (PL_bufend-s) >= len) {
10095             char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10096
10097             if (found) {
10098                 char *backup = found;
10099                 indent_len = 0;
10100
10101                 /* Only valid if it's preceded by whitespace only */
10102                 while (backup != s && --backup >= s) {
10103                     if (! SPACE_OR_TAB(*backup)) {
10104                         break;
10105                     }
10106                     indent_len++;
10107                 }
10108
10109                 /* All whitespace or none! */
10110                 if (backup == found || SPACE_OR_TAB(*backup)) {
10111                     Newxz(indent, indent_len + 1, char);
10112                     memcpy(indent, backup, indent_len);
10113                     SvREFCNT_dec(PL_linestr);
10114                     PL_linestr = linestr_save;
10115                     PL_linestart = SvPVX(linestr_save);
10116                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10117                     PL_oldbufptr = oldbufptr_save;
10118                     PL_oldoldbufptr = oldoldbufptr_save;
10119                     s = d;
10120                     break;
10121                 }
10122             }
10123
10124             /* Didn't find it */
10125             sv_catsv(tmpstr,PL_linestr);
10126         } else {
10127             if (*s == term && PL_bufend-s >= len
10128                 && memEQ(s,PL_tokenbuf + 1,len))
10129             {
10130                 SvREFCNT_dec(PL_linestr);
10131                 PL_linestr = linestr_save;
10132                 PL_linestart = SvPVX(linestr_save);
10133                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10134                 PL_oldbufptr = oldbufptr_save;
10135                 PL_oldoldbufptr = oldoldbufptr_save;
10136                 s = d;
10137                 break;
10138             } else {
10139                 sv_catsv(tmpstr,PL_linestr);
10140             }
10141         }
10142       }
10143     }
10144     PL_multi_end = origline + PL_parser->herelines;
10145     if (indented && indent) {
10146         STRLEN linecount = 1;
10147         STRLEN herelen = SvCUR(tmpstr);
10148         char *ss = SvPVX(tmpstr);
10149         char *se = ss + herelen;
10150         SV *newstr = newSV(herelen+1);
10151         SvPOK_on(newstr);
10152
10153         /* Trim leading whitespace */
10154         while (ss < se) {
10155             /* newline only? Copy and move on */
10156             if (*ss == '\n') {
10157                 sv_catpv(newstr,"\n");
10158                 ss++;
10159                 linecount++;
10160
10161             /* Found our indentation? Strip it */
10162             } else if (se - ss >= indent_len
10163                        && memEQ(ss, indent, indent_len))
10164             {
10165                 STRLEN le = 0;
10166
10167                 ss += indent_len;
10168
10169                 while ((ss + le) < se && *(ss + le) != '\n')
10170                     le++;
10171
10172                 sv_catpvn(newstr, ss, le);
10173
10174                 ss += le;
10175
10176             /* Line doesn't begin with our indentation? Croak */
10177             } else {
10178                 Perl_croak(aTHX_
10179                     "Indentation on line %d of here-doc doesn't match delimiter",
10180                     (int)linecount
10181                 );
10182             }
10183         }
10184         /* avoid sv_setsv() as we dont wan't to COW here */
10185         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10186         Safefree(indent);
10187         SvREFCNT_dec_NN(newstr);
10188     }
10189     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10190         SvPV_shrink_to_cur(tmpstr);
10191     }
10192     if (!IN_BYTES) {
10193         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10194             SvUTF8_on(tmpstr);
10195     }
10196     PL_lex_stuff = tmpstr;
10197     pl_yylval.ival = op_type;
10198     return s;
10199
10200   interminable:
10201     SvREFCNT_dec(tmpstr);
10202     CopLINE_set(PL_curcop, origline);
10203     missingterm(PL_tokenbuf + 1);
10204 }
10205
10206 /* scan_inputsymbol
10207    takes: position of first '<' in input buffer
10208    returns: position of first char following the matching '>' in
10209             input buffer
10210    side-effects: pl_yylval and lex_op are set.
10211
10212    This code handles:
10213
10214    <>           read from ARGV
10215    <<>>         read from ARGV without magic open
10216    <FH>         read from filehandle
10217    <pkg::FH>    read from package qualified filehandle
10218    <pkg'FH>     read from package qualified filehandle
10219    <$fh>        read from filehandle in $fh
10220    <*.h>        filename glob
10221
10222 */
10223
10224 STATIC char *
10225 S_scan_inputsymbol(pTHX_ char *start)
10226 {
10227     char *s = start;            /* current position in buffer */
10228     char *end;
10229     I32 len;
10230     bool nomagicopen = FALSE;
10231     char *d = PL_tokenbuf;                                      /* start of temp holding space */
10232     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
10233
10234     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10235
10236     end = strchr(s, '\n');
10237     if (!end)
10238         end = PL_bufend;
10239     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10240         nomagicopen = TRUE;
10241         *d = '\0';
10242         len = 0;
10243         s += 3;
10244     }
10245     else
10246         s = delimcpy(d, e, s + 1, end, '>', &len);      /* extract until > */
10247
10248     /* die if we didn't have space for the contents of the <>,
10249        or if it didn't end, or if we see a newline
10250     */
10251
10252     if (len >= (I32)sizeof PL_tokenbuf)
10253         Perl_croak(aTHX_ "Excessively long <> operator");
10254     if (s >= end)
10255         Perl_croak(aTHX_ "Unterminated <> operator");
10256
10257     s++;
10258
10259     /* check for <$fh>
10260        Remember, only scalar variables are interpreted as filehandles by
10261        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10262        treated as a glob() call.
10263        This code makes use of the fact that except for the $ at the front,
10264        a scalar variable and a filehandle look the same.
10265     */
10266     if (*d == '$' && d[1]) d++;
10267
10268     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10269     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
10270         d += UTF ? UTF8SKIP(d) : 1;
10271     }
10272
10273     /* If we've tried to read what we allow filehandles to look like, and
10274        there's still text left, then it must be a glob() and not a getline.
10275        Use scan_str to pull out the stuff between the <> and treat it
10276        as nothing more than a string.
10277     */
10278
10279     if (d - PL_tokenbuf != len) {
10280         pl_yylval.ival = OP_GLOB;
10281         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10282         if (!s)
10283            Perl_croak(aTHX_ "Glob not terminated");
10284         return s;
10285     }
10286     else {
10287         bool readline_overriden = FALSE;
10288         GV *gv_readline;
10289         /* we're in a filehandle read situation */
10290         d = PL_tokenbuf;
10291
10292         /* turn <> into <ARGV> */
10293         if (!len)
10294             Copy("ARGV",d,5,char);
10295
10296         /* Check whether readline() is overriden */
10297         if ((gv_readline = gv_override("readline",8)))
10298             readline_overriden = TRUE;
10299
10300         /* if <$fh>, create the ops to turn the variable into a
10301            filehandle
10302         */
10303         if (*d == '$') {
10304             /* try to find it in the pad for this block, otherwise find
10305                add symbol table ops
10306             */
10307             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
10308             if (tmp != NOT_IN_PAD) {
10309                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10310                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10311                     HEK * const stashname = HvNAME_HEK(stash);
10312                     SV * const sym = sv_2mortal(newSVhek(stashname));
10313                     sv_catpvs(sym, "::");
10314                     sv_catpv(sym, d+1);
10315                     d = SvPVX(sym);
10316                     goto intro_sym;
10317                 }
10318                 else {
10319                     OP * const o = newOP(OP_PADSV, 0);
10320                     o->op_targ = tmp;
10321                     PL_lex_op = readline_overriden
10322                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10323                                 op_append_elem(OP_LIST, o,
10324                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10325                         : newUNOP(OP_READLINE, 0, o);
10326                 }
10327             }
10328             else {
10329                 GV *gv;
10330                 ++d;
10331               intro_sym:
10332                 gv = gv_fetchpv(d,
10333                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
10334                                 SVt_PV);
10335                 PL_lex_op = readline_overriden
10336                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10337                             op_append_elem(OP_LIST,
10338                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10339                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10340                     : newUNOP(OP_READLINE, 0,
10341                             newUNOP(OP_RV2SV, 0,
10342                                 newGVOP(OP_GV, 0, gv)));
10343             }
10344             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10345             pl_yylval.ival = OP_NULL;
10346         }
10347
10348         /* If it's none of the above, it must be a literal filehandle
10349            (<Foo::BAR> or <FOO>) so build a simple readline OP */
10350         else {
10351             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10352             PL_lex_op = readline_overriden
10353                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10354                         op_append_elem(OP_LIST,
10355                             newGVOP(OP_GV, 0, gv),
10356                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10357                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
10358             pl_yylval.ival = OP_NULL;
10359         }
10360     }
10361
10362     return s;
10363 }
10364
10365
10366 /* scan_str
10367    takes:
10368         start                   position in buffer
10369         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
10370                                 only if they are of the open/close form
10371         keep_delims             preserve the delimiters around the string
10372         re_reparse              compiling a run-time /(?{})/:
10373                                    collapse // to /,  and skip encoding src
10374         delimp                  if non-null, this is set to the position of
10375                                 the closing delimiter, or just after it if
10376                                 the closing and opening delimiters differ
10377                                 (i.e., the opening delimiter of a substitu-
10378                                 tion replacement)
10379    returns: position to continue reading from buffer
10380    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10381         updates the read buffer.
10382
10383    This subroutine pulls a string out of the input.  It is called for:
10384         q               single quotes           q(literal text)
10385         '               single quotes           'literal text'
10386         qq              double quotes           qq(interpolate $here please)
10387         "               double quotes           "interpolate $here please"
10388         qx              backticks               qx(/bin/ls -l)
10389         `               backticks               `/bin/ls -l`
10390         qw              quote words             @EXPORT_OK = qw( func() $spam )
10391         m//             regexp match            m/this/
10392         s///            regexp substitute       s/this/that/
10393         tr///           string transliterate    tr/this/that/
10394         y///            string transliterate    y/this/that/
10395         ($*@)           sub prototypes          sub foo ($)
10396         (stuff)         sub attr parameters     sub foo : attr(stuff)
10397         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10398
10399    In most of these cases (all but <>, patterns and transliterate)
10400    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10401    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10402    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10403    calls scan_str().
10404
10405    It skips whitespace before the string starts, and treats the first
10406    character as the delimiter.  If the delimiter is one of ([{< then
10407    the corresponding "close" character )]}> is used as the closing
10408    delimiter.  It allows quoting of delimiters, and if the string has
10409    balanced delimiters ([{<>}]) it allows nesting.
10410
10411    On success, the SV with the resulting string is put into lex_stuff or,
10412    if that is already non-NULL, into lex_repl. The second case occurs only
10413    when parsing the RHS of the special constructs s/// and tr/// (y///).
10414    For convenience, the terminating delimiter character is stuffed into
10415    SvIVX of the SV.
10416 */
10417
10418 STATIC char *
10419 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
10420                  char **delimp
10421     )
10422 {
10423     SV *sv;                     /* scalar value: string */
10424     const char *tmps;           /* temp string, used for delimiter matching */
10425     char *s = start;            /* current position in the buffer */
10426     char term;                  /* terminating character */
10427     char *to;                   /* current position in the sv's data */
10428     I32 brackets = 1;           /* bracket nesting level */
10429     bool has_utf8 = FALSE;      /* is there any utf8 content? */
10430     IV termcode;                /* terminating char. code */
10431     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
10432     STRLEN termlen;             /* length of terminating string */
10433     line_t herelines;
10434
10435     /* The delimiters that have a mirror-image closing one */
10436     const char * opening_delims = "([{<";
10437     const char * closing_delims = ")]}>";
10438
10439     const char * non_grapheme_msg = "Use of unassigned code point or"
10440                                     " non-standalone grapheme for a delimiter"
10441                                     " will be a fatal error starting in Perl"
10442                                     " 5.30";
10443     /* The only non-UTF character that isn't a stand alone grapheme is
10444      * white-space, hence can't be a delimiter.  So can skip for non-UTF-8 */
10445     bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED);
10446
10447     PERL_ARGS_ASSERT_SCAN_STR;
10448
10449     /* skip space before the delimiter */
10450     if (isSPACE(*s)) {
10451         s = skipspace(s);
10452     }
10453
10454     /* mark where we are, in case we need to report errors */
10455     CLINE;
10456
10457     /* after skipping whitespace, the next character is the terminator */
10458     term = *s;
10459     if (!UTF || UTF8_IS_INVARIANT(term)) {
10460         termcode = termstr[0] = term;
10461         termlen = 1;
10462     }
10463     else {
10464         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10465         if (check_grapheme) {
10466             if (   UNLIKELY(UNICODE_IS_SUPER(termcode))
10467                 || UNLIKELY(UNICODE_IS_NONCHAR(termcode)))
10468             {
10469                 /* These are considered graphemes, and since the ending
10470                  * delimiter will be the same, we don't have to check the other
10471                  * end */
10472                 check_grapheme = FALSE;
10473             }
10474             else if (UNLIKELY(! _is_grapheme((U8 *) start,
10475                                              (U8 *) s,
10476                                              (U8 *) PL_bufend,
10477                                              termcode)))
10478             {
10479                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "%s", non_grapheme_msg);
10480
10481                 /* Don't have to check the other end, as have already warned at
10482                  * this one */
10483                 check_grapheme = FALSE;
10484             }
10485         }
10486
10487         Copy(s, termstr, termlen, U8);
10488     }
10489
10490     /* mark where we are */
10491     PL_multi_start = CopLINE(PL_curcop);
10492     PL_multi_open = termcode;
10493     herelines = PL_parser->herelines;
10494
10495     /* If the delimiter has a mirror-image closing one, get it */
10496     if (term && (tmps = strchr(opening_delims, term))) {
10497         termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
10498     }
10499
10500     PL_multi_close = termcode;
10501
10502     if (PL_multi_open == PL_multi_close) {
10503         keep_bracketed_quoted = FALSE;
10504     }
10505
10506     /* create a new SV to hold the contents.  79 is the SV's initial length.
10507        What a random number. */
10508     sv = newSV_type(SVt_PVIV);
10509     SvGROW(sv, 80);
10510     SvIV_set(sv, termcode);
10511     (void)SvPOK_only(sv);               /* validate pointer */
10512
10513     /* move past delimiter and try to read a complete string */
10514     if (keep_delims)
10515         sv_catpvn(sv, s, termlen);
10516     s += termlen;
10517     for (;;) {
10518         /* extend sv if need be */
10519         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10520         /* set 'to' to the next character in the sv's string */
10521         to = SvPVX(sv)+SvCUR(sv);
10522
10523         /* if open delimiter is the close delimiter read unbridle */
10524         if (PL_multi_open == PL_multi_close) {
10525             for (; s < PL_bufend; s++,to++) {
10526                 /* embedded newlines increment the current line number */
10527                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10528                     COPLINE_INC_WITH_HERELINES;
10529                 /* handle quoted delimiters */
10530                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10531                     if (!keep_bracketed_quoted
10532                         && (s[1] == term
10533                             || (re_reparse && s[1] == '\\'))
10534                     )
10535                         s++;
10536                     else /* any other quotes are simply copied straight through */
10537                         *to++ = *s++;
10538                 }
10539                 /* terminate when run out of buffer (the for() condition), or
10540                    have found the terminator */
10541                 else if (*s == term) {  /* First byte of terminator matches */
10542                     if (termlen == 1)   /* If is the only byte, are done */
10543                         break;
10544
10545                     /* If the remainder of the terminator matches, also are
10546                      * done, after checking that is a separate grapheme */
10547                     if (   s + termlen <= PL_bufend
10548                         && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
10549                     {
10550                         if (   check_grapheme
10551                             && UNLIKELY(! _is_grapheme((U8 *) start,
10552                                                               (U8 *) s,
10553                                                               (U8 *) PL_bufend,
10554                                                               termcode)))
10555                         {
10556                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10557                                         "%s", non_grapheme_msg);
10558                         }
10559                         break;
10560                     }
10561                 }
10562                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
10563                     has_utf8 = TRUE;
10564                 }
10565
10566                 *to = *s;
10567             }
10568         }
10569
10570         /* if the terminator isn't the same as the start character (e.g.,
10571            matched brackets), we have to allow more in the quoting, and
10572            be prepared for nested brackets.
10573         */
10574         else {
10575             /* read until we run out of string, or we find the terminator */
10576             for (; s < PL_bufend; s++,to++) {
10577                 /* embedded newlines increment the line count */
10578                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10579                     COPLINE_INC_WITH_HERELINES;
10580                 /* backslashes can escape the open or closing characters */
10581                 if (*s == '\\' && s+1 < PL_bufend) {
10582                     if (!keep_bracketed_quoted
10583                        && ( ((UV)s[1] == PL_multi_open)
10584                          || ((UV)s[1] == PL_multi_close) ))
10585                     {
10586                         s++;
10587                     }
10588                     else
10589                         *to++ = *s++;
10590                 }
10591                 /* allow nested opens and closes */
10592                 else if ((UV)*s == PL_multi_close && --brackets <= 0)
10593                     break;
10594                 else if ((UV)*s == PL_multi_open)
10595                     brackets++;
10596                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10597                     has_utf8 = TRUE;
10598                 *to = *s;
10599             }
10600         }
10601         /* terminate the copied string and update the sv's end-of-string */
10602         *to = '\0';
10603         SvCUR_set(sv, to - SvPVX_const(sv));
10604
10605         /*
10606          * this next chunk reads more into the buffer if we're not done yet
10607          */
10608
10609         if (s < PL_bufend)
10610             break;              /* handle case where we are done yet :-) */
10611
10612 #ifndef PERL_STRICT_CR
10613         if (to - SvPVX_const(sv) >= 2) {
10614             if (   (to[-2] == '\r' && to[-1] == '\n')
10615                 || (to[-2] == '\n' && to[-1] == '\r'))
10616             {
10617                 to[-2] = '\n';
10618                 to--;
10619                 SvCUR_set(sv, to - SvPVX_const(sv));
10620             }
10621             else if (to[-1] == '\r')
10622                 to[-1] = '\n';
10623         }
10624         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10625             to[-1] = '\n';
10626 #endif
10627
10628         /* if we're out of file, or a read fails, bail and reset the current
10629            line marker so we can report where the unterminated string began
10630         */
10631         COPLINE_INC_WITH_HERELINES;
10632         PL_bufptr = PL_bufend;
10633         if (!lex_next_chunk(0)) {
10634             sv_free(sv);
10635             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10636             return NULL;
10637         }
10638         s = start = PL_bufptr;
10639     }
10640
10641     /* at this point, we have successfully read the delimited string */
10642
10643     if (keep_delims)
10644             sv_catpvn(sv, s, termlen);
10645     s += termlen;
10646
10647     if (has_utf8)
10648         SvUTF8_on(sv);
10649
10650     PL_multi_end = CopLINE(PL_curcop);
10651     CopLINE_set(PL_curcop, PL_multi_start);
10652     PL_parser->herelines = herelines;
10653
10654     /* if we allocated too much space, give some back */
10655     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10656         SvLEN_set(sv, SvCUR(sv) + 1);
10657         SvPV_renew(sv, SvLEN(sv));
10658     }
10659
10660     /* decide whether this is the first or second quoted string we've read
10661        for this op
10662     */
10663
10664     if (PL_lex_stuff)
10665         PL_parser->lex_sub_repl = sv;
10666     else
10667         PL_lex_stuff = sv;
10668     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10669     return s;
10670 }
10671
10672 /*
10673   scan_num
10674   takes: pointer to position in buffer
10675   returns: pointer to new position in buffer
10676   side-effects: builds ops for the constant in pl_yylval.op
10677
10678   Read a number in any of the formats that Perl accepts:
10679
10680   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10681   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10682   0b[01](_?[01])*                                       binary integers
10683   0[0-7](_?[0-7])*                                      octal integers
10684   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
10685   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
10686
10687   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10688   thing it reads.
10689
10690   If it reads a number without a decimal point or an exponent, it will
10691   try converting the number to an integer and see if it can do so
10692   without loss of precision.
10693 */
10694
10695 char *
10696 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10697 {
10698     const char *s = start;      /* current position in buffer */
10699     char *d;                    /* destination in temp buffer */
10700     char *e;                    /* end of temp buffer */
10701     NV nv;                              /* number read, as a double */
10702     SV *sv = NULL;                      /* place to put the converted number */
10703     bool floatit;                       /* boolean: int or float? */
10704     const char *lastub = NULL;          /* position of last underbar */
10705     static const char* const number_too_long = "Number too long";
10706     bool warned_about_underscore = 0;
10707 #define WARN_ABOUT_UNDERSCORE() \
10708         do { \
10709             if (!warned_about_underscore) { \
10710                 warned_about_underscore = 1; \
10711                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
10712                                "Misplaced _ in number"); \
10713             } \
10714         } while(0)
10715     /* Hexadecimal floating point.
10716      *
10717      * In many places (where we have quads and NV is IEEE 754 double)
10718      * we can fit the mantissa bits of a NV into an unsigned quad.
10719      * (Note that UVs might not be quads even when we have quads.)
10720      * This will not work everywhere, though (either no quads, or
10721      * using long doubles), in which case we have to resort to NV,
10722      * which will probably mean horrible loss of precision due to
10723      * multiple fp operations. */
10724     bool hexfp = FALSE;
10725     int total_bits = 0;
10726     int significant_bits = 0;
10727 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10728 #  define HEXFP_UQUAD
10729     Uquad_t hexfp_uquad = 0;
10730     int hexfp_frac_bits = 0;
10731 #else
10732 #  define HEXFP_NV
10733     NV hexfp_nv = 0.0;
10734 #endif
10735     NV hexfp_mult = 1.0;
10736     UV high_non_zero = 0; /* highest digit */
10737     int non_zero_integer_digits = 0;
10738
10739     PERL_ARGS_ASSERT_SCAN_NUM;
10740
10741     /* We use the first character to decide what type of number this is */
10742
10743     switch (*s) {
10744     default:
10745         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10746
10747     /* if it starts with a 0, it could be an octal number, a decimal in
10748        0.13 disguise, or a hexadecimal number, or a binary number. */
10749     case '0':
10750         {
10751           /* variables:
10752              u          holds the "number so far"
10753              shift      the power of 2 of the base
10754                         (hex == 4, octal == 3, binary == 1)
10755              overflowed was the number more than we can hold?
10756
10757              Shift is used when we add a digit.  It also serves as an "are
10758              we in octal/hex/binary?" indicator to disallow hex characters
10759              when in octal mode.
10760            */
10761             NV n = 0.0;
10762             UV u = 0;
10763             I32 shift;
10764             bool overflowed = FALSE;
10765             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10766             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10767             static const char* const bases[5] =
10768               { "", "binary", "", "octal", "hexadecimal" };
10769             static const char* const Bases[5] =
10770               { "", "Binary", "", "Octal", "Hexadecimal" };
10771             static const char* const maxima[5] =
10772               { "",
10773                 "0b11111111111111111111111111111111",
10774                 "",
10775                 "037777777777",
10776                 "0xffffffff" };
10777             const char *base, *Base, *max;
10778
10779             /* check for hex */
10780             if (isALPHA_FOLD_EQ(s[1], 'x')) {
10781                 shift = 4;
10782                 s += 2;
10783                 just_zero = FALSE;
10784             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10785                 shift = 1;
10786                 s += 2;
10787                 just_zero = FALSE;
10788             }
10789             /* check for a decimal in disguise */
10790             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10791                 goto decimal;
10792             /* so it must be octal */
10793             else {
10794                 shift = 3;
10795                 s++;
10796             }
10797
10798             if (*s == '_') {
10799                 WARN_ABOUT_UNDERSCORE();
10800                lastub = s++;
10801             }
10802
10803             base = bases[shift];
10804             Base = Bases[shift];
10805             max  = maxima[shift];
10806
10807             /* read the rest of the number */
10808             for (;;) {
10809                 /* x is used in the overflow test,
10810                    b is the digit we're adding on. */
10811                 UV x, b;
10812
10813                 switch (*s) {
10814
10815                 /* if we don't mention it, we're done */
10816                 default:
10817                     goto out;
10818
10819                 /* _ are ignored -- but warned about if consecutive */
10820                 case '_':
10821                     if (lastub && s == lastub + 1)
10822                         WARN_ABOUT_UNDERSCORE();
10823                     lastub = s++;
10824                     break;
10825
10826                 /* 8 and 9 are not octal */
10827                 case '8': case '9':
10828                     if (shift == 3)
10829                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10830                     /* FALLTHROUGH */
10831
10832                 /* octal digits */
10833                 case '2': case '3': case '4':
10834                 case '5': case '6': case '7':
10835                     if (shift == 1)
10836                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10837                     /* FALLTHROUGH */
10838
10839                 case '0': case '1':
10840                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10841                     goto digit;
10842
10843                 /* hex digits */
10844                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10845                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10846                     /* make sure they said 0x */
10847                     if (shift != 4)
10848                         goto out;
10849                     b = (*s++ & 7) + 9;
10850
10851                     /* Prepare to put the digit we have onto the end
10852                        of the number so far.  We check for overflows.
10853                     */
10854
10855                   digit:
10856                     just_zero = FALSE;
10857                     if (!overflowed) {
10858                         x = u << shift; /* make room for the digit */
10859
10860                         total_bits += shift;
10861
10862                         if ((x >> shift) != u
10863                             && !(PL_hints & HINT_NEW_BINARY)) {
10864                             overflowed = TRUE;
10865                             n = (NV) u;
10866                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10867                                              "Integer overflow in %s number",
10868                                              base);
10869                         } else
10870                             u = x | b;          /* add the digit to the end */
10871                     }
10872                     if (overflowed) {
10873                         n *= nvshift[shift];
10874                         /* If an NV has not enough bits in its
10875                          * mantissa to represent an UV this summing of
10876                          * small low-order numbers is a waste of time
10877                          * (because the NV cannot preserve the
10878                          * low-order bits anyway): we could just
10879                          * remember when did we overflow and in the
10880                          * end just multiply n by the right
10881                          * amount. */
10882                         n += (NV) b;
10883                     }
10884
10885                     if (high_non_zero == 0 && b > 0)
10886                         high_non_zero = b;
10887
10888                     if (high_non_zero)
10889                         non_zero_integer_digits++;
10890
10891                     /* this could be hexfp, but peek ahead
10892                      * to avoid matching ".." */
10893                     if (UNLIKELY(HEXFP_PEEK(s))) {
10894                         goto out;
10895                     }
10896
10897                     break;
10898                 }
10899             }
10900
10901           /* if we get here, we had success: make a scalar value from
10902              the number.
10903           */
10904           out:
10905
10906             /* final misplaced underbar check */
10907             if (s[-1] == '_')
10908                 WARN_ABOUT_UNDERSCORE();
10909
10910             if (UNLIKELY(HEXFP_PEEK(s))) {
10911                 /* Do sloppy (on the underbars) but quick detection
10912                  * (and value construction) for hexfp, the decimal
10913                  * detection will shortly be more thorough with the
10914                  * underbar checks. */
10915                 const char* h = s;
10916                 significant_bits = non_zero_integer_digits * shift;
10917 #ifdef HEXFP_UQUAD
10918                 hexfp_uquad = u;
10919 #else /* HEXFP_NV */
10920                 hexfp_nv = u;
10921 #endif
10922                 /* Ignore the leading zero bits of
10923                  * the high (first) non-zero digit. */
10924                 if (high_non_zero) {
10925                     if (high_non_zero < 0x8)
10926                         significant_bits--;
10927                     if (high_non_zero < 0x4)
10928                         significant_bits--;
10929                     if (high_non_zero < 0x2)
10930                         significant_bits--;
10931                 }
10932
10933                 if (*h == '.') {
10934 #ifdef HEXFP_NV
10935                     NV nv_mult = 1.0;
10936 #endif
10937                     bool accumulate = TRUE;
10938                     for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
10939                         if (isXDIGIT(*h)) {
10940                             U8 b = XDIGIT_VALUE(*h);
10941                             significant_bits += shift;
10942 #ifdef HEXFP_UQUAD
10943                             if (accumulate) {
10944                                 if (significant_bits < NV_MANT_DIG) {
10945                                     /* We are in the long "run" of xdigits,
10946                                      * accumulate the full four bits. */
10947                                     hexfp_uquad <<= shift;
10948                                     hexfp_uquad |= b;
10949                                     hexfp_frac_bits += shift;
10950                                 } else {
10951                                     /* We are at a hexdigit either at,
10952                                      * or straddling, the edge of mantissa.
10953                                      * We will try grabbing as many as
10954                                      * possible bits. */
10955                                     int tail =
10956                                       significant_bits - NV_MANT_DIG;
10957                                     if (tail <= 0)
10958                                        tail += shift;
10959                                     hexfp_uquad <<= tail;
10960                                     hexfp_uquad |= b >> (shift - tail);
10961                                     hexfp_frac_bits += tail;
10962
10963                                     /* Ignore the trailing zero bits
10964                                      * of the last non-zero xdigit.
10965                                      *
10966                                      * The assumption here is that if
10967                                      * one has input of e.g. the xdigit
10968                                      * eight (0x8), there is only one
10969                                      * bit being input, not the full
10970                                      * four bits.  Conversely, if one
10971                                      * specifies a zero xdigit, the
10972                                      * assumption is that one really
10973                                      * wants all those bits to be zero. */
10974                                     if (b) {
10975                                         if ((b & 0x1) == 0x0) {
10976                                             significant_bits--;
10977                                             if ((b & 0x2) == 0x0) {
10978                                                 significant_bits--;
10979                                                 if ((b & 0x4) == 0x0) {
10980                                                     significant_bits--;
10981                                                 }
10982                                             }
10983                                         }
10984                                     }
10985
10986                                     accumulate = FALSE;
10987                                 }
10988                             } else {
10989                                 /* Keep skipping the xdigits, and
10990                                  * accumulating the significant bits,
10991                                  * but do not shift the uquad
10992                                  * (which would catastrophically drop
10993                                  * high-order bits) or accumulate the
10994                                  * xdigits anymore. */
10995                             }
10996 #else /* HEXFP_NV */
10997                             if (accumulate) {
10998                                 nv_mult /= 16.0;
10999                                 if (nv_mult > 0.0)
11000                                     hexfp_nv += b * nv_mult;
11001                                 else
11002                                     accumulate = FALSE;
11003                             }
11004 #endif
11005                         }
11006                         if (significant_bits >= NV_MANT_DIG)
11007                             accumulate = FALSE;
11008                     }
11009                 }
11010
11011                 if ((total_bits > 0 || significant_bits > 0) &&
11012                     isALPHA_FOLD_EQ(*h, 'p')) {
11013                     bool negexp = FALSE;
11014                     h++;
11015                     if (*h == '+')
11016                         h++;
11017                     else if (*h == '-') {
11018                         negexp = TRUE;
11019                         h++;
11020                     }
11021                     if (isDIGIT(*h)) {
11022                         I32 hexfp_exp = 0;
11023                         while (isDIGIT(*h) || *h == '_') {
11024                             if (isDIGIT(*h)) {
11025                                 hexfp_exp *= 10;
11026                                 hexfp_exp += *h - '0';
11027 #ifdef NV_MIN_EXP
11028                                 if (negexp
11029                                     && -hexfp_exp < NV_MIN_EXP - 1) {
11030                                     /* NOTE: this means that the exponent
11031                                      * underflow warning happens for
11032                                      * the IEEE 754 subnormals (denormals),
11033                                      * because DBL_MIN_EXP etc are the lowest
11034                                      * possible binary (or, rather, DBL_RADIX-base)
11035                                      * exponent for normals, not subnormals.
11036                                      *
11037                                      * This may or may not be a good thing. */
11038                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11039                                                    "Hexadecimal float: exponent underflow");
11040                                     break;
11041                                 }
11042 #endif
11043 #ifdef NV_MAX_EXP
11044                                 if (!negexp
11045                                     && hexfp_exp > NV_MAX_EXP - 1) {
11046                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11047                                                    "Hexadecimal float: exponent overflow");
11048                                     break;
11049                                 }
11050 #endif
11051                             }
11052                             h++;
11053                         }
11054                         if (negexp)
11055                             hexfp_exp = -hexfp_exp;
11056 #ifdef HEXFP_UQUAD
11057                         hexfp_exp -= hexfp_frac_bits;
11058 #endif
11059                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
11060                         hexfp = TRUE;
11061                         goto decimal;
11062                     }
11063                 }
11064             }
11065
11066             if (overflowed) {
11067                 if (n > 4294967295.0)
11068                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11069                                    "%s number > %s non-portable",
11070                                    Base, max);
11071                 sv = newSVnv(n);
11072             }
11073             else {
11074 #if UVSIZE > 4
11075                 if (u > 0xffffffff)
11076                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11077                                    "%s number > %s non-portable",
11078                                    Base, max);
11079 #endif
11080                 sv = newSVuv(u);
11081             }
11082             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11083                 sv = new_constant(start, s - start, "integer",
11084                                   sv, NULL, NULL, 0);
11085             else if (PL_hints & HINT_NEW_BINARY)
11086                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11087         }
11088         break;
11089
11090     /*
11091       handle decimal numbers.
11092       we're also sent here when we read a 0 as the first digit
11093     */
11094     case '1': case '2': case '3': case '4': case '5':
11095     case '6': case '7': case '8': case '9': case '.':
11096       decimal:
11097         d = PL_tokenbuf;
11098         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11099         floatit = FALSE;
11100         if (hexfp) {
11101             floatit = TRUE;
11102             *d++ = '0';
11103             *d++ = 'x';
11104             s = start + 2;
11105         }
11106
11107         /* read next group of digits and _ and copy into d */
11108         while (isDIGIT(*s)
11109                || *s == '_'
11110                || UNLIKELY(hexfp && isXDIGIT(*s)))
11111         {
11112             /* skip underscores, checking for misplaced ones
11113                if -w is on
11114             */
11115             if (*s == '_') {
11116                 if (lastub && s == lastub + 1)
11117                     WARN_ABOUT_UNDERSCORE();
11118                 lastub = s++;
11119             }
11120             else {
11121                 /* check for end of fixed-length buffer */
11122                 if (d >= e)
11123                     Perl_croak(aTHX_ "%s", number_too_long);
11124                 /* if we're ok, copy the character */
11125                 *d++ = *s++;
11126             }
11127         }
11128
11129         /* final misplaced underbar check */
11130         if (lastub && s == lastub + 1)
11131             WARN_ABOUT_UNDERSCORE();
11132
11133         /* read a decimal portion if there is one.  avoid
11134            3..5 being interpreted as the number 3. followed
11135            by .5
11136         */
11137         if (*s == '.' && s[1] != '.') {
11138             floatit = TRUE;
11139             *d++ = *s++;
11140
11141             if (*s == '_') {
11142                 WARN_ABOUT_UNDERSCORE();
11143                 lastub = s;
11144             }
11145
11146             /* copy, ignoring underbars, until we run out of digits.
11147             */
11148             for (; isDIGIT(*s)
11149                    || *s == '_'
11150                    || UNLIKELY(hexfp && isXDIGIT(*s));
11151                  s++)
11152             {
11153                 /* fixed length buffer check */
11154                 if (d >= e)
11155                     Perl_croak(aTHX_ "%s", number_too_long);
11156                 if (*s == '_') {
11157                    if (lastub && s == lastub + 1)
11158                         WARN_ABOUT_UNDERSCORE();
11159                    lastub = s;
11160                 }
11161                 else
11162                     *d++ = *s;
11163             }
11164             /* fractional part ending in underbar? */
11165             if (s[-1] == '_')
11166                 WARN_ABOUT_UNDERSCORE();
11167             if (*s == '.' && isDIGIT(s[1])) {
11168                 /* oops, it's really a v-string, but without the "v" */
11169                 s = start;
11170                 goto vstring;
11171             }
11172         }
11173
11174         /* read exponent part, if present */
11175         if ((isALPHA_FOLD_EQ(*s, 'e')
11176               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11177             && strchr("+-0123456789_", s[1]))
11178         {
11179             floatit = TRUE;
11180
11181             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11182                ditto for p (hexfloats) */
11183             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11184                 /* At least some Mach atof()s don't grok 'E' */
11185                 *d++ = 'e';
11186             }
11187             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11188                 *d++ = 'p';
11189             }
11190
11191             s++;
11192
11193
11194             /* stray preinitial _ */
11195             if (*s == '_') {
11196                 WARN_ABOUT_UNDERSCORE();
11197                 lastub = s++;
11198             }
11199
11200             /* allow positive or negative exponent */
11201             if (*s == '+' || *s == '-')
11202                 *d++ = *s++;
11203
11204             /* stray initial _ */
11205             if (*s == '_') {
11206                 WARN_ABOUT_UNDERSCORE();
11207                 lastub = s++;
11208             }
11209
11210             /* read digits of exponent */
11211             while (isDIGIT(*s) || *s == '_') {
11212                 if (isDIGIT(*s)) {
11213                     if (d >= e)
11214                         Perl_croak(aTHX_ "%s", number_too_long);
11215                     *d++ = *s++;
11216                 }
11217                 else {
11218                    if (((lastub && s == lastub + 1)
11219                         || (!isDIGIT(s[1]) && s[1] != '_')))
11220                         WARN_ABOUT_UNDERSCORE();
11221                    lastub = s++;
11222                 }
11223             }
11224         }
11225
11226
11227         /*
11228            We try to do an integer conversion first if no characters
11229            indicating "float" have been found.
11230          */
11231
11232         if (!floatit) {
11233             UV uv;
11234             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11235
11236             if (flags == IS_NUMBER_IN_UV) {
11237               if (uv <= IV_MAX)
11238                 sv = newSViv(uv); /* Prefer IVs over UVs. */
11239               else
11240                 sv = newSVuv(uv);
11241             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11242               if (uv <= (UV) IV_MIN)
11243                 sv = newSViv(-(IV)uv);
11244               else
11245                 floatit = TRUE;
11246             } else
11247               floatit = TRUE;
11248         }
11249         if (floatit) {
11250             STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
11251             /* terminate the string */
11252             *d = '\0';
11253             if (UNLIKELY(hexfp)) {
11254 #  ifdef NV_MANT_DIG
11255                 if (significant_bits > NV_MANT_DIG)
11256                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11257                                    "Hexadecimal float: mantissa overflow");
11258 #  endif
11259 #ifdef HEXFP_UQUAD
11260                 nv = hexfp_uquad * hexfp_mult;
11261 #else /* HEXFP_NV */
11262                 nv = hexfp_nv * hexfp_mult;
11263 #endif
11264             } else {
11265                 nv = Atof(PL_tokenbuf);
11266             }
11267             RESTORE_LC_NUMERIC_UNDERLYING();
11268             sv = newSVnv(nv);
11269         }
11270
11271         if ( floatit
11272              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11273             const char *const key = floatit ? "float" : "integer";
11274             const STRLEN keylen = floatit ? 5 : 7;
11275             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11276                                 key, keylen, sv, NULL, NULL, 0);
11277         }
11278         break;
11279
11280     /* if it starts with a v, it could be a v-string */
11281     case 'v':
11282     vstring:
11283                 sv = newSV(5); /* preallocate storage space */
11284                 ENTER_with_name("scan_vstring");
11285                 SAVEFREESV(sv);
11286                 s = scan_vstring(s, PL_bufend, sv);
11287                 SvREFCNT_inc_simple_void_NN(sv);
11288                 LEAVE_with_name("scan_vstring");
11289         break;
11290     }
11291
11292     /* make the op for the constant and return */
11293
11294     if (sv)
11295         lvalp->opval = newSVOP(OP_CONST, 0, sv);
11296     else
11297         lvalp->opval = NULL;
11298
11299     return (char *)s;
11300 }
11301
11302 STATIC char *
11303 S_scan_formline(pTHX_ char *s)
11304 {
11305     SV * const stuff = newSVpvs("");
11306     bool needargs = FALSE;
11307     bool eofmt = FALSE;
11308
11309     PERL_ARGS_ASSERT_SCAN_FORMLINE;
11310
11311     while (!needargs) {
11312         char *eol;
11313         if (*s == '.') {
11314             char *t = s+1;
11315 #ifdef PERL_STRICT_CR
11316             while (SPACE_OR_TAB(*t))
11317                 t++;
11318 #else
11319             while (SPACE_OR_TAB(*t) || *t == '\r')
11320                 t++;
11321 #endif
11322             if (*t == '\n' || t == PL_bufend) {
11323                 eofmt = TRUE;
11324                 break;
11325             }
11326         }
11327         eol = (char *) memchr(s,'\n',PL_bufend-s);
11328         if (!eol++)
11329                 eol = PL_bufend;
11330         if (*s != '#') {
11331             char *t;
11332             for (t = s; t < eol; t++) {
11333                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11334                     needargs = FALSE;
11335                     goto enough;        /* ~~ must be first line in formline */
11336                 }
11337                 if (*t == '@' || *t == '^')
11338                     needargs = TRUE;
11339             }
11340             if (eol > s) {
11341                 sv_catpvn(stuff, s, eol-s);
11342 #ifndef PERL_STRICT_CR
11343                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11344                     char *end = SvPVX(stuff) + SvCUR(stuff);
11345                     end[-2] = '\n';
11346                     end[-1] = '\0';
11347                     SvCUR_set(stuff, SvCUR(stuff) - 1);
11348                 }
11349 #endif
11350             }
11351             else
11352               break;
11353         }
11354         s = (char*)eol;
11355         if ((PL_rsfp || PL_parser->filtered)
11356          && PL_parser->form_lex_state == LEX_NORMAL) {
11357             bool got_some;
11358             PL_bufptr = PL_bufend;
11359             COPLINE_INC_WITH_HERELINES;
11360             got_some = lex_next_chunk(0);
11361             CopLINE_dec(PL_curcop);
11362             s = PL_bufptr;
11363             if (!got_some)
11364                 break;
11365         }
11366         incline(s);
11367     }
11368   enough:
11369     if (!SvCUR(stuff) || needargs)
11370         PL_lex_state = PL_parser->form_lex_state;
11371     if (SvCUR(stuff)) {
11372         PL_expect = XSTATE;
11373         if (needargs) {
11374             const char *s2 = s;
11375             while (isSPACE(*s2) && *s2 != '\n')
11376                 s2++;
11377             if (*s2 == '{') {
11378                 PL_expect = XTERMBLOCK;
11379                 NEXTVAL_NEXTTOKE.ival = 0;
11380                 force_next(DO);
11381             }
11382             NEXTVAL_NEXTTOKE.ival = 0;
11383             force_next(FORMLBRACK);
11384         }
11385         if (!IN_BYTES) {
11386             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11387                 SvUTF8_on(stuff);
11388         }
11389         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
11390         force_next(THING);
11391     }
11392     else {
11393         SvREFCNT_dec(stuff);
11394         if (eofmt)
11395             PL_lex_formbrack = 0;
11396     }
11397     return s;
11398 }
11399
11400 I32
11401 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11402 {
11403     const I32 oldsavestack_ix = PL_savestack_ix;
11404     CV* const outsidecv = PL_compcv;
11405
11406     SAVEI32(PL_subline);
11407     save_item(PL_subname);
11408     SAVESPTR(PL_compcv);
11409
11410     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11411     CvFLAGS(PL_compcv) |= flags;
11412
11413     PL_subline = CopLINE(PL_curcop);
11414     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11415     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11416     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11417     if (outsidecv && CvPADLIST(outsidecv))
11418         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
11419
11420     return oldsavestack_ix;
11421 }
11422
11423 static int
11424 S_yywarn(pTHX_ const char *const s, U32 flags)
11425 {
11426     PERL_ARGS_ASSERT_YYWARN;
11427
11428     PL_in_eval |= EVAL_WARNONLY;
11429     yyerror_pv(s, flags);
11430     return 0;
11431 }
11432
11433 void
11434 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
11435 {
11436     PERL_ARGS_ASSERT_ABORT_EXECUTION;
11437
11438     if (PL_minus_c)
11439         Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
11440     else {
11441         Perl_croak(aTHX_
11442                 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
11443     }
11444     NOT_REACHED; /* NOTREACHED */
11445 }
11446
11447 void
11448 Perl_yyquit(pTHX)
11449 {
11450     /* Called, after at least one error has been found, to abort the parse now,
11451      * instead of trying to forge ahead */
11452
11453     yyerror_pvn(NULL, 0, 0);
11454 }
11455
11456 int
11457 Perl_yyerror(pTHX_ const char *const s)
11458 {
11459     PERL_ARGS_ASSERT_YYERROR;
11460     return yyerror_pvn(s, strlen(s), 0);
11461 }
11462
11463 int
11464 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11465 {
11466     PERL_ARGS_ASSERT_YYERROR_PV;
11467     return yyerror_pvn(s, strlen(s), flags);
11468 }
11469
11470 int
11471 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11472 {
11473     const char *context = NULL;
11474     int contlen = -1;
11475     SV *msg;
11476     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11477     int yychar  = PL_parser->yychar;
11478
11479     /* Output error message 's' with length 'len'.  'flags' are SV flags that
11480      * apply.  If the number of errors found is large enough, it abandons
11481      * parsing.  If 's' is NULL, there is no message, and it abandons
11482      * processing unconditionally */
11483
11484     if (s != NULL) {
11485         if (!yychar || (yychar == ';' && !PL_rsfp))
11486             sv_catpvs(where_sv, "at EOF");
11487         else if (   PL_oldoldbufptr
11488                  && PL_bufptr > PL_oldoldbufptr
11489                  && PL_bufptr - PL_oldoldbufptr < 200
11490                  && PL_oldoldbufptr != PL_oldbufptr
11491                  && PL_oldbufptr != PL_bufptr)
11492         {
11493             /*
11494                     Only for NetWare:
11495                     The code below is removed for NetWare because it
11496                     abends/crashes on NetWare when the script has error such as
11497                     not having the closing quotes like:
11498                         if ($var eq "value)
11499                     Checking of white spaces is anyway done in NetWare code.
11500             */
11501 #ifndef NETWARE
11502             while (isSPACE(*PL_oldoldbufptr))
11503                 PL_oldoldbufptr++;
11504 #endif
11505             context = PL_oldoldbufptr;
11506             contlen = PL_bufptr - PL_oldoldbufptr;
11507         }
11508         else if (  PL_oldbufptr
11509                 && PL_bufptr > PL_oldbufptr
11510                 && PL_bufptr - PL_oldbufptr < 200
11511                 && PL_oldbufptr != PL_bufptr) {
11512             /*
11513                     Only for NetWare:
11514                     The code below is removed for NetWare because it
11515                     abends/crashes on NetWare when the script has error such as
11516                     not having the closing quotes like:
11517                         if ($var eq "value)
11518                     Checking of white spaces is anyway done in NetWare code.
11519             */
11520 #ifndef NETWARE
11521             while (isSPACE(*PL_oldbufptr))
11522                 PL_oldbufptr++;
11523 #endif
11524             context = PL_oldbufptr;
11525             contlen = PL_bufptr - PL_oldbufptr;
11526         }
11527         else if (yychar > 255)
11528             sv_catpvs(where_sv, "next token ???");
11529         else if (yychar == YYEMPTY) {
11530             if (PL_lex_state == LEX_NORMAL)
11531                 sv_catpvs(where_sv, "at end of line");
11532             else if (PL_lex_inpat)
11533                 sv_catpvs(where_sv, "within pattern");
11534             else
11535                 sv_catpvs(where_sv, "within string");
11536         }
11537         else {
11538             sv_catpvs(where_sv, "next char ");
11539             if (yychar < 32)
11540                 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11541             else if (isPRINT_LC(yychar)) {
11542                 const char string = yychar;
11543                 sv_catpvn(where_sv, &string, 1);
11544             }
11545             else
11546                 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11547         }
11548         msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11549         Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
11550             OutCopFILE(PL_curcop),
11551             (IV)(PL_parser->preambling == NOLINE
11552                    ? CopLINE(PL_curcop)
11553                    : PL_parser->preambling));
11554         if (context)
11555             Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
11556                                  UTF8fARG(UTF, contlen, context));
11557         else
11558             Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
11559         if (   PL_multi_start < PL_multi_end
11560             && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
11561         {
11562             Perl_sv_catpvf(aTHX_ msg,
11563             "  (Might be a runaway multi-line %c%c string starting on"
11564             " line %" IVdf ")\n",
11565                     (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11566             PL_multi_end = 0;
11567         }
11568         if (PL_in_eval & EVAL_WARNONLY) {
11569             PL_in_eval &= ~EVAL_WARNONLY;
11570             Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
11571         }
11572         else {
11573             qerror(msg);
11574         }
11575     }
11576     if (s == NULL || PL_error_count >= 10) {
11577         const char * msg = "";
11578         const char * const name = OutCopFILE(PL_curcop);
11579
11580         if (PL_in_eval) {
11581             SV * errsv = ERRSV;
11582             if (SvCUR(errsv)) {
11583                 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
11584             }
11585         }
11586
11587         if (s == NULL) {
11588             abort_execution(msg, name);
11589         }
11590         else {
11591             Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
11592         }
11593     }
11594     PL_in_my = 0;
11595     PL_in_my_stash = NULL;
11596     return 0;
11597 }
11598
11599 STATIC char*
11600 S_swallow_bom(pTHX_ U8 *s)
11601 {
11602     const STRLEN slen = SvCUR(PL_linestr);
11603
11604     PERL_ARGS_ASSERT_SWALLOW_BOM;
11605
11606     switch (s[0]) {
11607     case 0xFF:
11608         if (s[1] == 0xFE) {
11609             /* UTF-16 little-endian? (or UTF-32LE?) */
11610             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11611                 /* diag_listed_as: Unsupported script encoding %s */
11612                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11613 #ifndef PERL_NO_UTF16_FILTER
11614             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11615             s += 2;
11616             if (PL_bufend > (char*)s) {
11617                 s = add_utf16_textfilter(s, TRUE);
11618             }
11619 #else
11620             /* diag_listed_as: Unsupported script encoding %s */
11621             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11622 #endif
11623         }
11624         break;
11625     case 0xFE:
11626         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11627 #ifndef PERL_NO_UTF16_FILTER
11628             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11629             s += 2;
11630             if (PL_bufend > (char *)s) {
11631                 s = add_utf16_textfilter(s, FALSE);
11632             }
11633 #else
11634             /* diag_listed_as: Unsupported script encoding %s */
11635             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11636 #endif
11637         }
11638         break;
11639     case BOM_UTF8_FIRST_BYTE: {
11640         const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11641         if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11642             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11643             s += len + 1;                      /* UTF-8 */
11644         }
11645         break;
11646     }
11647     case 0:
11648         if (slen > 3) {
11649              if (s[1] == 0) {
11650                   if (s[2] == 0xFE && s[3] == 0xFF) {
11651                        /* UTF-32 big-endian */
11652                        /* diag_listed_as: Unsupported script encoding %s */
11653                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11654                   }
11655              }
11656              else if (s[2] == 0 && s[3] != 0) {
11657                   /* Leading bytes
11658                    * 00 xx 00 xx
11659                    * are a good indicator of UTF-16BE. */
11660 #ifndef PERL_NO_UTF16_FILTER
11661                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11662                   s = add_utf16_textfilter(s, FALSE);
11663 #else
11664                   /* diag_listed_as: Unsupported script encoding %s */
11665                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11666 #endif
11667              }
11668         }
11669         break;
11670
11671     default:
11672          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11673                   /* Leading bytes
11674                    * xx 00 xx 00
11675                    * are a good indicator of UTF-16LE. */
11676 #ifndef PERL_NO_UTF16_FILTER
11677               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11678               s = add_utf16_textfilter(s, TRUE);
11679 #else
11680               /* diag_listed_as: Unsupported script encoding %s */
11681               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11682 #endif
11683          }
11684     }
11685     return (char*)s;
11686 }
11687
11688
11689 #ifndef PERL_NO_UTF16_FILTER
11690 static I32
11691 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11692 {
11693     SV *const filter = FILTER_DATA(idx);
11694     /* We re-use this each time round, throwing the contents away before we
11695        return.  */
11696     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11697     SV *const utf8_buffer = filter;
11698     IV status = IoPAGE(filter);
11699     const bool reverse = cBOOL(IoLINES(filter));
11700     I32 retval;
11701
11702     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11703
11704     /* As we're automatically added, at the lowest level, and hence only called
11705        from this file, we can be sure that we're not called in block mode. Hence
11706        don't bother writing code to deal with block mode.  */
11707     if (maxlen) {
11708         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11709     }
11710     if (status < 0) {
11711         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
11712     }
11713     DEBUG_P(PerlIO_printf(Perl_debug_log,
11714                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
11715                           FPTR2DPTR(void *, S_utf16_textfilter),
11716                           reverse ? 'l' : 'b', idx, maxlen, status,
11717                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11718
11719     while (1) {
11720         STRLEN chars;
11721         STRLEN have;
11722         I32 newlen;
11723         U8 *end;
11724         /* First, look in our buffer of existing UTF-8 data:  */
11725         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11726
11727         if (nl) {
11728             ++nl;
11729         } else if (status == 0) {
11730             /* EOF */
11731             IoPAGE(filter) = 0;
11732             nl = SvEND(utf8_buffer);
11733         }
11734         if (nl) {
11735             STRLEN got = nl - SvPVX(utf8_buffer);
11736             /* Did we have anything to append?  */
11737             retval = got != 0;
11738             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11739             /* Everything else in this code works just fine if SVp_POK isn't
11740                set.  This, however, needs it, and we need it to work, else
11741                we loop infinitely because the buffer is never consumed.  */
11742             sv_chop(utf8_buffer, nl);
11743             break;
11744         }
11745
11746         /* OK, not a complete line there, so need to read some more UTF-16.
11747            Read an extra octect if the buffer currently has an odd number. */
11748         while (1) {
11749             if (status <= 0)
11750                 break;
11751             if (SvCUR(utf16_buffer) >= 2) {
11752                 /* Location of the high octet of the last complete code point.
11753                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11754                    *coupled* with all the benefits of partial reads and
11755                    endianness.  */
11756                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11757                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11758
11759                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11760                     break;
11761                 }
11762
11763                 /* We have the first half of a surrogate. Read more.  */
11764                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11765             }
11766
11767             status = FILTER_READ(idx + 1, utf16_buffer,
11768                                  160 + (SvCUR(utf16_buffer) & 1));
11769             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
11770             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11771             if (status < 0) {
11772                 /* Error */
11773                 IoPAGE(filter) = status;
11774                 return status;
11775             }
11776         }
11777
11778         chars = SvCUR(utf16_buffer) >> 1;
11779         have = SvCUR(utf8_buffer);
11780         SvGROW(utf8_buffer, have + chars * 3 + 1);
11781
11782         if (reverse) {
11783             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11784                                          (U8*)SvPVX_const(utf8_buffer) + have,
11785                                          chars * 2, &newlen);
11786         } else {
11787             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11788                                 (U8*)SvPVX_const(utf8_buffer) + have,
11789                                 chars * 2, &newlen);
11790         }
11791         SvCUR_set(utf8_buffer, have + newlen);
11792         *end = '\0';
11793
11794         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11795            it's private to us, and utf16_to_utf8{,reversed} take a
11796            (pointer,length) pair, rather than a NUL-terminated string.  */
11797         if(SvCUR(utf16_buffer) & 1) {
11798             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11799             SvCUR_set(utf16_buffer, 1);
11800         } else {
11801             SvCUR_set(utf16_buffer, 0);
11802         }
11803     }
11804     DEBUG_P(PerlIO_printf(Perl_debug_log,
11805                           "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
11806                           status,
11807                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11808     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11809     return retval;
11810 }
11811
11812 static U8 *
11813 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11814 {
11815     SV *filter = filter_add(S_utf16_textfilter, NULL);
11816
11817     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11818
11819     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11820     SvPVCLEAR(filter);
11821     IoLINES(filter) = reversed;
11822     IoPAGE(filter) = 1; /* Not EOF */
11823
11824     /* Sadly, we have to return a valid pointer, come what may, so we have to
11825        ignore any error return from this.  */
11826     SvCUR_set(PL_linestr, 0);
11827     if (FILTER_READ(0, PL_linestr, 0)) {
11828         SvUTF8_on(PL_linestr);
11829     } else {
11830         SvUTF8_on(PL_linestr);
11831     }
11832     PL_bufend = SvEND(PL_linestr);
11833     return (U8*)SvPVX(PL_linestr);
11834 }
11835 #endif
11836
11837 /*
11838 Returns a pointer to the next character after the parsed
11839 vstring, as well as updating the passed in sv.
11840
11841 Function must be called like
11842
11843         sv = sv_2mortal(newSV(5));
11844         s = scan_vstring(s,e,sv);
11845
11846 where s and e are the start and end of the string.
11847 The sv should already be large enough to store the vstring
11848 passed in, for performance reasons.
11849
11850 This function may croak if fatal warnings are enabled in the
11851 calling scope, hence the sv_2mortal in the example (to prevent
11852 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
11853 sv_2mortal.
11854
11855 */
11856
11857 char *
11858 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11859 {
11860     const char *pos = s;
11861     const char *start = s;
11862
11863     PERL_ARGS_ASSERT_SCAN_VSTRING;
11864
11865     if (*pos == 'v') pos++;  /* get past 'v' */
11866     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11867         pos++;
11868     if ( *pos != '.') {
11869         /* this may not be a v-string if followed by => */
11870         const char *next = pos;
11871         while (next < e && isSPACE(*next))
11872             ++next;
11873         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11874             /* return string not v-string */
11875             sv_setpvn(sv,(char *)s,pos-s);
11876             return (char *)pos;
11877         }
11878     }
11879
11880     if (!isALPHA(*pos)) {
11881         U8 tmpbuf[UTF8_MAXBYTES+1];
11882
11883         if (*s == 'v')
11884             s++;  /* get past 'v' */
11885
11886         SvPVCLEAR(sv);
11887
11888         for (;;) {
11889             /* this is atoi() that tolerates underscores */
11890             U8 *tmpend;
11891             UV rev = 0;
11892             const char *end = pos;
11893             UV mult = 1;
11894             while (--end >= s) {
11895                 if (*end != '_') {
11896                     const UV orev = rev;
11897                     rev += (*end - '0') * mult;
11898                     mult *= 10;
11899                     if (orev > rev)
11900                         /* diag_listed_as: Integer overflow in %s number */
11901                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11902                                          "Integer overflow in decimal number");
11903                 }
11904             }
11905
11906             /* Append native character for the rev point */
11907             tmpend = uvchr_to_utf8(tmpbuf, rev);
11908             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11909             if (!UVCHR_IS_INVARIANT(rev))
11910                  SvUTF8_on(sv);
11911             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11912                  s = ++pos;
11913             else {
11914                  s = pos;
11915                  break;
11916             }
11917             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11918                  pos++;
11919         }
11920         SvPOK_on(sv);
11921         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11922         SvRMAGICAL_on(sv);
11923     }
11924     return (char *)s;
11925 }
11926
11927 int
11928 Perl_keyword_plugin_standard(pTHX_
11929         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11930 {
11931     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11932     PERL_UNUSED_CONTEXT;
11933     PERL_UNUSED_ARG(keyword_ptr);
11934     PERL_UNUSED_ARG(keyword_len);
11935     PERL_UNUSED_ARG(op_ptr);
11936     return KEYWORD_PLUGIN_DECLINE;
11937 }
11938
11939 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11940 static void
11941 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11942 {
11943     SAVEI32(PL_lex_brackets);
11944     if (PL_lex_brackets > 100)
11945         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11946     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11947     SAVEI32(PL_lex_allbrackets);
11948     PL_lex_allbrackets = 0;
11949     SAVEI8(PL_lex_fakeeof);
11950     PL_lex_fakeeof = (U8)fakeeof;
11951     if(yyparse(gramtype) && !PL_parser->error_count)
11952         qerror(Perl_mess(aTHX_ "Parse error"));
11953 }
11954
11955 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11956 static OP *
11957 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11958 {
11959     OP *o;
11960     ENTER;
11961     SAVEVPTR(PL_eval_root);
11962     PL_eval_root = NULL;
11963     parse_recdescent(gramtype, fakeeof);
11964     o = PL_eval_root;
11965     LEAVE;
11966     return o;
11967 }
11968
11969 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11970 static OP *
11971 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11972 {
11973     OP *exprop;
11974     if (flags & ~PARSE_OPTIONAL)
11975         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11976     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11977     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11978         if (!PL_parser->error_count)
11979             qerror(Perl_mess(aTHX_ "Parse error"));
11980         exprop = newOP(OP_NULL, 0);
11981     }
11982     return exprop;
11983 }
11984
11985 /*
11986 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11987
11988 Parse a Perl arithmetic expression.  This may contain operators of precedence
11989 down to the bit shift operators.  The expression must be followed (and thus
11990 terminated) either by a comparison or lower-precedence operator or by
11991 something that would normally terminate an expression such as semicolon.
11992 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11993 otherwise it is mandatory.  It is up to the caller to ensure that the
11994 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11995 the source of the code to be parsed and the lexical context for the
11996 expression.
11997
11998 The op tree representing the expression is returned.  If an optional
11999 expression is absent, a null pointer is returned, otherwise the pointer
12000 will be non-null.
12001
12002 If an error occurs in parsing or compilation, in most cases a valid op
12003 tree is returned anyway.  The error is reflected in the parser state,
12004 normally resulting in a single exception at the top level of parsing
12005 which covers all the compilation errors that occurred.  Some compilation
12006 errors, however, will throw an exception immediately.
12007
12008 =cut
12009 */
12010
12011 OP *
12012 Perl_parse_arithexpr(pTHX_ U32 flags)
12013 {
12014     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12015 }
12016
12017 /*
12018 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12019
12020 Parse a Perl term expression.  This may contain operators of precedence
12021 down to the assignment operators.  The expression must be followed (and thus
12022 terminated) either by a comma or lower-precedence operator or by
12023 something that would normally terminate an expression such as semicolon.
12024 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12025 otherwise it is mandatory.  It is up to the caller to ensure that the
12026 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12027 the source of the code to be parsed and the lexical context for the
12028 expression.
12029
12030 The op tree representing the expression is returned.  If an optional
12031 expression is absent, a null pointer is returned, otherwise the pointer
12032 will be non-null.
12033
12034 If an error occurs in parsing or compilation, in most cases a valid op
12035 tree is returned anyway.  The error is reflected in the parser state,
12036 normally resulting in a single exception at the top level of parsing
12037 which covers all the compilation errors that occurred.  Some compilation
12038 errors, however, will throw an exception immediately.
12039
12040 =cut
12041 */
12042
12043 OP *
12044 Perl_parse_termexpr(pTHX_ U32 flags)
12045 {
12046     return parse_expr(LEX_FAKEEOF_COMMA, flags);
12047 }
12048
12049 /*
12050 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12051
12052 Parse a Perl list expression.  This may contain operators of precedence
12053 down to the comma operator.  The expression must be followed (and thus
12054 terminated) either by a low-precedence logic operator such as C<or> or by
12055 something that would normally terminate an expression such as semicolon.
12056 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12057 otherwise it is mandatory.  It is up to the caller to ensure that the
12058 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12059 the source of the code to be parsed and the lexical context for the
12060 expression.
12061
12062 The op tree representing the expression is returned.  If an optional
12063 expression is absent, a null pointer is returned, otherwise the pointer
12064 will be non-null.
12065
12066 If an error occurs in parsing or compilation, in most cases a valid op
12067 tree is returned anyway.  The error is reflected in the parser state,
12068 normally resulting in a single exception at the top level of parsing
12069 which covers all the compilation errors that occurred.  Some compilation
12070 errors, however, will throw an exception immediately.
12071
12072 =cut
12073 */
12074
12075 OP *
12076 Perl_parse_listexpr(pTHX_ U32 flags)
12077 {
12078     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12079 }
12080
12081 /*
12082 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12083
12084 Parse a single complete Perl expression.  This allows the full
12085 expression grammar, including the lowest-precedence operators such
12086 as C<or>.  The expression must be followed (and thus terminated) by a
12087 token that an expression would normally be terminated by: end-of-file,
12088 closing bracketing punctuation, semicolon, or one of the keywords that
12089 signals a postfix expression-statement modifier.  If C<flags> has the
12090 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
12091 mandatory.  It is up to the caller to ensure that the dynamic parser
12092 state (L</PL_parser> et al) is correctly set to reflect the source of
12093 the code to be parsed and the lexical context for the expression.
12094
12095 The op tree representing the expression is returned.  If an optional
12096 expression is absent, a null pointer is returned, otherwise the pointer
12097 will be non-null.
12098
12099 If an error occurs in parsing or compilation, in most cases a valid op
12100 tree is returned anyway.  The error is reflected in the parser state,
12101 normally resulting in a single exception at the top level of parsing
12102 which covers all the compilation errors that occurred.  Some compilation
12103 errors, however, will throw an exception immediately.
12104
12105 =cut
12106 */
12107
12108 OP *
12109 Perl_parse_fullexpr(pTHX_ U32 flags)
12110 {
12111     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12112 }
12113
12114 /*
12115 =for apidoc Amx|OP *|parse_block|U32 flags
12116
12117 Parse a single complete Perl code block.  This consists of an opening
12118 brace, a sequence of statements, and a closing brace.  The block
12119 constitutes a lexical scope, so C<my> variables and various compile-time
12120 effects can be contained within it.  It is up to the caller to ensure
12121 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12122 reflect the source of the code to be parsed and the lexical context for
12123 the statement.
12124
12125 The op tree representing the code block is returned.  This is always a
12126 real op, never a null pointer.  It will normally be a C<lineseq> list,
12127 including C<nextstate> or equivalent ops.  No ops to construct any kind
12128 of runtime scope are included by virtue of it being a block.
12129
12130 If an error occurs in parsing or compilation, in most cases a valid op
12131 tree (most likely null) is returned anyway.  The error is reflected in
12132 the parser state, normally resulting in a single exception at the top
12133 level of parsing which covers all the compilation errors that occurred.
12134 Some compilation errors, however, will throw an exception immediately.
12135
12136 The C<flags> parameter is reserved for future use, and must always
12137 be zero.
12138
12139 =cut
12140 */
12141
12142 OP *
12143 Perl_parse_block(pTHX_ U32 flags)
12144 {
12145     if (flags)
12146         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12147     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12148 }
12149
12150 /*
12151 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12152
12153 Parse a single unadorned Perl statement.  This may be a normal imperative
12154 statement or a declaration that has compile-time effect.  It does not
12155 include any label or other affixture.  It is up to the caller to ensure
12156 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12157 reflect the source of the code to be parsed and the lexical context for
12158 the statement.
12159
12160 The op tree representing the statement is returned.  This may be a
12161 null pointer if the statement is null, for example if it was actually
12162 a subroutine definition (which has compile-time side effects).  If not
12163 null, it will be ops directly implementing the statement, suitable to
12164 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
12165 equivalent op (except for those embedded in a scope contained entirely
12166 within the statement).
12167
12168 If an error occurs in parsing or compilation, in most cases a valid op
12169 tree (most likely null) is returned anyway.  The error is reflected in
12170 the parser state, normally resulting in a single exception at the top
12171 level of parsing which covers all the compilation errors that occurred.
12172 Some compilation errors, however, will throw an exception immediately.
12173
12174 The C<flags> parameter is reserved for future use, and must always
12175 be zero.
12176
12177 =cut
12178 */
12179
12180 OP *
12181 Perl_parse_barestmt(pTHX_ U32 flags)
12182 {
12183     if (flags)
12184         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12185     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12186 }
12187
12188 /*
12189 =for apidoc Amx|SV *|parse_label|U32 flags
12190
12191 Parse a single label, possibly optional, of the type that may prefix a
12192 Perl statement.  It is up to the caller to ensure that the dynamic parser
12193 state (L</PL_parser> et al) is correctly set to reflect the source of
12194 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
12195 label is optional, otherwise it is mandatory.
12196
12197 The name of the label is returned in the form of a fresh scalar.  If an
12198 optional label is absent, a null pointer is returned.
12199
12200 If an error occurs in parsing, which can only occur if the label is
12201 mandatory, a valid label is returned anyway.  The error is reflected in
12202 the parser state, normally resulting in a single exception at the top
12203 level of parsing which covers all the compilation errors that occurred.
12204
12205 =cut
12206 */
12207
12208 SV *
12209 Perl_parse_label(pTHX_ U32 flags)
12210 {
12211     if (flags & ~PARSE_OPTIONAL)
12212         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12213     if (PL_nexttoke) {
12214         PL_parser->yychar = yylex();
12215         if (PL_parser->yychar == LABEL) {
12216             char * const lpv = pl_yylval.pval;
12217             STRLEN llen = strlen(lpv);
12218             PL_parser->yychar = YYEMPTY;
12219             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12220         } else {
12221             yyunlex();
12222             goto no_label;
12223         }
12224     } else {
12225         char *s, *t;
12226         STRLEN wlen, bufptr_pos;
12227         lex_read_space(0);
12228         t = s = PL_bufptr;
12229         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
12230             goto no_label;
12231         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12232         if (word_takes_any_delimiter(s, wlen))
12233             goto no_label;
12234         bufptr_pos = s - SvPVX(PL_linestr);
12235         PL_bufptr = t;
12236         lex_read_space(LEX_KEEP_PREVIOUS);
12237         t = PL_bufptr;
12238         s = SvPVX(PL_linestr) + bufptr_pos;
12239         if (t[0] == ':' && t[1] != ':') {
12240             PL_oldoldbufptr = PL_oldbufptr;
12241             PL_oldbufptr = s;
12242             PL_bufptr = t+1;
12243             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12244         } else {
12245             PL_bufptr = s;
12246             no_label:
12247             if (flags & PARSE_OPTIONAL) {
12248                 return NULL;
12249             } else {
12250                 qerror(Perl_mess(aTHX_ "Parse error"));
12251                 return newSVpvs("x");
12252             }
12253         }
12254     }
12255 }
12256
12257 /*
12258 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12259
12260 Parse a single complete Perl statement.  This may be a normal imperative
12261 statement or a declaration that has compile-time effect, and may include
12262 optional labels.  It is up to the caller to ensure that the dynamic
12263 parser state (L</PL_parser> et al) is correctly set to reflect the source
12264 of the code to be parsed and the lexical context for the statement.
12265
12266 The op tree representing the statement is returned.  This may be a
12267 null pointer if the statement is null, for example if it was actually
12268 a subroutine definition (which has compile-time side effects).  If not
12269 null, it will be the result of a L</newSTATEOP> call, normally including
12270 a C<nextstate> or equivalent op.
12271
12272 If an error occurs in parsing or compilation, in most cases a valid op
12273 tree (most likely null) is returned anyway.  The error is reflected in
12274 the parser state, normally resulting in a single exception at the top
12275 level of parsing which covers all the compilation errors that occurred.
12276 Some compilation errors, however, will throw an exception immediately.
12277
12278 The C<flags> parameter is reserved for future use, and must always
12279 be zero.
12280
12281 =cut
12282 */
12283
12284 OP *
12285 Perl_parse_fullstmt(pTHX_ U32 flags)
12286 {
12287     if (flags)
12288         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12289     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12290 }
12291
12292 /*
12293 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12294
12295 Parse a sequence of zero or more Perl statements.  These may be normal
12296 imperative statements, including optional labels, or declarations
12297 that have compile-time effect, or any mixture thereof.  The statement
12298 sequence ends when a closing brace or end-of-file is encountered in a
12299 place where a new statement could have validly started.  It is up to
12300 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12301 is correctly set to reflect the source of the code to be parsed and the
12302 lexical context for the statements.
12303
12304 The op tree representing the statement sequence is returned.  This may
12305 be a null pointer if the statements were all null, for example if there
12306 were no statements or if there were only subroutine definitions (which
12307 have compile-time side effects).  If not null, it will be a C<lineseq>
12308 list, normally including C<nextstate> or equivalent ops.
12309
12310 If an error occurs in parsing or compilation, in most cases a valid op
12311 tree is returned anyway.  The error is reflected in the parser state,
12312 normally resulting in a single exception at the top level of parsing
12313 which covers all the compilation errors that occurred.  Some compilation
12314 errors, however, will throw an exception immediately.
12315
12316 The C<flags> parameter is reserved for future use, and must always
12317 be zero.
12318
12319 =cut
12320 */
12321
12322 OP *
12323 Perl_parse_stmtseq(pTHX_ U32 flags)
12324 {
12325     OP *stmtseqop;
12326     I32 c;
12327     if (flags)
12328         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12329     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12330     c = lex_peek_unichar(0);
12331     if (c != -1 && c != /*{*/'}')
12332         qerror(Perl_mess(aTHX_ "Parse error"));
12333     return stmtseqop;
12334 }
12335
12336 /*
12337  * ex: set ts=8 sts=4 sw=4 et:
12338  */