toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "dquote_inline.h"
   42
      /* Front end for S_new_constant(): supplies aTHX_ and passes argument c
       * through STR_WITH_LEN(), so that one macro argument expands to what
       * S_new_constant() takes in that position (presumably a string plus its
       * length -- confirm against the definition of STR_WITH_LEN). */
   43 #define new_constant(a,b,c,d,e,f,g)     \
   44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
   45
      /* The yylval member of the parser PL_parser currently points at. */
   46 #define pl_yylval       (PL_parser->yylval)
  47
   48 /* XXX temporary backwards compatibility */
      /* Each PL_xxx name below expands to the like-named member of the parser
       * object PL_parser currently points at.  The expansions dereference
       * PL_parser, so they are only meaningful while it is non-NULL. */
   49 #define PL_lex_brackets         (PL_parser->lex_brackets)
   50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
   51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
   52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
   53 #define PL_lex_casemods         (PL_parser->lex_casemods)
   54 #define PL_lex_casestack        (PL_parser->lex_casestack)
   55 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
   56 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
   57 #define PL_lex_inpat            (PL_parser->lex_inpat)
   58 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
   59 #define PL_lex_op               (PL_parser->lex_op)
   60 #define PL_lex_repl             (PL_parser->lex_repl)
   61 #define PL_lex_starts           (PL_parser->lex_starts)
   62 #define PL_lex_stuff            (PL_parser->lex_stuff)
   63 #define PL_multi_start          (PL_parser->multi_start)
   64 #define PL_multi_open           (PL_parser->multi_open)
   65 #define PL_multi_close          (PL_parser->multi_close)
   66 #define PL_preambled            (PL_parser->preambled)
   67 #define PL_linestr              (PL_parser->linestr)
   68 #define PL_expect               (PL_parser->expect)
   69 #define PL_copline              (PL_parser->copline)
   70 #define PL_bufptr               (PL_parser->bufptr)
   71 #define PL_oldbufptr            (PL_parser->oldbufptr)
   72 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
   73 #define PL_linestart            (PL_parser->linestart)
   74 #define PL_bufend               (PL_parser->bufend)
   75 #define PL_last_uni             (PL_parser->last_uni)
   76 #define PL_last_lop             (PL_parser->last_lop)
   77 #define PL_last_lop_op          (PL_parser->last_lop_op)
   78 #define PL_lex_state            (PL_parser->lex_state)
   79 #define PL_rsfp                 (PL_parser->rsfp)
   80 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
   81 #define PL_in_my                (PL_parser->in_my)
   82 #define PL_in_my_stash          (PL_parser->in_my_stash)
   83 #define PL_tokenbuf             (PL_parser->tokenbuf)
   84 #define PL_multi_end            (PL_parser->multi_end)
   85 #define PL_error_count          (PL_parser->error_count)
   86
      /* The pending-token members of the current parser. */
   87 #  define PL_nexttoke           (PL_parser->nexttoke)
   88 #  define PL_nexttype           (PL_parser->nexttype)
   89 #  define PL_nextval            (PL_parser->nextval)
  90
   91
      /* True when sv's type is at least SVt_PVNV and its xivu_eval_seen field
       * is non-zero.  The argument sv is evaluated twice. */
   92 #define SvEVALED(sv) \
   93     (SvTYPE(sv) >= SVt_PVNV \
   94     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
   95
      /* Message string for over-long identifiers; the code that uses it lies
       * outside this part of the file. */
   96 static const char* const ident_too_long = "Identifier too long";
   97
      /* The PL_nextval element selected by the current value of PL_nexttoke. */
   98 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
   99
      /* NOTE(review): these look like a mask for the low six bits plus two
       * single-bit flags above it, presumably applied to an "expect"-style
       * value -- their users are not in this part of the file, so confirm. */
  100 #define XENUMMASK  0x3f
  101 #define XFAKEEOF   0x40
  102 #define XFAKEBRACK 0x80
  103
      /* UTF: whether the text being lexed is to be treated as UTF-8.
       * With USE_UTF8_SCRIPTS this is simply "not IN_BYTES".  Otherwise it is
       * true when PL_linestr exists and DO_UTF8() holds for it, or when
       * HINT_UTF8 is set in PL_hints and the current parser's lex_flags do not
       * contain LEX_IGNORE_UTF8_HINTS. */
  104 #ifdef USE_UTF8_SCRIPTS
  105 #   define UTF cBOOL(!IN_BYTES)
  106 #else
  107 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
  108 #endif
  109
  110 /* The maximum number of characters preceding the unrecognized one to display */
  111 #define UNRECOGNIZED_PRECEDE_COUNT 10
  112
  113 /* In variables named $^X, these are the legal values for X.
  114  * 1999-02-27 mjd-perl-patch@plover.com */
      /* Note: strchr() also matches the terminating NUL of its first argument,
       * so x == '\0' satisfies this macro as written.  x is evaluated twice. */
  115 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
  116
      /* Local name for isBLANK_A(). */
  117 #define SPACE_OR_TAB(c) isBLANK_A(c)
  118
      /* True when the text at s starts with '.' followed by a hex digit or a
       * 'p' (compared with isALPHA_FOLD_EQ, presumably case-insensitively), or
       * starts directly with such a 'p'.  s is used unparenthesized and more
       * than once, so pass a plain pointer variable. */
  119 #define HEXFP_PEEK(s)     \
  120     (((s[0] == '.') && \
  121       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
  122      isALPHA_FOLD_EQ(s[0], 'p'))
 123
  124 /* LEX_* are values for PL_lex_state, the state of the lexer.
  125  * They are arranged oddly so that the guard on the switch statement
  126  * can get by with a single comparison (if the compiler is smart enough).
  127  *
  128  * These values refer to the various states within a sublex parse,
  129  * i.e. within a double quotish string
       *
       * Under DEBUGGING, lex_state_names[] holds a printable name for each of
       * the values 1..10 defined here, at the index equal to the value.
  130  */
  131
  132 /* #define LEX_NOTPARSING               11 is done in perl.h. */
  133
  134 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
  135 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
  136 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
  137 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
  138 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
  139
  140                                    /* at end of code, eg "$x" followed by:  */
  141 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
  142 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
  143
  144 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
  145                                         string or after \E, $foo, etc       */
  146 #define LEX_INTERPCONST          2 /* NOT USED */
  147 #define LEX_FORMLINE             1 /* expecting a format line               */
 148
 149
  150 #ifdef DEBUGGING
      /* Printable names for the lexer states, indexed by state value: entries
       * 1..10 line up with LEX_FORMLINE (1) through LEX_NORMAL (10).
       * Entry 0 is "KNOWNEXT"; no LEX_ define with value 0 is visible in this
       * part of the file.  LEX_NOTPARSING (11) has no entry. */
  151 static const char* const lex_state_names[] = {
  152     "KNOWNEXT",
  153     "FORMLINE",
  154     "INTERPCONST",
  155     "INTERPCONCAT",
  156     "INTERPENDMAYBE",
  157     "INTERPEND",
  158     "INTERPSTART",
  159     "INTERPPUSH",
  160     "INTERPCASEMOD",
  161     "INTERPNORMAL",
  162     "NORMAL"
  163 };
  164 #endif
 165
 166 #include "keywords.h"
 167
  168 /* CLINE is a macro that ensures PL_copline has a sane value */
      /* Concretely: PL_copline becomes the smaller of its current value and
       * CopLINE(PL_curcop); the macro evaluates to the value assigned. */
  169
  170 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 171
 172 /*
 173  * Convenience functions to return different tokens and prime the
 174  * lexer for the next token.  They all take an argument.
 175  *
 176  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 177  * OPERATOR     : generic operator
 178  * AOPERATOR    : assignment operator
 179  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 180  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 181  * PREREF       : *EXPR where EXPR is not a simple identifier
 182  * TERM         : expression term
 183  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 184  * LOOPX        : loop exiting command (goto, last, dump, etc)
 185  * FTST         : file test operator
 186  * FUN0         : zero-argument function
 187  * FUN0OP       : zero-argument function, with its op created in this file
 188  * FUN1         : not used, except for not, which isn't a UNIOP
 189  * BOop         : bitwise or or xor
 190  * BAop         : bitwise and
 191  * BCop         : bitwise complement
 192  * SHop         : shift operator
 193  * PWop         : power operator
 194  * PMop         : pattern-matching operator
 195  * Aop          : addition-level operator
 196  * AopNOASSIGN  : addition-level operator that is never part of .=
 197  * Mop          : multiplication-level operator
 198  * Eop          : equality-testing operator
 199  * Rop          : relational operator <= != gt
 200  *
 201  * Also see LOP and lop() below.
 202  */
 203
  204 #ifdef DEBUGGING /* Serve -DT. */
      /* REPORT(retval): in DEBUGGING builds the token and &pl_yylval go through
       * tokereport() and its result is the value; otherwise REPORT is just
       * retval. */
  205 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
  206 #else
  207 #   define REPORT(retval) (retval)
  208 #endif
  209
      /* Every macro below is a complete `return' statement, so it ends the
       * function it is used in.  Each expects a local char pointer named s,
       * which it stores into PL_bufptr (LOOPX instead stores the result of
       * force_word()).  PL_expect is set where shown; TOKEN, POSTDEREF and
       * AopNOASSIGN leave it alone.  FTST through Rop, and LOOPX, store f in
       * pl_yylval.ival; FUN0OP stores it in pl_yylval.opval.  The macros that
       * go through ao() pass it the bare token instead of using REPORT(). */
  210 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
  211 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
  212 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
  213 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
  214 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
  215 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
  216 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
  217 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
  218 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
  219                          pl_yylval.ival=f, \
  220                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
  221                          REPORT((int)LOOPEX))
  222 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
  223 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
  224 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
  225 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
  226 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
  227 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
  228 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
  229                        REPORT('~')
  230 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
  231 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
  232 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
  233 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
  234 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
  235 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
  236 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
  237 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 238
  239 /* This bit of chicanery makes a unary function followed by
  240  * a parenthesis into a function with one argument, highest precedence.
  241  * The UNIDOR macro is for unary functions that can be followed by the //
  242  * operator (such as C<shift // 0>).
  243  */
      /* UNI3(f,x,have_x): store f in pl_yylval.ival, set PL_expect to x only
       * when have_x is true, copy s to PL_bufptr, record PL_oldbufptr in
       * PL_last_uni and the magnitude of f in PL_last_lop_op.  It then returns
       * FUNC1 if '(' follows (directly, or after skipspace()) and UNIOP
       * otherwise.  Like the macros above it needs a local s and returns from
       * the enclosing function.  Note that f is stored in pl_yylval.ival as
       * given, so a negative f stays negative there. */
  244 #define UNI3(f,x,have_x) { \
  245         pl_yylval.ival = f; \
  246         if (have_x) PL_expect = x; \
  247         PL_bufptr = s; \
  248         PL_last_uni = PL_oldbufptr; \
  249         PL_last_lop_op = (f) < 0 ? -(f) : (f); \
  250         if (*s == '(') \
  251             return REPORT( (int)FUNC1 ); \
  252         s = skipspace(s); \
  253         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
  254         }
  255 #define UNI(f)    UNI3(f,XTERM,1)
  256 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
      /* UNIPROTO: OPERATOR(f), first recording PL_oldbufptr in PL_last_uni when
       * `optional' is true. */
  257 #define UNIPROTO(f,optional) { \
  258         if (optional) PL_last_uni = PL_oldbufptr; \
  259         OPERATOR(f); \
  260         }
  261
      /* UNIBRACK: UNI3 with have_x false, i.e. PL_expect is left untouched. */
  262 #define UNIBRACK(f) UNI3(f,0,0)
  263
  264 /* grandfather return to old style */
      /* OLDLOP(f): returns LSTOP directly (not through REPORT) after storing f
       * in pl_yylval.ival, setting PL_expect to XTERM and PL_bufptr to s.
       * When PL_lex_allbrackets is zero, PL_lex_fakeeof is first lowered to
       * LEX_FAKEEOF_LOWLOGIC if it is above that. */
  265 #define OLDLOP(f) \
  266         do { \
  267             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
  268                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
  269             pl_yylval.ival = (f); \
  270             PL_expect = XTERM; \
  271             PL_bufptr = s; \
  272             return (int)LSTOP; \
  273         } while(0)
  274
      /* Advance the current cop's line number by one, plus any pending
       * PL_parser->herelines; that pending count is then reset to zero. */
  275 #define COPLINE_INC_WITH_HERELINES                  \
  276     STMT_START {                                     \
  277         CopLINE_inc(PL_curcop);                       \
  278         if (PL_parser->herelines)                      \
  279             CopLINE(PL_curcop) += PL_parser->herelines, \
  280             PL_parser->herelines = 0;                    \
  281     } STMT_END
  282 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
  283  * is no sublex_push to follow. */
      /* Sets the line to PL_multi_end and, when that differs from
       * PL_multi_start, clears PL_parser->herelines. */
  284 #define COPLINE_SET_FROM_MULTI_END            \
  285     STMT_START {                               \
  286         CopLINE_set(PL_curcop, PL_multi_end);   \
  287         if (PL_multi_end != PL_multi_start)      \
  288             PL_parser->herelines = 0;             \
  289     } STMT_END
 290
 291
 292 #ifdef DEBUGGING
 293
  294 /* how to interpret the pl_yylval associated with the token */
  295 enum token_type {
  296     TOKENTYPE_NONE,  /* nothing from pl_yylval is printed */
  297     TOKENTYPE_IVAL,  /* pl_yylval.ival, printed as an integer */
  298     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
  299     TOKENTYPE_PVAL,  /* pl_yylval.pval, printed as a string */
  300     TOKENTYPE_OPVAL  /* pl_yylval.opval, an op printed by its op name */
  301 };
  302
      /* Lookup table used by S_tokereport(): for each token number, the name to
       * print and which pl_yylval member (if any) accompanies it.  The table is
       * searched linearly and ends at the entry whose token is 0. */
  303 static struct debug_tokens {
  304     const int token;
  305     enum token_type type;
  306     const char *name;
  307 } const debug_tokens[] =
  308 {
  309     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
  310     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
  311     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
  312     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
  313     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
  314     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
  315     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
  316     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
  317     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
  318     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
  319     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
  320     { DO,               TOKENTYPE_NONE,         "DO" },
  321     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
  322     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
  323     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
  324     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
  325     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
  326     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
  327     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
  328     { FOR,              TOKENTYPE_IVAL,         "FOR" },
  329     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
  330     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
  331     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
  332     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
  333     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
  334     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
  335     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
  336     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
  337     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
  338     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
  339     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
  340     { IF,               TOKENTYPE_IVAL,         "IF" },
  341     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
  342     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
  343     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
  344     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
  345     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
  346     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
  347     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
  348     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
  349     { MY,               TOKENTYPE_IVAL,         "MY" },
  350     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
  351     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
  352     { OROP,             TOKENTYPE_IVAL,         "OROP" },
  353     { OROR,             TOKENTYPE_NONE,         "OROR" },
  354     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
  355     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
  356     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
  357     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
  358     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
  359     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
  360     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
  361     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
  362     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
  363     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
  364     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
  365     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
  366     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
  367     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
  368     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
  369     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
  370     { SUB,              TOKENTYPE_NONE,         "SUB" },
  371     { THING,            TOKENTYPE_OPVAL,        "THING" },
  372     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
  373     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
  374     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
  375     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
  376     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
  377     { USE,              TOKENTYPE_IVAL,         "USE" },
  378     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
  379     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
  380     { BAREWORD,         TOKENTYPE_OPVAL,        "BAREWORD" },
  381     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
  382     { 0,                TOKENTYPE_NONE,         NULL }
  383 };
 384
 385 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 386
 387 STATIC int
 388 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 389 {
 390     PERL_ARGS_ASSERT_TOKEREPORT;
 391
 392     if (DEBUG_T_TEST) {
 393         const char *name = NULL;
 394         enum token_type type = TOKENTYPE_NONE;
 395         const struct debug_tokens *p;
 396         SV* const report = newSVpvs("<== ");
 397
 398         for (p = debug_tokens; p->token; p++) {
 399             if (p->token == (int)rv) {
 400                 name = p->name;
 401                 type = p->type;
 402                 break;
 403             }
 404         }
 405         if (name)
 406             Perl_sv_catpv(aTHX_ report, name);
 407         else if (isGRAPH(rv))
 408         {
 409             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 410             if ((char)rv == 'p')
 411                 sv_catpvs(report, " (pending identifier)");
 412         }
 413         else if (!rv)
 414             sv_catpvs(report, "EOF");
 415         else
 416             Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
 417         switch (type) {
 418         case TOKENTYPE_NONE:
 419             break;
 420         case TOKENTYPE_IVAL:
 421             Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
 422             break;
 423         case TOKENTYPE_OPNUM:
 424             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 425                                     PL_op_name[lvalp->ival]);
 426             break;
 427         case TOKENTYPE_PVAL:
 428             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 429             break;
 430         case TOKENTYPE_OPVAL:
 431             if (lvalp->opval) {
 432                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 433                                     PL_op_name[lvalp->opval->op_type]);
 434                 if (lvalp->opval->op_type == OP_CONST) {
 435                     Perl_sv_catpvf(aTHX_ report, " %s",
 436                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 437                 }
 438
 439             }
 440             else
 441                 sv_catpvs(report, "(opval=null)");
 442             break;
 443         }
 444         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 445     };
 446     return (int)rv;
 447 }
 448
 449
 450 /* print the buffer with suitable escapes */
 451
 452 STATIC void
 453 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 454 {
 455     SV* const tmp = newSVpvs("");
 456
 457     PERL_ARGS_ASSERT_PRINTBUF;
 458
 459     GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
 460     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 461     GCC_DIAG_RESTORE;
 462     SvREFCNT_dec(tmp);
 463 }
 464
 465 #endif
 466
 467 /*
 468  * S_ao
 469  *
 470  * This subroutine looks for an '=' next to the operator that has just been
 471  * parsed and turns it into an ASSIGNOP if it finds one.
 472  */
 473
 474 STATIC int
 475 S_ao(pTHX_ int toketype)
 476 {
 477     if (*PL_bufptr == '=') {
 478         PL_bufptr++;
 479         if (toketype == ANDAND)
 480             pl_yylval.ival = OP_ANDASSIGN;
 481         else if (toketype == OROR)
 482             pl_yylval.ival = OP_ORASSIGN;
 483         else if (toketype == DORDOR)
 484             pl_yylval.ival = OP_DORASSIGN;
 485         toketype = ASSIGNOP;
 486     }
 487     return REPORT(toketype);
 488 }
 489
  490 /*
  491  * S_no_op
  492  * When Perl expects an operator and finds something else, no_op
  493  * prints the warning.  It always prints "<something> found where
  494  * operator expected".  It prints "Missing semicolon on previous line?"
  495  * if the surprise occurs at the start of the line.  "do you need to
  496  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
  497  * where the compiler doesn't know if foo is a method call or a function.
  498  * It prints "Missing operator before end of line" if there's nothing
  499  * after the missing operator, or "... before <...>" if there is something
  500  * after the missing operator.
       * (In the code below that last case is a single message, "Missing
       * operator before <text>?", where <text> is the input between the
       * original PL_bufptr and s.)
  501  *
  502  * PL_bufptr is expected to point to the start of the thing that was found,
  503  * and s after the next token or partial token.
       *
       * what: description of the thing found, used in the first message.
       * s:    end of the thing found; NULL means "same as PL_bufptr".
       * The extra hints are emitted only when WARN_SYNTAX is enabled
       * (ckWARN_d).  PL_bufptr is restored before returning.
  504  */
  505
  506 STATIC void
  507 S_no_op(pTHX_ const char *const what, char *s)
  508 {
  509     char * const oldbp = PL_bufptr;
          /* true when the previous token began at the start of the line */
  510     const bool is_first = (PL_oldbufptr == PL_linestart);
  511
  512     PERL_ARGS_ASSERT_NO_OP;
  513
          /* PL_bufptr is moved to s while the messages are produced and put
           * back at the end (presumably because yywarn() reports context
           * relative to PL_bufptr -- confirm in yywarn) */
  514     if (!s)
  515         s = oldbp;
  516     else
  517         PL_bufptr = s;
  518     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
  519     if (ckWARN_d(WARN_SYNTAX)) {
  520         if (is_first)
  521             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
  522                     "\t(Missing semicolon on previous line?)\n");
  523         else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
  524                                                            PL_bufend,
  525                                                            UTF))
  526         {
                  /* the token before last starts like an identifier: step t
                   * over its word characters and any ':' */
  527             const char *t;
  528             for (t = PL_oldoldbufptr;
  529                  (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
  530                  t += UTF ? UTF8SKIP(t) : 1)
  531             {
  532                 NOOP;
  533             }
                  /* suggest predeclaring that word only if it ended before
                   * PL_bufptr and is followed by whitespace */
  534             if (t < PL_bufptr && isSPACE(*t))
  535                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
  536                         "\t(Do you need to predeclare %" UTF8f "?)\n",
  537                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
  538         }
  539         else {
                  /* otherwise quote the text that was found (oldbp up to s) */
  540             assert(s >= oldbp);
  541             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
  542                     "\t(Missing operator before %" UTF8f "?)\n",
  543                      UTF8fARG(UTF, s - oldbp, oldbp));
  544         }
  545     }
  546     PL_bufptr = oldbp;
  547 }
 548
 549 /*
 550  * S_missingterm
 551  * Complain about missing quote/regexp/heredoc terminator.
 552  * If it's called with NULL then it cauterizes the line buffer.
 553  * If we're in a delimited string and the delimiter is a control
 554  * character, it's reformatted into a two-char sequence like ^C.
 555  * This is fatal.
 556  */
 557
 558 STATIC void
 559 S_missingterm(pTHX_ char *s)
 560 {
 561     char tmpbuf[UTF8_MAXBYTES + 1];
 562     char q;
 563     bool uni = FALSE;
 564     SV *sv;
 565     if (s) {
 566         char * const nl = strrchr(s,'\n');
 567         if (nl)
 568             *nl = '\0';
 569         uni = UTF;
 570     }
 571     else if (PL_multi_close < 32) {
 572         *tmpbuf = '^';
 573         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 574         tmpbuf[2] = '\0';
 575         s = tmpbuf;
 576     }
 577     else {
 578         if (LIKELY(PL_multi_close < 256)) {
 579             *tmpbuf = (char)PL_multi_close;
 580             tmpbuf[1] = '\0';
 581         }
 582         else {
 583             uni = TRUE;
 584             *uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close) = 0;
 585         }
 586         s = tmpbuf;
 587     }
 588     q = strchr(s,'"') ? '\'' : '"';
 589     sv = sv_2mortal(newSVpv(s,0));
 590     if (uni)
 591         SvUTF8_on(sv);
 592     Perl_croak(aTHX_ "Can't find string terminator %c%" SVf
 593                      "%c anywhere before EOF",q,SVfARG(sv),q);
 594 }
 595
 596 #include "feature.h"
 597
 598 /*
 599  * Check whether the named feature is enabled.
 600  */
 601 bool
 602 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 603 {
 604     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 605
 606     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 607
 608     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 609
 610     if (namelen > MAX_FEATURE_LEN)
 611         return FALSE;
 612     memcpy(&he_name[8], name, namelen);
 613
 614     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 615                                      REFCOUNTED_HE_EXISTS));
 616 }
 617
 618 /*
 619  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 620  * utf16-to-utf8-reversed.
 621  */
 622
 623 #ifdef PERL_CR_FILTER
 624 static void
 625 strip_return(SV *sv)
 626 {
 627     const char *s = SvPVX_const(sv);
 628     const char * const e = s + SvCUR(sv);
 629
 630     PERL_ARGS_ASSERT_STRIP_RETURN;
 631
 632     /* outer loop optimized to do nothing if there are no CR-LFs */
 633     while (s < e) {
 634         if (*s++ == '\r' && *s == '\n') {
 635             /* hit a CR-LF, need to copy the rest */
 636             char *d = s - 1;
 637             *d++ = *s++;
 638             while (s < e) {
 639                 if (*s == '\r' && s[1] == '\n')
 640                     s++;
 641                 *d++ = *s++;
 642             }
 643             SvCUR(sv) -= s - d;
 644             return;
 645         }
 646     }
 647 }
 648
 649 STATIC I32
 650 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 651 {
 652     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 653     if (count > 0 && !maxlen)
 654         strip_return(sv);
 655     return count;
 656 }
 657 #endif
 658
 659 /*
 660 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 661
 662 Creates and initialises a new lexer/parser state object, supplying
 663 a context in which to lex and parse from a new source of Perl code.
 664 A pointer to the new state object is placed in L</PL_parser>.  An entry
 665 is made on the save stack so that upon unwinding, the new state object
 666 will be destroyed and the former value of L</PL_parser> will be restored.
 667 Nothing else need be done to clean up the parsing context.
 668
 669 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
 670 non-null, provides a string (in SV form) containing code to be parsed.
 671 A copy of the string is made, so subsequent modification of C<line>
 672 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
 673 from which code will be read to be parsed.  If both are non-null, the
 674 code in C<line> comes first and must consist of complete lines of input,
 675 and C<rsfp> supplies the remainder of the source.
 676
 677 The C<flags> parameter is reserved for future use.  Currently it is only
 678 used by perl internally, so extensions should always pass zero.
 679
 680 =cut
 681 */
 682
  683 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
  684    can share filters with the current parser.
  685    LEX_DONT_CLOSE_RSFP indicates that the file handle wasn't opened by the
  686    caller, hence isn't owned by the parser, so shouldn't be closed on parser
  687    destruction. This is used to handle the case of defaulting to reading the
  688    script from the standard input because no filename was given on the command
  689    line (without getting confused by the situation where STDIN has been
  690    closed, so the script handle is opened on fd 0)  */
  691
      /* Create a new parser, make it the current one (PL_parser) and prime its
       * lexer state.
       *   line  - optional SV holding code to parse first (may be NULL)
       *   rsfp  - optional input stream supplying (further) code (may be NULL)
       *   flags - LEX_START_* / LEX_* bits; any bit outside LEX_START_FLAGS
       *           is rejected with a croak
       * The previous PL_parser is remembered in old_parser, and SAVEPARSER()
       * arranges for the new parser to be freed on scope exit. */
  692 void
  693 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
  694 {
  695     const char *s = NULL;
  696     yy_parser *parser, *oparser;
  697
  698     if (flags && flags & ~LEX_START_FLAGS)
  699         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
  700
  701     /* create and initialise a parser */
  702
  703     Newxz(parser, 1, yy_parser);
  704     parser->old_parser = oparser = PL_parser;
  705     PL_parser = parser;
  706
  707     parser->stack = NULL;
  708     parser->stack_max1 = NULL;
  709     parser->ps = NULL;
  710
  711     /* on scope exit, free this parser and restore any outer one */
  712     SAVEPARSER(parser);
  713     parser->saved_curcop = PL_curcop;
  714
  715     /* initialise lexer state */
  716
  717     parser->nexttoke = 0;
          /* the error count carries over from an outer parser, if there is one */
  718     parser->error_count = oparser ? oparser->error_count : 0;
  719     parser->copline = parser->preambling = NOLINE;
  720     parser->lex_state = LEX_NORMAL;
  721     parser->expect = XSTATE;
  722     parser->rsfp = rsfp;
  723     parser->recheck_utf8_validity = FALSE;
          /* With LEX_START_SAME_FILTER and an outer parser, share that parser's
           * filter array (creating it there first if it has none) and take a
           * reference to it; otherwise start without filters. */
  724     parser->rsfp_filters =
  725       !(flags & LEX_START_SAME_FILTER) || !oparser
  726         ? NULL
  727         : MUTABLE_AV(SvREFCNT_inc(
  728             oparser->rsfp_filters
  729              ? oparser->rsfp_filters
  730              : (oparser->rsfp_filters = newAV())
  731           ));
  732
  733     Newx(parser->lex_brackstack, 120, char);
  734     Newx(parser->lex_casestack, 12, char);
  735     *parser->lex_casestack = '\0';
  736     Newxz(parser->lex_shared, 1, LEXSHARED);
  737
  738     if (line) {
  739         STRLEN len;
  740         const U8* first_bad_char_loc;
  741
  742         s = SvPV_const(line, len);
  743
              /* a string flagged as UTF-8 must be well-formed, else die here */
  744         if (   SvUTF8(line)
  745             && UNLIKELY(! is_utf8_string_loc((U8 *) s,
  746                                              SvCUR(line),
  747                                              &first_bad_char_loc)))
  748         {
  749             _force_out_malformed_utf8_message(first_bad_char_loc,
  750                                               (U8 *) s + SvCUR(line),
  751                                               0,
  752                                               1 /* 1 means die */ );
  753             NOT_REACHED; /* NOTREACHED */
  754         }
  755
              /* LEX_START_COPIED: use line itself (one more reference) as the
               * line buffer instead of a copy.  Note that the "\n;" appended
               * below then goes onto line itself. */
  756         parser->linestr = flags & LEX_START_COPIED
  757                             ? SvREFCNT_inc_simple_NN(line)
  758                             : newSVpvn_flags(s, len, SvUTF8(line));
  759         if (!rsfp)
  760             sv_catpvs(parser->linestr, "\n;");
  761     } else {
              /* no code given: start from "\n" when a stream follows, else "\n;" */
  762         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
  763     }
  764
          /* all buffer cursors start at the beginning of the line buffer */
  765     parser->oldoldbufptr =
  766         parser->oldbufptr =
  767         parser->bufptr =
  768         parser->linestart = SvPVX(parser->linestr);
  769     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
  770     parser->last_lop = parser->last_uni = NULL;
  771
          /* only these three flags are kept in the parser; lex_flags is a U8,
           * hence the compile-time check that they fit in 8 bits */
  772     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
  773                                                         |LEX_DONT_CLOSE_RSFP));
  774     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
  775                                                         |LEX_DONT_CLOSE_RSFP));
  776
  777     parser->in_pod = parser->filtered = 0;
  778 }
 779
 780
 781 /* delete a parser object */
 782
 783 void
 784 Perl_parser_free(pTHX_  const yy_parser *parser)
 785 {
 786     PERL_ARGS_ASSERT_PARSER_FREE;
 787
 788     PL_curcop = parser->saved_curcop;
 789     SvREFCNT_dec(parser->linestr);
 790
 791     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 792         PerlIO_clearerr(parser->rsfp);
 793     else if (parser->rsfp && (!parser->old_parser
 794           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 795         PerlIO_close(parser->rsfp);
 796     SvREFCNT_dec(parser->rsfp_filters);
 797     SvREFCNT_dec(parser->lex_stuff);
 798     SvREFCNT_dec(parser->lex_sub_repl);
 799
 800     Safefree(parser->lex_brackstack);
 801     Safefree(parser->lex_casestack);
 802     Safefree(parser->lex_shared);
 803     PL_parser = parser->old_parser;
 804     Safefree(parser);
 805 }
 806
 807 void
 808 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 809 {
 810     I32 nexttoke = parser->nexttoke;
 811     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 812     while (nexttoke--) {
 813         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 814          && parser->nextval[nexttoke].opval
 815          && parser->nextval[nexttoke].opval->op_slabbed
 816          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 817             op_free(parser->nextval[nexttoke].opval);
 818             parser->nextval[nexttoke].opval = NULL;
 819         }
 820     }
 821 }
 822
 823
 824 /*
 825 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 826
 827 Buffer scalar containing the chunk currently under consideration of the
 828 text currently being lexed.  This is always a plain string scalar (for
 829 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 830 normal scalar means; instead refer to the buffer directly by the pointer
 831 variables described below.
 832
 833 The lexer maintains various C<char*> pointers to things in the
 834 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 835 reallocated, all of these pointers must be updated.  Don't attempt to
 836 do this manually, but rather use L</lex_grow_linestr> if you need to
 837 reallocate the buffer.
 838
 839 The content of the text chunk in the buffer is commonly exactly one
 840 complete line of input, up to and including a newline terminator,
 841 but there are situations where it is otherwise.  The octets of the
 842 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 843 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 844 flag on this scalar, which may disagree with it.
 845
 846 For direct examination of the buffer, the variable
 847 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 848 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 849 of these pointers is usually preferable to examination of the scalar
 850 through normal scalar means.
 851
 852 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 853
 854 Direct pointer to the end of the chunk of text currently being lexed, the
 855 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 856 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 857 always located at the end of the buffer, and does not count as part of
 858 the buffer's contents.
 859
 860 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 861
 862 Points to the current position of lexing inside the lexer buffer.
 863 Characters around this point may be freely examined, within
 864 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 865 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 866 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 867
 868 Lexing code (whether in the Perl core or not) moves this pointer past
 869 the characters that it consumes.  It is also expected to perform some
 870 bookkeeping whenever a newline character is consumed.  This movement
 871 can be more conveniently performed by the function L</lex_read_to>,
 872 which handles newlines appropriately.
 873
 874 Interpretation of the buffer's octets can be abstracted out by
 875 using the slightly higher-level functions L</lex_peek_unichar> and
 876 L</lex_read_unichar>.
 877
 878 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 879
 880 Points to the start of the current line inside the lexer buffer.
 881 This is useful for indicating at which column an error occurred, and
 882 not much else.  This must be updated by any lexing code that consumes
 883 a newline; the function L</lex_read_to> handles this detail.
 884
 885 =cut
 886 */
 887
 888 /*
 889 =for apidoc Amx|bool|lex_bufutf8
 890
 891 Indicates whether the octets in the lexer buffer
 892 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 893 of Unicode characters.  If not, they should be interpreted as Latin-1
 894 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 895
 896 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 897 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 898 encoding.
 899
 900 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 901 is significant, but not the whole story regarding the input character
 902 encoding.  Normally, when a file is being read, the scalar contains octets
 903 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 904 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 905 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 906 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 907 is in effect.  This logic may change in the future; use this function
 908 instead of implementing the logic yourself.
 909
 910 =cut
 911 */
 912
 913 bool
 914 Perl_lex_bufutf8(pTHX)
 915 {
 916     return UTF;
 917 }
 918
 919 /*
 920 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 921
 922 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 923 at least C<len> octets (including terminating C<NUL>).  Returns a
 924 pointer to the reallocated buffer.  This is necessary before making
 925 any direct modification of the buffer that would increase its length.
 926 L</lex_stuff_pvn> provides a more convenient way to insert text into
 927 the buffer.
 928
 929 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 930 this function updates all of the lexer's variables that point directly
 931 into the buffer.
 932
 933 =cut
 934 */
 935
 936 char *
 937 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 938 {
 939     SV *linestr;
 940     char *buf;
 941     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 942     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 943     bool current;
 944
 945     linestr = PL_parser->linestr;
 946     buf = SvPVX(linestr);
 947     if (len <= SvLEN(linestr))
 948         return buf;
 949
 950     /* Is the lex_shared linestr SV the same as the current linestr SV?
 951      * Only in this case does re_eval_start need adjusting, since it
 952      * points within lex_shared->ls_linestr's buffer */
 953     current = (   !PL_parser->lex_shared->ls_linestr
 954                || linestr == PL_parser->lex_shared->ls_linestr);
 955
 956     bufend_pos = PL_parser->bufend - buf;
 957     bufptr_pos = PL_parser->bufptr - buf;
 958     oldbufptr_pos = PL_parser->oldbufptr - buf;
 959     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 960     linestart_pos = PL_parser->linestart - buf;
 961     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 962     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 963     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
 964                             PL_parser->lex_shared->re_eval_start - buf : 0;
 965
 966     buf = sv_grow(linestr, len);
 967
 968     PL_parser->bufend = buf + bufend_pos;
 969     PL_parser->bufptr = buf + bufptr_pos;
 970     PL_parser->oldbufptr = buf + oldbufptr_pos;
 971     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 972     PL_parser->linestart = buf + linestart_pos;
 973     if (PL_parser->last_uni)
 974         PL_parser->last_uni = buf + last_uni_pos;
 975     if (PL_parser->last_lop)
 976         PL_parser->last_lop = buf + last_lop_pos;
 977     if (current && PL_parser->lex_shared->re_eval_start)
 978         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 979     return buf;
 980 }
 981
 982 /*
 983 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 984
 985 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 986 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 987 reallocating the buffer if necessary.  This means that lexing code that
 988 runs later will see the characters as if they had appeared in the input.
 989 It is not recommended to do this as part of normal parsing, and most
 990 uses of this facility run the risk of the inserted characters being
 991 interpreted in an unintended manner.
 992
 993 The string to be inserted is represented by C<len> octets starting
 994 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 995 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
 996 The characters are recoded for the lexer buffer, according to how the
 997 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 998 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
 999 function is more convenient.
1000
1001 =cut
1002 */
1003
1004 void
1005 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1006 {
1007     dVAR;
1008     char *bufptr;
1009     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1010     if (flags & ~(LEX_STUFF_UTF8))
1011         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1012     if (UTF) {
1013         if (flags & LEX_STUFF_UTF8) {
1014             goto plain_copy;
1015         } else {
1016             STRLEN highhalf = 0;    /* Count of variants */
1017             const char *p, *e = pv+len;
1018             for (p = pv; p != e; p++) {
1019                 if (! UTF8_IS_INVARIANT(*p)) {
1020                     highhalf++;
1021                 }
1022             }
1023             if (!highhalf)
1024                 goto plain_copy;
1025             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1026             bufptr = PL_parser->bufptr;
1027             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1028             SvCUR_set(PL_parser->linestr,
1029                 SvCUR(PL_parser->linestr) + len+highhalf);
1030             PL_parser->bufend += len+highhalf;
1031             for (p = pv; p != e; p++) {
1032                 U8 c = (U8)*p;
1033                 if (! UTF8_IS_INVARIANT(c)) {
1034                     *bufptr++ = UTF8_TWO_BYTE_HI(c);
1035                     *bufptr++ = UTF8_TWO_BYTE_LO(c);
1036                 } else {
1037                     *bufptr++ = (char)c;
1038                 }
1039             }
1040         }
1041     } else {
1042         if (flags & LEX_STUFF_UTF8) {
1043             STRLEN highhalf = 0;
1044             const char *p, *e = pv+len;
1045             for (p = pv; p != e; p++) {
1046                 U8 c = (U8)*p;
1047                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1048                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1049                                 "non-Latin-1 character into Latin-1 input");
1050                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1051                     p++;
1052                     highhalf++;
1053                 } else assert(UTF8_IS_INVARIANT(c));
1054             }
1055             if (!highhalf)
1056                 goto plain_copy;
1057             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1058             bufptr = PL_parser->bufptr;
1059             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1060             SvCUR_set(PL_parser->linestr,
1061                 SvCUR(PL_parser->linestr) + len-highhalf);
1062             PL_parser->bufend += len-highhalf;
1063             p = pv;
1064             while (p < e) {
1065                 if (UTF8_IS_INVARIANT(*p)) {
1066                     *bufptr++ = *p;
1067                     p++;
1068                 }
1069                 else {
1070                     assert(p < e -1 );
1071                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1072                     p += 2;
1073                 }
1074             }
1075         } else {
1076           plain_copy:
1077             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1078             bufptr = PL_parser->bufptr;
1079             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1080             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1081             PL_parser->bufend += len;
1082             Copy(pv, bufptr, len, char);
1083         }
1084     }
1085 }
1086
1087 /*
1088 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1089
1090 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1091 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1092 reallocating the buffer if necessary.  This means that lexing code that
1093 runs later will see the characters as if they had appeared in the input.
1094 It is not recommended to do this as part of normal parsing, and most
1095 uses of this facility run the risk of the inserted characters being
1096 interpreted in an unintended manner.
1097
1098 The string to be inserted is represented by octets starting at C<pv>
1099 and continuing to the first nul.  These octets are interpreted as either
1100 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1101 in C<flags>.  The characters are recoded for the lexer buffer, according
1102 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1103 If it is not convenient to nul-terminate a string to be inserted, the
1104 L</lex_stuff_pvn> function is more appropriate.
1105
1106 =cut
1107 */
1108
1109 void
1110 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1111 {
1112     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1113     lex_stuff_pvn(pv, strlen(pv), flags);
1114 }
1115
1116 /*
1117 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1118
1119 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1120 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1121 reallocating the buffer if necessary.  This means that lexing code that
1122 runs later will see the characters as if they had appeared in the input.
1123 It is not recommended to do this as part of normal parsing, and most
1124 uses of this facility run the risk of the inserted characters being
1125 interpreted in an unintended manner.
1126
1127 The string to be inserted is the string value of C<sv>.  The characters
1128 are recoded for the lexer buffer, according to how the buffer is currently
1129 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1130 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1131 need to construct a scalar.
1132
1133 =cut
1134 */
1135
1136 void
1137 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1138 {
1139     char *pv;
1140     STRLEN len;
1141     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1142     if (flags)
1143         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1144     pv = SvPV(sv, len);
1145     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1146 }
1147
1148 /*
1149 =for apidoc Amx|void|lex_unstuff|char *ptr
1150
1151 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1152 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1153 This hides the discarded text from any lexing code that runs later,
1154 as if the text had never appeared.
1155
1156 This is not the normal way to consume lexed text.  For that, use
1157 L</lex_read_to>.
1158
1159 =cut
1160 */
1161
1162 void
1163 Perl_lex_unstuff(pTHX_ char *ptr)
1164 {
1165     char *buf, *bufend;
1166     STRLEN unstuff_len;
1167     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1168     buf = PL_parser->bufptr;
1169     if (ptr < buf)
1170         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1171     if (ptr == buf)
1172         return;
1173     bufend = PL_parser->bufend;
1174     if (ptr > bufend)
1175         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1176     unstuff_len = ptr - buf;
1177     Move(ptr, buf, bufend+1-ptr, char);
1178     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1179     PL_parser->bufend = bufend - unstuff_len;
1180 }
1181
1182 /*
1183 =for apidoc Amx|void|lex_read_to|char *ptr
1184
1185 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1186 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1187 performing the correct bookkeeping whenever a newline character is passed.
1188 This is the normal way to consume lexed text.
1189
1190 Interpretation of the buffer's octets can be abstracted out by
1191 using the slightly higher-level functions L</lex_peek_unichar> and
1192 L</lex_read_unichar>.
1193
1194 =cut
1195 */
1196
1197 void
1198 Perl_lex_read_to(pTHX_ char *ptr)
1199 {
1200     char *s;
1201     PERL_ARGS_ASSERT_LEX_READ_TO;
1202     s = PL_parser->bufptr;
1203     if (ptr < s || ptr > PL_parser->bufend)
1204         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1205     for (; s != ptr; s++)
1206         if (*s == '\n') {
1207             COPLINE_INC_WITH_HERELINES;
1208             PL_parser->linestart = s+1;
1209         }
1210     PL_parser->bufptr = ptr;
1211 }
1212
1213 /*
1214 =for apidoc Amx|void|lex_discard_to|char *ptr
1215
1216 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1217 up to C<ptr>.  The remaining content of the buffer will be moved, and
1218 all pointers into the buffer updated appropriately.  C<ptr> must not
1219 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1220 it is not permitted to discard text that has yet to be lexed.
1221
 1222 Normally it is not necessary to do this directly, because it suffices to
1223 use the implicit discarding behaviour of L</lex_next_chunk> and things
1224 based on it.  However, if a token stretches across multiple lines,
1225 and the lexing code has kept multiple lines of text in the buffer for
1226 that purpose, then after completion of the token it would be wise to
1227 explicitly discard the now-unneeded earlier lines, to avoid future
1228 multi-line tokens growing the buffer without bound.
1229
1230 =cut
1231 */
1232
1233 void
1234 Perl_lex_discard_to(pTHX_ char *ptr)
1235 {
1236     char *buf;
1237     STRLEN discard_len;
1238     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1239     buf = SvPVX(PL_parser->linestr);
1240     if (ptr < buf)
1241         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1242     if (ptr == buf)
1243         return;
1244     if (ptr > PL_parser->bufptr)
1245         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1246     discard_len = ptr - buf;
1247     if (PL_parser->oldbufptr < ptr)
1248         PL_parser->oldbufptr = ptr;
1249     if (PL_parser->oldoldbufptr < ptr)
1250         PL_parser->oldoldbufptr = ptr;
1251     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1252         PL_parser->last_uni = NULL;
1253     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1254         PL_parser->last_lop = NULL;
1255     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1256     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1257     PL_parser->bufend -= discard_len;
1258     PL_parser->bufptr -= discard_len;
1259     PL_parser->oldbufptr -= discard_len;
1260     PL_parser->oldoldbufptr -= discard_len;
1261     if (PL_parser->last_uni)
1262         PL_parser->last_uni -= discard_len;
1263     if (PL_parser->last_lop)
1264         PL_parser->last_lop -= discard_len;
1265 }
1266
1267 void
1268 Perl_notify_parser_that_changed_to_utf8(pTHX)
1269 {
1270     /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1271      * off to on.  At compile time, this has the effect of entering a 'use
1272      * utf8' section.  This means that any input was not previously checked for
1273      * UTF-8 (because it was off), but now we do need to check it, or our
1274      * assumptions about the input being sane could be wrong, and we could
1275      * segfault.  This routine just sets a flag so that the next time we look
1276      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1277      * proper phase, there may not be a parser object, but if there is, setting
1278      * the flag is harmless */
1279
1280     if (PL_parser) {
1281         PL_parser->recheck_utf8_validity = TRUE;
1282     }
1283 }
1284
1285 /*
1286 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1287
1288 Reads in the next chunk of text to be lexed, appending it to
1289 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1290 looked to the end of the current chunk and wants to know more.  It is
1291 usual, but not necessary, for lexing to have consumed the entirety of
1292 the current chunk at this time.
1293
1294 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1295 chunk (i.e., the current chunk has been entirely consumed), normally the
1296 current chunk will be discarded at the same time that the new chunk is
1297 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1298 will not be discarded.  If the current chunk has not been entirely
1299 consumed, then it will not be discarded regardless of the flag.
1300
1301 Returns true if some new text was added to the buffer, or false if the
1302 buffer has reached the end of the input text.
1303
1304 =cut
1305 */
1306
1307 #define LEX_FAKE_EOF 0x80000000
1308 #define LEX_NO_TERM  0x40000000 /* here-doc */
1309
1310 bool
1311 Perl_lex_next_chunk(pTHX_ U32 flags)
1312 {
1313     SV *linestr;
1314     char *buf;
1315     STRLEN old_bufend_pos, new_bufend_pos;
1316     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1317     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1318     bool got_some_for_debugger = 0;
1319     bool got_some;
1320
1321     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1322         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1323     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1324         return FALSE;
1325     linestr = PL_parser->linestr;
1326     buf = SvPVX(linestr);
1327     if (!(flags & LEX_KEEP_PREVIOUS)
1328           && PL_parser->bufptr == PL_parser->bufend)
1329     {
1330         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1331         linestart_pos = 0;
1332         if (PL_parser->last_uni != PL_parser->bufend)
1333             PL_parser->last_uni = NULL;
1334         if (PL_parser->last_lop != PL_parser->bufend)
1335             PL_parser->last_lop = NULL;
1336         last_uni_pos = last_lop_pos = 0;
1337         *buf = 0;
1338         SvCUR(linestr) = 0;
1339     } else {
1340         old_bufend_pos = PL_parser->bufend - buf;
1341         bufptr_pos = PL_parser->bufptr - buf;
1342         oldbufptr_pos = PL_parser->oldbufptr - buf;
1343         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1344         linestart_pos = PL_parser->linestart - buf;
1345         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1346         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1347     }
1348     if (flags & LEX_FAKE_EOF) {
1349         goto eof;
1350     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1351         got_some = 0;
1352     } else if (filter_gets(linestr, old_bufend_pos)) {
1353         got_some = 1;
1354         got_some_for_debugger = 1;
1355     } else if (flags & LEX_NO_TERM) {
1356         got_some = 0;
1357     } else {
1358         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1359             SvPVCLEAR(linestr);
1360         eof:
1361         /* End of real input.  Close filehandle (unless it was STDIN),
1362          * then add implicit termination.
1363          */
1364         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1365             PerlIO_clearerr(PL_parser->rsfp);
1366         else if (PL_parser->rsfp)
1367             (void)PerlIO_close(PL_parser->rsfp);
1368         PL_parser->rsfp = NULL;
1369         PL_parser->in_pod = PL_parser->filtered = 0;
1370         if (!PL_in_eval && PL_minus_p) {
1371             sv_catpvs(linestr,
1372                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1373             PL_minus_n = PL_minus_p = 0;
1374         } else if (!PL_in_eval && PL_minus_n) {
1375             sv_catpvs(linestr, /*{*/";}");
1376             PL_minus_n = 0;
1377         } else
1378             sv_catpvs(linestr, ";");
1379         got_some = 1;
1380     }
1381     buf = SvPVX(linestr);
1382     new_bufend_pos = SvCUR(linestr);
1383     PL_parser->bufend = buf + new_bufend_pos;
1384     PL_parser->bufptr = buf + bufptr_pos;
1385
1386     if (UTF) {
1387         const U8* first_bad_char_loc;
1388         if (UNLIKELY(! is_utf8_string_loc(
1389                             (U8 *) PL_parser->bufptr,
1390                                    PL_parser->bufend - PL_parser->bufptr,
1391                                    &first_bad_char_loc)))
1392         {
1393             _force_out_malformed_utf8_message(first_bad_char_loc,
1394                                               (U8 *) PL_parser->bufend,
1395                                               0,
1396                                               1 /* 1 means die */ );
1397             NOT_REACHED; /* NOTREACHED */
1398         }
1399     }
1400
1401     PL_parser->oldbufptr = buf + oldbufptr_pos;
1402     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1403     PL_parser->linestart = buf + linestart_pos;
1404     if (PL_parser->last_uni)
1405         PL_parser->last_uni = buf + last_uni_pos;
1406     if (PL_parser->last_lop)
1407         PL_parser->last_lop = buf + last_lop_pos;
1408     if (PL_parser->preambling != NOLINE) {
1409         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1410         PL_parser->preambling = NOLINE;
1411     }
1412     if (   got_some_for_debugger
1413         && PERLDB_LINE_OR_SAVESRC
1414         && PL_curstash != PL_debstash)
1415     {
1416         /* debugger active and we're not compiling the debugger code,
1417          * so store the line into the debugger's array of lines
1418          */
1419         update_debugger_info(NULL, buf+old_bufend_pos,
1420             new_bufend_pos-old_bufend_pos);
1421     }
1422     return got_some;
1423 }
1424
1425 /*
1426 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1427
1428 Looks ahead one (Unicode) character in the text currently being lexed.
1429 Returns the codepoint (unsigned integer value) of the next character,
1430 or -1 if lexing has reached the end of the input text.  To consume the
1431 peeked character, use L</lex_read_unichar>.
1432
1433 If the next character is in (or extends into) the next chunk of input
1434 text, the next chunk will be read in.  Normally the current chunk will be
1435 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1436 bit set, then the current chunk will not be discarded.
1437
1438 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1439 is encountered, an exception is generated.
1440
1441 =cut
1442 */
1443
1444 I32
1445 Perl_lex_peek_unichar(pTHX_ U32 flags)
1446 {
1447     dVAR;
1448     char *s, *bufend;
1449     if (flags & ~(LEX_KEEP_PREVIOUS))
1450         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1451     s = PL_parser->bufptr;
1452     bufend = PL_parser->bufend;
1453     if (UTF) {
1454         U8 head;
1455         I32 unichar;
1456         STRLEN len, retlen;
1457         if (s == bufend) {
1458             if (!lex_next_chunk(flags))
1459                 return -1;
1460             s = PL_parser->bufptr;
1461             bufend = PL_parser->bufend;
1462         }
1463         head = (U8)*s;
1464         if (UTF8_IS_INVARIANT(head))
1465             return head;
1466         if (UTF8_IS_START(head)) {
1467             len = UTF8SKIP(&head);
1468             while ((STRLEN)(bufend-s) < len) {
1469                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1470                     break;
1471                 s = PL_parser->bufptr;
1472                 bufend = PL_parser->bufend;
1473             }
1474         }
1475         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1476         if (retlen == (STRLEN)-1) {
1477             _force_out_malformed_utf8_message((U8 *) s,
1478                                               (U8 *) bufend,
1479                                               0,
1480                                               1 /* 1 means die */ );
1481             NOT_REACHED; /* NOTREACHED */
1482         }
1483         return unichar;
1484     } else {
1485         if (s == bufend) {
1486             if (!lex_next_chunk(flags))
1487                 return -1;
1488             s = PL_parser->bufptr;
1489         }
1490         return (U8)*s;
1491     }
1492 }
1493
1494 /*
1495 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1496
1497 Reads the next (Unicode) character in the text currently being lexed.
1498 Returns the codepoint (unsigned integer value) of the character read,
1499 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1500 if lexing has reached the end of the input text.  To non-destructively
1501 examine the next character, use L</lex_peek_unichar> instead.
1502
1503 If the next character is in (or extends into) the next chunk of input
1504 text, the next chunk will be read in.  Normally the current chunk will be
1505 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1506 bit set, then the current chunk will not be discarded.
1507
1508 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1509 is encountered, an exception is generated.
1510
1511 =cut
1512 */
1513
1514 I32
1515 Perl_lex_read_unichar(pTHX_ U32 flags)
1516 {
1517     I32 c;
1518     if (flags & ~(LEX_KEEP_PREVIOUS))
1519         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1520     c = lex_peek_unichar(flags);
1521     if (c != -1) {
1522         if (c == '\n')
1523             COPLINE_INC_WITH_HERELINES;
1524         if (UTF)
1525             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1526         else
1527             ++(PL_parser->bufptr);
1528     }
1529     return c;
1530 }
1531
1532 /*
1533 =for apidoc Amx|void|lex_read_space|U32 flags
1534
1535 Reads optional spaces, in Perl style, in the text currently being
1536 lexed.  The spaces may include ordinary whitespace characters and
1537 Perl-style comments.  C<#line> directives are processed if encountered.
1538 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1539 at a non-space character (or the end of the input text).
1540
1541 If spaces extend into the next chunk of input text, the next chunk will
1542 be read in.  Normally the current chunk will be discarded at the same
1543 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1544 chunk will not be discarded.
1545
1546 =cut
1547 */
1548
1549 #define LEX_NO_INCLINE    0x40000000
1550 #define LEX_NO_NEXT_CHUNK 0x80000000
1551
1552 void
1553 Perl_lex_read_space(pTHX_ U32 flags)
1554 {
1555     char *s, *bufend;
1556     const bool can_incline = !(flags & LEX_NO_INCLINE);
1557     bool need_incline = 0;
1558     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1559         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1560     s = PL_parser->bufptr;
1561     bufend = PL_parser->bufend;
1562     while (1) {
1563         char c = *s;
1564         if (c == '#') {
1565             do {
1566                 c = *++s;
1567             } while (!(c == '\n' || (c == 0 && s == bufend)));
1568         } else if (c == '\n') {
1569             s++;
1570             if (can_incline) {
1571                 PL_parser->linestart = s;
1572                 if (s == bufend)
1573                     need_incline = 1;
1574                 else
1575                     incline(s);
1576             }
1577         } else if (isSPACE(c)) {
1578             s++;
1579         } else if (c == 0 && s == bufend) {
1580             bool got_more;
1581             line_t l;
1582             if (flags & LEX_NO_NEXT_CHUNK)
1583                 break;
1584             PL_parser->bufptr = s;
1585             l = CopLINE(PL_curcop);
1586             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1587             got_more = lex_next_chunk(flags);
1588             CopLINE_set(PL_curcop, l);
1589             s = PL_parser->bufptr;
1590             bufend = PL_parser->bufend;
1591             if (!got_more)
1592                 break;
1593             if (can_incline && need_incline && PL_parser->rsfp) {
1594                 incline(s);
1595                 need_incline = 0;
1596             }
1597         } else if (!c) {
1598             s++;
1599         } else {
1600             break;
1601         }
1602     }
1603     PL_parser->bufptr = s;
1604 }
1605
1606 /*
1607
1608 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1609
1610 This function performs syntax checking on a prototype, C<proto>.
1611 If C<warn> is true, any illegal characters or mismatched brackets
1612 will trigger illegalproto warnings, declaring that they were
1613 detected in the prototype for C<name>.
1614
1615 The return value is C<true> if this is a valid prototype, and
1616 C<false> if it is not, regardless of whether C<warn> was C<true> or
1617 C<false>.
1618
1619 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1620
1621 =cut
1622
1623  */
1624
1625 bool
1626 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1627 {
1628     STRLEN len, origlen;
1629     char *p;
1630     bool bad_proto = FALSE;
1631     bool in_brackets = FALSE;
1632     bool after_slash = FALSE;
1633     char greedy_proto = ' ';
1634     bool proto_after_greedy_proto = FALSE;
1635     bool must_be_last = FALSE;
1636     bool underscore = FALSE;
1637     bool bad_proto_after_underscore = FALSE;
1638
1639     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1640
1641     if (!proto)
1642         return TRUE;
1643
1644     p = SvPV(proto, len);
1645     origlen = len;
1646     for (; len--; p++) {
1647         if (!isSPACE(*p)) {
1648             if (must_be_last)
1649                 proto_after_greedy_proto = TRUE;
1650             if (underscore) {
1651                 if (!strchr(";@%", *p))
1652                     bad_proto_after_underscore = TRUE;
1653                 underscore = FALSE;
1654             }
1655             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1656                 bad_proto = TRUE;
1657             }
1658             else {
1659                 if (*p == '[')
1660                     in_brackets = TRUE;
1661                 else if (*p == ']')
1662                     in_brackets = FALSE;
1663                 else if ((*p == '@' || *p == '%')
1664                          && !after_slash
1665                          && !in_brackets )
1666                 {
1667                     must_be_last = TRUE;
1668                     greedy_proto = *p;
1669                 }
1670                 else if (*p == '_')
1671                     underscore = TRUE;
1672             }
1673             if (*p == '\\')
1674                 after_slash = TRUE;
1675             else
1676                 after_slash = FALSE;
1677         }
1678     }
1679
1680     if (warn) {
1681         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1682         p -= origlen;
1683         p = SvUTF8(proto)
1684             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1685                              origlen, UNI_DISPLAY_ISPRINT)
1686             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1687
1688         if (proto_after_greedy_proto)
1689             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1690                         "Prototype after '%c' for %" SVf " : %s",
1691                         greedy_proto, SVfARG(name), p);
1692         if (in_brackets)
1693             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1694                         "Missing ']' in prototype for %" SVf " : %s",
1695                         SVfARG(name), p);
1696         if (bad_proto)
1697             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1698                         "Illegal character in prototype for %" SVf " : %s",
1699                         SVfARG(name), p);
1700         if (bad_proto_after_underscore)
1701             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1702                         "Illegal character after '_' in prototype for %" SVf " : %s",
1703                         SVfARG(name), p);
1704     }
1705
1706     return (! (proto_after_greedy_proto || bad_proto) );
1707 }
1708
1709 /*
1710  * S_incline
1711  * This subroutine has nothing to do with tilting, whether at windmills
1712  * or pinball tables.  Its name is short for "increment line".  It
1713  * increments the current line number in CopLINE(PL_curcop) and checks
1714  * to see whether the line starts with a comment of the form
1715  *    # line 500 "foo.pm"
1716  * If so, it sets the current line number and file to the values in the comment.
1717  */
1718
1719 STATIC void
1720 S_incline(pTHX_ const char *s)
1721 {
1722     const char *t;
1723     const char *n;
1724     const char *e;
1725     line_t line_num;
1726     UV uv;
1727
1728     PERL_ARGS_ASSERT_INCLINE;
1729
1730     COPLINE_INC_WITH_HERELINES;
1731     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1732      && s+1 == PL_bufend && *s == ';') {
1733         /* fake newline in string eval */
1734         CopLINE_dec(PL_curcop);
1735         return;
1736     }
1737     if (*s++ != '#')
1738         return;
1739     while (SPACE_OR_TAB(*s))
1740         s++;
1741     if (strEQs(s, "line"))
1742         s += 4;
1743     else
1744         return;
1745     if (SPACE_OR_TAB(*s))
1746         s++;
1747     else
1748         return;
1749     while (SPACE_OR_TAB(*s))
1750         s++;
1751     if (!isDIGIT(*s))
1752         return;
1753
1754     n = s;
1755     while (isDIGIT(*s))
1756         s++;
1757     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1758         return;
1759     while (SPACE_OR_TAB(*s))
1760         s++;
1761     if (*s == '"' && (t = strchr(s+1, '"'))) {
1762         s++;
1763         e = t + 1;
1764     }
1765     else {
1766         t = s;
1767         while (*t && !isSPACE(*t))
1768             t++;
1769         e = t;
1770     }
1771     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1772         e++;
1773     if (*e != '\n' && *e != '\0')
1774         return;         /* false alarm */
1775
1776     if (!grok_atoUV(n, &uv, &e))
1777         return;
1778     line_num = ((line_t)uv) - 1;
1779
1780     if (t - s > 0) {
1781         const STRLEN len = t - s;
1782
1783         if (!PL_rsfp && !PL_parser->filtered) {
1784             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1785              * to *{"::_<newfilename"} */
1786             /* However, the long form of evals is only turned on by the
1787                debugger - usually they're "(eval %lu)" */
1788             GV * const cfgv = CopFILEGV(PL_curcop);
1789             if (cfgv) {
1790                 char smallbuf[128];
1791                 STRLEN tmplen2 = len;
1792                 char *tmpbuf2;
1793                 GV *gv2;
1794
1795                 if (tmplen2 + 2 <= sizeof smallbuf)
1796                     tmpbuf2 = smallbuf;
1797                 else
1798                     Newx(tmpbuf2, tmplen2 + 2, char);
1799
1800                 tmpbuf2[0] = '_';
1801                 tmpbuf2[1] = '<';
1802
1803                 memcpy(tmpbuf2 + 2, s, tmplen2);
1804                 tmplen2 += 2;
1805
1806                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1807                 if (!isGV(gv2)) {
1808                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1809                     /* adjust ${"::_<newfilename"} to store the new file name */
1810                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1811                     /* The line number may differ. If that is the case,
1812                        alias the saved lines that are in the array.
1813                        Otherwise alias the whole array. */
1814                     if (CopLINE(PL_curcop) == line_num) {
1815                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1816                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1817                     }
1818                     else if (GvAV(cfgv)) {
1819                         AV * const av = GvAV(cfgv);
1820                         const I32 start = CopLINE(PL_curcop)+1;
1821                         I32 items = AvFILLp(av) - start;
1822                         if (items > 0) {
1823                             AV * const av2 = GvAVn(gv2);
1824                             SV **svp = AvARRAY(av) + start;
1825                             I32 l = (I32)line_num+1;
1826                             while (items--)
1827                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1828                         }
1829                     }
1830                 }
1831
1832                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1833             }
1834         }
1835         CopFILE_free(PL_curcop);
1836         CopFILE_setn(PL_curcop, s, len);
1837     }
1838     CopLINE_set(PL_curcop, line_num);
1839 }
1840
1841 STATIC void
1842 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1843 {
1844     AV *av = CopFILEAVx(PL_curcop);
1845     if (av) {
1846         SV * sv;
1847         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1848         else {
1849             sv = *av_fetch(av, 0, 1);
1850             SvUPGRADE(sv, SVt_PVMG);
1851         }
1852         if (!SvPOK(sv)) SvPVCLEAR(sv);
1853         if (orig_sv)
1854             sv_catsv(sv, orig_sv);
1855         else
1856             sv_catpvn(sv, buf, len);
1857         if (!SvIOK(sv)) {
1858             (void)SvIOK_on(sv);
1859             SvIV_set(sv, 0);
1860         }
1861         if (PL_parser->preambling == NOLINE)
1862             av_store(av, CopLINE(PL_curcop), sv);
1863     }
1864 }
1865
1866 /*
1867  * skipspace
1868  * Called to gobble the appropriate amount and type of whitespace.
1869  * Skips comments as well.
1870  * Returns the next character after the whitespace that is skipped.
1871  *
1872  * peekspace
1873  * Same thing, but look ahead without incrementing line numbers or
1874  * adjusting PL_linestart.
1875  */
1876
1877 #define skipspace(s) skipspace_flags(s, 0)
1878 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1879
1880 STATIC char *
1881 S_skipspace_flags(pTHX_ char *s, U32 flags)
1882 {
1883     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1884     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1885         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1886             s++;
1887     } else {
1888         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1889         PL_bufptr = s;
1890         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1891                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1892                     LEX_NO_NEXT_CHUNK : 0));
1893         s = PL_bufptr;
1894         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1895         if (PL_linestart > PL_bufptr)
1896             PL_bufptr = PL_linestart;
1897         return s;
1898     }
1899     return s;
1900 }
1901
1902 /*
1903  * S_check_uni
1904  * Check the unary operators to ensure there's no ambiguity in how they're
1905  * used.  An ambiguous piece of code would be:
1906  *     rand + 5
1907  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1908  * the +5 is its argument.
1909  */
1910
1911 STATIC void
1912 S_check_uni(pTHX)
1913 {
1914     const char *s;
1915     const char *t;
1916
1917     if (PL_oldoldbufptr != PL_last_uni)
1918         return;
1919     while (isSPACE(*PL_last_uni))
1920         PL_last_uni++;
1921     s = PL_last_uni;
1922     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1923         s += UTF ? UTF8SKIP(s) : 1;
1924     if ((t = strchr(s, '(')) && t < PL_bufptr)
1925         return;
1926
1927     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1928                      "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1929                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1930 }
1931
1932 /*
1933  * LOP : macro to build a list operator.  Its behaviour has been replaced
1934  * with a subroutine, S_lop() for which LOP is just another name.
1935  */
1936
1937 #define LOP(f,x) return lop(f,x,s)
1938
1939 /*
1940  * S_lop
1941  * Build a list operator (or something that might be one).  The rules:
1942  *  - if we have a next token, then it's a list operator (no parens) for
1943  *    which the next token has already been parsed; e.g.,
1944  *       sort foo @args
1945  *       sort foo (@args)
1946  *  - if the next thing is an opening paren, then it's a function
1947  *  - else it's a list operator
1948  */
1949
1950 STATIC I32
1951 S_lop(pTHX_ I32 f, U8 x, char *s)
1952 {
1953     PERL_ARGS_ASSERT_LOP;
1954
1955     pl_yylval.ival = f;
1956     CLINE;
1957     PL_bufptr = s;
1958     PL_last_lop = PL_oldbufptr;
1959     PL_last_lop_op = (OPCODE)f;
1960     if (PL_nexttoke)
1961         goto lstop;
1962     PL_expect = x;
1963     if (*s == '(')
1964         return REPORT(FUNC);
1965     s = skipspace(s);
1966     if (*s == '(')
1967         return REPORT(FUNC);
1968     else {
1969         lstop:
1970         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1971             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1972         return REPORT(LSTOP);
1973     }
1974 }
1975
1976 /*
1977  * S_force_next
1978  * When the lexer realizes it knows the next token (for instance,
1979  * it is reordering tokens for the parser) then it can call S_force_next
1980  * to know what token to return the next time the lexer is called.  Caller
1981  * will need to set PL_nextval[] and possibly PL_expect to ensure
1982  * the lexer handles the token correctly.
1983  */
1984
1985 STATIC void
1986 S_force_next(pTHX_ I32 type)
1987 {
1988 #ifdef DEBUGGING
1989     if (DEBUG_T_TEST) {
1990         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1991         tokereport(type, &NEXTVAL_NEXTTOKE);
1992     }
1993 #endif
1994     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
1995     PL_nexttype[PL_nexttoke] = type;
1996     PL_nexttoke++;
1997 }
1998
1999 /*
2000  * S_postderef
2001  *
2002  * This subroutine handles postfix deref syntax after the arrow has already
2003  * been emitted.  @* $* etc. are emitted as two separate token right here.
2004  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2005  * only the first, leaving yylex to find the next.
2006  */
2007
2008 static int
2009 S_postderef(pTHX_ int const funny, char const next)
2010 {
2011     assert(funny == DOLSHARP || strchr("$@%&*", funny));
2012     if (next == '*') {
2013         PL_expect = XOPERATOR;
2014         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2015             assert('@' == funny || '$' == funny || DOLSHARP == funny);
2016             PL_lex_state = LEX_INTERPEND;
2017             if ('@' == funny)
2018                 force_next(POSTJOIN);
2019         }
2020         force_next(next);
2021         PL_bufptr+=2;
2022     }
2023     else {
2024         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2025          && !PL_lex_brackets)
2026             PL_lex_dojoin = 2;
2027         PL_expect = XOPERATOR;
2028         PL_bufptr++;
2029     }
2030     return funny;
2031 }
2032
2033 void
2034 Perl_yyunlex(pTHX)
2035 {
2036     int yyc = PL_parser->yychar;
2037     if (yyc != YYEMPTY) {
2038         if (yyc) {
2039             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2040             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2041                 PL_lex_allbrackets--;
2042                 PL_lex_brackets--;
2043                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2044             } else if (yyc == '('/*)*/) {
2045                 PL_lex_allbrackets--;
2046                 yyc |= (2<<24);
2047             }
2048             force_next(yyc);
2049         }
2050         PL_parser->yychar = YYEMPTY;
2051     }
2052 }
2053
2054 STATIC SV *
2055 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2056 {
2057     SV * const sv = newSVpvn_utf8(start, len,
2058                           !IN_BYTES
2059                           && UTF
2060                           && !is_utf8_invariant_string((const U8*)start, len)
2061                           && is_utf8_string((const U8*)start, len));
2062     return sv;
2063 }
2064
2065 /*
2066  * S_force_word
2067  * When the lexer knows the next thing is a word (for instance, it has
2068  * just seen -> and it knows that the next char is a word char, then
2069  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2070  * lookahead.
2071  *
2072  * Arguments:
2073  *   char *start : buffer position (must be within PL_linestr)
2074  *   int token   : PL_next* will be this type of bare word
2075  *                 (e.g., METHOD,BAREWORD)
2076  *   int check_keyword : if true, Perl checks to make sure the word isn't
2077  *       a keyword (do this if the word is a label, e.g. goto FOO)
2078  *   int allow_pack : if true, : characters will also be allowed (require,
2079  *       use, etc. do this)
2080  */
2081
2082 STATIC char *
2083 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2084 {
2085     char *s;
2086     STRLEN len;
2087
2088     PERL_ARGS_ASSERT_FORCE_WORD;
2089
2090     start = skipspace(start);
2091     s = start;
2092     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2093         || (allow_pack && *s == ':' && s[1] == ':') )
2094     {
2095         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2096         if (check_keyword) {
2097           char *s2 = PL_tokenbuf;
2098           STRLEN len2 = len;
2099           if (allow_pack && len > 6 && strEQs(s2, "CORE::"))
2100             s2 += 6, len2 -= 6;
2101           if (keyword(s2, len2, 0))
2102             return start;
2103         }
2104         if (token == METHOD) {
2105             s = skipspace(s);
2106             if (*s == '(')
2107                 PL_expect = XTERM;
2108             else {
2109                 PL_expect = XOPERATOR;
2110             }
2111         }
2112         NEXTVAL_NEXTTOKE.opval
2113             = newSVOP(OP_CONST,0,
2114                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2115         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2116         force_next(token);
2117     }
2118     return s;
2119 }
2120
2121 /*
2122  * S_force_ident
2123  * Called when the lexer wants $foo *foo &foo etc, but the program
2124  * text only contains the "foo" portion.  The first argument is a pointer
2125  * to the "foo", and the second argument is the type symbol to prefix.
2126  * Forces the next token to be a "BAREWORD".
2127  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2128  */
2129
2130 STATIC void
2131 S_force_ident(pTHX_ const char *s, int kind)
2132 {
2133     PERL_ARGS_ASSERT_FORCE_IDENT;
2134
2135     if (s[0]) {
2136         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2137         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2138                                                                 UTF ? SVf_UTF8 : 0));
2139         NEXTVAL_NEXTTOKE.opval = o;
2140         force_next(BAREWORD);
2141         if (kind) {
2142             o->op_private = OPpCONST_ENTERED;
2143             /* XXX see note in pp_entereval() for why we forgo typo
2144                warnings if the symbol must be introduced in an eval.
2145                GSAR 96-10-12 */
2146             gv_fetchpvn_flags(s, len,
2147                               (PL_in_eval ? GV_ADDMULTI
2148                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2149                               kind == '$' ? SVt_PV :
2150                               kind == '@' ? SVt_PVAV :
2151                               kind == '%' ? SVt_PVHV :
2152                               SVt_PVGV
2153                               );
2154         }
2155     }
2156 }
2157
2158 static void
2159 S_force_ident_maybe_lex(pTHX_ char pit)
2160 {
2161     NEXTVAL_NEXTTOKE.ival = pit;
2162     force_next('p');
2163 }
2164
2165 NV
2166 Perl_str_to_version(pTHX_ SV *sv)
2167 {
2168     NV retval = 0.0;
2169     NV nshift = 1.0;
2170     STRLEN len;
2171     const char *start = SvPV_const(sv,len);
2172     const char * const end = start + len;
2173     const bool utf = cBOOL(SvUTF8(sv));
2174
2175     PERL_ARGS_ASSERT_STR_TO_VERSION;
2176
2177     while (start < end) {
2178         STRLEN skip;
2179         UV n;
2180         if (utf)
2181             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2182         else {
2183             n = *(U8*)start;
2184             skip = 1;
2185         }
2186         retval += ((NV)n)/nshift;
2187         start += skip;
2188         nshift *= 1000;
2189     }
2190     return retval;
2191 }
2192
2193 /*
2194  * S_force_version
2195  * Forces the next token to be a version number.
2196  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2197  * and if "guessing" is TRUE, then no new token is created (and the caller
2198  * must use an alternative parsing method).
2199  */
2200
2201 STATIC char *
2202 S_force_version(pTHX_ char *s, int guessing)
2203 {
2204     OP *version = NULL;
2205     char *d;
2206
2207     PERL_ARGS_ASSERT_FORCE_VERSION;
2208
2209     s = skipspace(s);
2210
2211     d = s;
2212     if (*d == 'v')
2213         d++;
2214     if (isDIGIT(*d)) {
2215         while (isDIGIT(*d) || *d == '_' || *d == '.')
2216             d++;
2217         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2218             SV *ver;
2219             s = scan_num(s, &pl_yylval);
2220             version = pl_yylval.opval;
2221             ver = cSVOPx(version)->op_sv;
2222             if (SvPOK(ver) && !SvNIOK(ver)) {
2223                 SvUPGRADE(ver, SVt_PVNV);
2224                 SvNV_set(ver, str_to_version(ver));
2225                 SvNOK_on(ver);          /* hint that it is a version */
2226             }
2227         }
2228         else if (guessing) {
2229             return s;
2230         }
2231     }
2232
2233     /* NOTE: The parser sees the package name and the VERSION swapped */
2234     NEXTVAL_NEXTTOKE.opval = version;
2235     force_next(BAREWORD);
2236
2237     return s;
2238 }
2239
2240 /*
2241  * S_force_strict_version
2242  * Forces the next token to be a version number using strict syntax rules.
2243  */
2244
2245 STATIC char *
2246 S_force_strict_version(pTHX_ char *s)
2247 {
2248     OP *version = NULL;
2249     const char *errstr = NULL;
2250
2251     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2252
2253     while (isSPACE(*s)) /* leading whitespace */
2254         s++;
2255
2256     if (is_STRICT_VERSION(s,&errstr)) {
2257         SV *ver = newSV(0);
2258         s = (char *)scan_version(s, ver, 0);
2259         version = newSVOP(OP_CONST, 0, ver);
2260     }
2261     else if ((*s != ';' && *s != '{' && *s != '}' )
2262              && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2263     {
2264         PL_bufptr = s;
2265         if (errstr)
2266             yyerror(errstr); /* version required */
2267         return s;
2268     }
2269
2270     /* NOTE: The parser sees the package name and the VERSION swapped */
2271     NEXTVAL_NEXTTOKE.opval = version;
2272     force_next(BAREWORD);
2273
2274     return s;
2275 }
2276
2277 /*
2278  * S_tokeq
2279  * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2280  * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2281  * unchanged, and a new SV containing the modified input is returned.
2282  */
2283
2284 STATIC SV *
2285 S_tokeq(pTHX_ SV *sv)
2286 {
2287     char *s;
2288     char *send;
2289     char *d;
2290     SV *pv = sv;
2291
2292     PERL_ARGS_ASSERT_TOKEQ;
2293
2294     assert (SvPOK(sv));
2295     assert (SvLEN(sv));
2296     assert (!SvIsCOW(sv));
2297     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2298         goto finish;
2299     s = SvPVX(sv);
2300     send = SvEND(sv);
2301     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2302     while (s < send && !(*s == '\\' && s[1] == '\\'))
2303         s++;
2304     if (s == send)
2305         goto finish;
2306     d = s;
2307     if ( PL_hints & HINT_NEW_STRING ) {
2308         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2309                             SVs_TEMP | SvUTF8(sv));
2310     }
2311     while (s < send) {
2312         if (*s == '\\') {
2313             if (s + 1 < send && (s[1] == '\\'))
2314                 s++;            /* all that, just for this */
2315         }
2316         *d++ = *s++;
2317     }
2318     *d = '\0';
2319     SvCUR_set(sv, d - SvPVX_const(sv));
2320   finish:
2321     if ( PL_hints & HINT_NEW_STRING )
2322        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2323     return sv;
2324 }
2325
2326 /*
2327  * Now come three functions related to double-quote context,
2328  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2329  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2330  * interact with PL_lex_state, and create fake ( ... ) argument lists
2331  * to handle functions and concatenation.
2332  * For example,
2333  *   "foo\lbar"
2334  * is tokenised as
2335  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2336  */
2337
2338 /*
2339  * S_sublex_start
2340  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2341  *
2342  * Pattern matching will set PL_lex_op to the pattern-matching op to
2343  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2344  *
2345  * OP_CONST is easy--just make the new op and return.
2346  *
2347  * Everything else becomes a FUNC.
2348  *
2349  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2350  * had an OP_CONST.  This just sets us up for a
2351  * call to S_sublex_push().
2352  */
2353
2354 STATIC I32
2355 S_sublex_start(pTHX)
2356 {
2357     const I32 op_type = pl_yylval.ival;
2358
2359     if (op_type == OP_NULL) {
2360         pl_yylval.opval = PL_lex_op;
2361         PL_lex_op = NULL;
2362         return THING;
2363     }
2364     if (op_type == OP_CONST) {
2365         SV *sv = PL_lex_stuff;
2366         PL_lex_stuff = NULL;
2367         sv = tokeq(sv);
2368
2369         if (SvTYPE(sv) == SVt_PVIV) {
2370             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2371             STRLEN len;
2372             const char * const p = SvPV_const(sv, len);
2373             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2374             SvREFCNT_dec(sv);
2375             sv = nsv;
2376         }
2377         pl_yylval.opval = newSVOP(op_type, 0, sv);
2378         return THING;
2379     }
2380
2381     PL_parser->lex_super_state = PL_lex_state;
2382     PL_parser->lex_sub_inwhat = (U16)op_type;
2383     PL_parser->lex_sub_op = PL_lex_op;
2384     PL_lex_state = LEX_INTERPPUSH;
2385
2386     PL_expect = XTERM;
2387     if (PL_lex_op) {
2388         pl_yylval.opval = PL_lex_op;
2389         PL_lex_op = NULL;
2390         return PMFUNC;
2391     }
2392     else
2393         return FUNC;
2394 }
2395
2396 /*
2397  * S_sublex_push
2398  * Create a new scope to save the lexing state.  The scope will be
2399  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2400  * to the uc, lc, etc. found before.
2401  * Sets PL_lex_state to LEX_INTERPCONCAT.
2402  */
2403
2404 STATIC I32
2405 S_sublex_push(pTHX)
2406 {
2407     LEXSHARED *shared;
2408     const bool is_heredoc = PL_multi_close == '<';
2409     ENTER;
2410
2411     PL_lex_state = PL_parser->lex_super_state;
2412     SAVEI8(PL_lex_dojoin);
2413     SAVEI32(PL_lex_brackets);
2414     SAVEI32(PL_lex_allbrackets);
2415     SAVEI32(PL_lex_formbrack);
2416     SAVEI8(PL_lex_fakeeof);
2417     SAVEI32(PL_lex_casemods);
2418     SAVEI32(PL_lex_starts);
2419     SAVEI8(PL_lex_state);
2420     SAVESPTR(PL_lex_repl);
2421     SAVEVPTR(PL_lex_inpat);
2422     SAVEI16(PL_lex_inwhat);
2423     if (is_heredoc)
2424     {
2425         SAVECOPLINE(PL_curcop);
2426         SAVEI32(PL_multi_end);
2427         SAVEI32(PL_parser->herelines);
2428         PL_parser->herelines = 0;
2429     }
2430     SAVEIV(PL_multi_close);
2431     SAVEPPTR(PL_bufptr);
2432     SAVEPPTR(PL_bufend);
2433     SAVEPPTR(PL_oldbufptr);
2434     SAVEPPTR(PL_oldoldbufptr);
2435     SAVEPPTR(PL_last_lop);
2436     SAVEPPTR(PL_last_uni);
2437     SAVEPPTR(PL_linestart);
2438     SAVESPTR(PL_linestr);
2439     SAVEGENERICPV(PL_lex_brackstack);
2440     SAVEGENERICPV(PL_lex_casestack);
2441     SAVEGENERICPV(PL_parser->lex_shared);
2442     SAVEBOOL(PL_parser->lex_re_reparsing);
2443     SAVEI32(PL_copline);
2444
2445     /* The here-doc parser needs to be able to peek into outer lexing
2446        scopes to find the body of the here-doc.  So we put PL_linestr and
2447        PL_bufptr into lex_shared, to ‘share’ those values.
2448      */
2449     PL_parser->lex_shared->ls_linestr = PL_linestr;
2450     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2451
2452     PL_linestr = PL_lex_stuff;
2453     PL_lex_repl = PL_parser->lex_sub_repl;
2454     PL_lex_stuff = NULL;
2455     PL_parser->lex_sub_repl = NULL;
2456
2457     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2458        set for an inner quote-like operator and then an error causes scope-
2459        popping.  We must not have a PL_lex_stuff value left dangling, as
2460        that breaks assumptions elsewhere.  See bug #123617.  */
2461     SAVEGENERICSV(PL_lex_stuff);
2462     SAVEGENERICSV(PL_parser->lex_sub_repl);
2463
2464     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2465         = SvPVX(PL_linestr);
2466     PL_bufend += SvCUR(PL_linestr);
2467     PL_last_lop = PL_last_uni = NULL;
2468     SAVEFREESV(PL_linestr);
2469     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2470
2471     PL_lex_dojoin = FALSE;
2472     PL_lex_brackets = PL_lex_formbrack = 0;
2473     PL_lex_allbrackets = 0;
2474     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2475     Newx(PL_lex_brackstack, 120, char);
2476     Newx(PL_lex_casestack, 12, char);
2477     PL_lex_casemods = 0;
2478     *PL_lex_casestack = '\0';
2479     PL_lex_starts = 0;
2480     PL_lex_state = LEX_INTERPCONCAT;
2481     if (is_heredoc)
2482         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2483     PL_copline = NOLINE;
2484
2485     Newxz(shared, 1, LEXSHARED);
2486     shared->ls_prev = PL_parser->lex_shared;
2487     PL_parser->lex_shared = shared;
2488
2489     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2490     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2491     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2492         PL_lex_inpat = PL_parser->lex_sub_op;
2493     else
2494         PL_lex_inpat = NULL;
2495
2496     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2497     PL_in_eval &= ~EVAL_RE_REPARSING;
2498
2499     return '(';
2500 }
2501
2502 /*
2503  * S_sublex_done
2504  * Restores lexer state after a S_sublex_push.
2505  */
2506
2507 STATIC I32
2508 S_sublex_done(pTHX)
2509 {
2510     if (!PL_lex_starts++) {
2511         SV * const sv = newSVpvs("");
2512         if (SvUTF8(PL_linestr))
2513             SvUTF8_on(sv);
2514         PL_expect = XOPERATOR;
2515         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2516         return THING;
2517     }
2518
2519     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2520         PL_lex_state = LEX_INTERPCASEMOD;
2521         return yylex();
2522     }
2523
2524     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2525     assert(PL_lex_inwhat != OP_TRANSR);
2526     if (PL_lex_repl) {
2527         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2528         PL_linestr = PL_lex_repl;
2529         PL_lex_inpat = 0;
2530         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2531         PL_bufend += SvCUR(PL_linestr);
2532         PL_last_lop = PL_last_uni = NULL;
2533         PL_lex_dojoin = FALSE;
2534         PL_lex_brackets = 0;
2535         PL_lex_allbrackets = 0;
2536         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2537         PL_lex_casemods = 0;
2538         *PL_lex_casestack = '\0';
2539         PL_lex_starts = 0;
2540         if (SvEVALED(PL_lex_repl)) {
2541             PL_lex_state = LEX_INTERPNORMAL;
2542             PL_lex_starts++;
2543             /*  we don't clear PL_lex_repl here, so that we can check later
2544                 whether this is an evalled subst; that means we rely on the
2545                 logic to ensure sublex_done() is called again only via the
2546                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2547         }
2548         else {
2549             PL_lex_state = LEX_INTERPCONCAT;
2550             PL_lex_repl = NULL;
2551         }
2552         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2553             CopLINE(PL_curcop) +=
2554                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2555                  + PL_parser->herelines;
2556             PL_parser->herelines = 0;
2557         }
2558         return '/';
2559     }
2560     else {
2561         const line_t l = CopLINE(PL_curcop);
2562         LEAVE;
2563         if (PL_multi_close == '<')
2564             PL_parser->herelines += l - PL_multi_end;
2565         PL_bufend = SvPVX(PL_linestr);
2566         PL_bufend += SvCUR(PL_linestr);
2567         PL_expect = XOPERATOR;
2568         return ')';
2569     }
2570 }
2571
2572 STATIC SV*
2573 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2574 {
2575     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2576      * interior, hence to the "}".  Finds what the name resolves to, returning
2577      * an SV* containing it; NULL if no valid one found */
2578
2579     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2580
2581     HV * table;
2582     SV **cvp;
2583     SV *cv;
2584     SV *rv;
2585     HV *stash;
2586     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2587
2588     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2589
2590     if (!SvCUR(res)) {
2591         /* diag_listed_as: Unknown charname '%s' */
2592         yyerror("Unknown charname ''");
2593         return NULL;
2594     }
2595
2596     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2597                         /* include the <}> */
2598                         e - backslash_ptr + 1);
2599     if (! SvPOK(res)) {
2600         SvREFCNT_dec_NN(res);
2601         return NULL;
2602     }
2603
2604     /* See if the charnames handler is the Perl core's, and if so, we can skip
2605      * the validation needed for a user-supplied one, as Perl's does its own
2606      * validation. */
2607     table = GvHV(PL_hintgv);             /* ^H */
2608     cvp = hv_fetchs(table, "charnames", FALSE);
2609     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2610         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2611     {
2612         const char * const name = HvNAME(stash);
2613         if (HvNAMELEN(stash) == sizeof("_charnames")-1
2614          && strEQ(name, "_charnames")) {
2615            return res;
2616        }
2617     }
2618
2619     /* Here, it isn't Perl's charname handler.  We can't rely on a
2620      * user-supplied handler to validate the input name.  For non-ut8 input,
2621      * look to see that the first character is legal.  Then loop through the
2622      * rest checking that each is a continuation */
2623
2624     /* This code makes the reasonable assumption that the only Latin1-range
2625      * characters that begin a character name alias are alphabetic, otherwise
2626      * would have to create a isCHARNAME_BEGIN macro */
2627
2628     if (! UTF) {
2629         if (! isALPHAU(*s)) {
2630             goto bad_charname;
2631         }
2632         s++;
2633         while (s < e) {
2634             if (! isCHARNAME_CONT(*s)) {
2635                 goto bad_charname;
2636             }
2637             if (*s == ' ' && *(s-1) == ' ') {
2638                 goto multi_spaces;
2639             }
2640             s++;
2641         }
2642     }
2643     else {
2644         /* Similarly for utf8.  For invariants can check directly; for other
2645          * Latin1, can calculate their code point and check; otherwise  use a
2646          * swash */
2647         if (UTF8_IS_INVARIANT(*s)) {
2648             if (! isALPHAU(*s)) {
2649                 goto bad_charname;
2650             }
2651             s++;
2652         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2653             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2654                 goto bad_charname;
2655             }
2656             s += 2;
2657         }
2658         else {
2659             if (! PL_utf8_charname_begin) {
2660                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2661                 PL_utf8_charname_begin = _core_swash_init("utf8",
2662                                                         "_Perl_Charname_Begin",
2663                                                         &PL_sv_undef,
2664                                                         1, 0, NULL, &flags);
2665             }
2666             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2667                 goto bad_charname;
2668             }
2669             s += UTF8SKIP(s);
2670         }
2671
2672         while (s < e) {
2673             if (UTF8_IS_INVARIANT(*s)) {
2674                 if (! isCHARNAME_CONT(*s)) {
2675                     goto bad_charname;
2676                 }
2677                 if (*s == ' ' && *(s-1) == ' ') {
2678                     goto multi_spaces;
2679                 }
2680                 s++;
2681             }
2682             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2683                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2684                 {
2685                     goto bad_charname;
2686                 }
2687                 s += 2;
2688             }
2689             else {
2690                 if (! PL_utf8_charname_continue) {
2691                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2692                     PL_utf8_charname_continue = _core_swash_init("utf8",
2693                                                 "_Perl_Charname_Continue",
2694                                                 &PL_sv_undef,
2695                                                 1, 0, NULL, &flags);
2696                 }
2697                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2698                     goto bad_charname;
2699                 }
2700                 s += UTF8SKIP(s);
2701             }
2702         }
2703     }
2704     if (*(s-1) == ' ') {
2705         yyerror_pv(
2706             Perl_form(aTHX_
2707             "charnames alias definitions may not contain trailing "
2708             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2709             (int)(s - backslash_ptr + 1), backslash_ptr,
2710             (int)(e - s + 1), s + 1
2711             ),
2712         UTF ? SVf_UTF8 : 0);
2713         return NULL;
2714     }
2715
2716     if (SvUTF8(res)) { /* Don't accept malformed input */
2717         const U8* first_bad_char_loc;
2718         STRLEN len;
2719         const char* const str = SvPV_const(res, len);
2720         if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2721                                           &first_bad_char_loc)))
2722         {
2723             _force_out_malformed_utf8_message(first_bad_char_loc,
2724                                               (U8 *) PL_parser->bufend,
2725                                               0,
2726                                               0 /* 0 means don't die */ );
2727             yyerror_pv(
2728               Perl_form(aTHX_
2729                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2730                  (int) (e - backslash_ptr + 1), backslash_ptr,
2731                  (int) ((char *) first_bad_char_loc - str), str
2732               ),
2733               SVf_UTF8);
2734             return NULL;
2735         }
2736     }
2737
2738     return res;
2739
2740   bad_charname: {
2741
2742         /* The final %.*s makes sure that should the trailing NUL be missing
2743          * that this print won't run off the end of the string */
2744         yyerror_pv(
2745           Perl_form(aTHX_
2746             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2747             (int)(s - backslash_ptr + 1), backslash_ptr,
2748             (int)(e - s + 1), s + 1
2749           ),
2750           UTF ? SVf_UTF8 : 0);
2751         return NULL;
2752     }
2753
2754   multi_spaces:
2755         yyerror_pv(
2756           Perl_form(aTHX_
2757             "charnames alias definitions may not contain a sequence of "
2758             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2759             (int)(s - backslash_ptr + 1), backslash_ptr,
2760             (int)(e - s + 1), s + 1
2761           ),
2762           UTF ? SVf_UTF8 : 0);
2763         return NULL;
2764 }
2765
2766 /*
2767   scan_const
2768
2769   Extracts the next constant part of a pattern, double-quoted string,
2770   or transliteration.  This is terrifying code.
2771
2772   For example, in parsing the double-quoted string "ab\x63$d", it would
2773   stop at the '$' and return an OP_CONST containing 'abc'.
2774
2775   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2776   processing a pattern (PL_lex_inpat is true), a transliteration
2777   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2778
2779   Returns a pointer to the character scanned up to. If this is
2780   advanced from the start pointer supplied (i.e. if anything was
2781   successfully parsed), will leave an OP_CONST for the substring scanned
2782   in pl_yylval. Caller must intuit reason for not parsing further
2783   by looking at the next characters herself.
2784
2785   In patterns:
2786     expand:
2787       \N{FOO}  => \N{U+hex_for_character_FOO}
2788       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2789
2790     pass through:
2791         all other \-char, including \N and \N{ apart from \N{ABC}
2792
2793     stops on:
2794         @ and $ where it appears to be a var, but not for $ as tail anchor
2795         \l \L \u \U \Q \E
2796         (?{  or  (??{
2797
2798   In transliterations:
2799     characters are VERY literal, except for - not at the start or end
2800     of the string, which indicates a range.  However some backslash sequences
2801     are recognized: \r, \n, and the like
2802                     \007 \o{}, \x{}, \N{}
2803     If all elements in the transliteration are below 256,
2804     scan_const expands the range to the full set of intermediate
2805     characters. If the range is in utf8, the hyphen is replaced with
2806     a certain range mark which will be handled by pmtrans() in op.c.
2807
2808   In double-quoted strings:
2809     backslashes:
2810       all those recognized in transliterations
2811       deprecated backrefs: \1 (in substitution replacements)
2812       case and quoting: \U \Q \E
2813     stops on @ and $
2814
2815   scan_const does *not* construct ops to handle interpolated strings.
2816   It stops processing as soon as it finds an embedded $ or @ variable
2817   and leaves it to the caller to work out what's going on.
2818
2819   embedded arrays (whether in pattern or not) could be:
2820       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2821
2822   $ in double-quoted strings must be the symbol of an embedded scalar.
2823
2824   $ in pattern could be $foo or could be tail anchor.  Assumption:
2825   it's a tail anchor if $ is the last thing in the string, or if it's
2826   followed by one of "()| \r\n\t"
2827
2828   \1 (backreferences) are turned into $1 in substitutions
2829
2830   The structure of the code is
2831       while (there's a character to process) {
2832           handle transliteration ranges
2833           skip regexp comments /(?#comment)/ and codes /(?{code})/
2834           skip #-initiated comments in //x patterns
2835           check for embedded arrays
2836           check for embedded scalars
2837           if (backslash) {
2838               deprecate \1 in substitution replacements
2839               handle string-changing backslashes \l \U \Q \E, etc.
2840               switch (what was escaped) {
2841                   handle \- in a transliteration (becomes a literal -)
2842                   if a pattern and not \N{, go treat as regular character
2843                   handle \132 (octal characters)
2844                   handle \x15 and \x{1234} (hex characters)
2845                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2846                   handle \cV (control characters)
2847                   handle printf-style backslashes (\f, \r, \n, etc)
2848               } (end switch)
2849               continue
2850           } (end if backslash)
2851           handle regular character
2852     } (end while character to read)
2853
2854 */
2855
2856 STATIC char *
2857 S_scan_const(pTHX_ char *start)
2858 {
2859     char *send = PL_bufend;             /* end of the constant */
2860     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2861                                            on sizing. */
2862     char *s = start;                    /* start of the constant */
2863     char *d = SvPVX(sv);                /* destination for copies */
2864     bool dorange = FALSE;               /* are we in a translit range? */
2865     bool didrange = FALSE;              /* did we just finish a range? */
2866     bool in_charclass = FALSE;          /* within /[...]/ */
2867     bool has_utf8 = FALSE;              /* Output constant is UTF8 */
2868     bool  this_utf8 = cBOOL(UTF);       /* Is the source string assumed to be
2869                                            UTF8?  But, this can show as true
2870                                            when the source isn't utf8, as for
2871                                            example when it is entirely composed
2872                                            of hex constants */
2873     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
2874                                            number of characters found so far
2875                                            that will expand (into 2 bytes)
2876                                            should we have to convert to
2877                                            UTF-8) */
2878     SV *res;                            /* result from charnames */
2879     STRLEN offset_to_max;   /* The offset in the output to where the range
2880                                high-end character is temporarily placed */
2881
2882     /* Does something require special handling in tr/// ?  This avoids extra
2883      * work in a less likely case.  As such, khw didn't feel it was worth
2884      * adding any branches to the more mainline code to handle this, which
2885      * means that this doesn't get set in some circumstances when things like
2886      * \x{100} get expanded out.  As a result there needs to be extra testing
2887      * done in the tr code */
2888     bool has_above_latin1 = FALSE;
2889
2890     /* Note on sizing:  The scanned constant is placed into sv, which is
2891      * initialized by newSV() assuming one byte of output for every byte of
2892      * input.  This routine expects newSV() to allocate an extra byte for a
2893      * trailing NUL, which this routine will append if it gets to the end of
2894      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2895      * CAPITAL LETTER A}), or more output than input if the constant ends up
2896      * recoded to utf8, but each time a construct is found that might increase
2897      * the needed size, SvGROW() is called.  Its size parameter each time is
2898      * based on the best guess estimate at the time, namely the length used so
2899      * far, plus the length the current construct will occupy, plus room for
2900      * the trailing NUL, plus one byte for every input byte still unscanned */
2901
2902     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2903                        before set */
2904 #ifdef EBCDIC
2905     int backslash_N = 0;            /* ? was the character from \N{} */
2906     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
2907                                        platform-specific like \x65 */
2908 #endif
2909
2910     PERL_ARGS_ASSERT_SCAN_CONST;
2911
2912     assert(PL_lex_inwhat != OP_TRANSR);
2913     if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
2914         /* If we are doing a trans and we know we want UTF8 set expectation */
2915         has_utf8   = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2916         this_utf8  = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2917     }
2918
2919     /* Protect sv from errors and fatal warnings. */
2920     ENTER_with_name("scan_const");
2921     SAVEFREESV(sv);
2922
2923     while (s < send
2924            || dorange   /* Handle tr/// range at right edge of input */
2925     ) {
2926
2927         /* get transliterations out of the way (they're most literal) */
2928         if (PL_lex_inwhat == OP_TRANS) {
2929
2930             /* But there isn't any special handling necessary unless there is a
2931              * range, so for most cases we just drop down and handle the value
2932              * as any other.  There are two exceptions.
2933              *
2934              * 1.  A hyphen indicates that we are actually going to have a
2935              *     range.  In this case, skip the '-', set a flag, then drop
2936              *     down to handle what should be the end range value.
2937              * 2.  After we've handled that value, the next time through, that
2938              *     flag is set and we fix up the range.
2939              *
2940              * Ranges entirely within Latin1 are expanded out entirely, in
2941              * order to make the transliteration a simple table look-up.
2942              * Ranges that extend above Latin1 have to be done differently, so
2943              * there is no advantage to expanding them here, so they are
2944              * stored here as Min, ILLEGAL_UTF8_BYTE, Max.  The illegal byte
2945              * signifies a hyphen without any possible ambiguity.  On EBCDIC
2946              * machines, if the range is expressed as Unicode, the Latin1
2947              * portion is expanded out even if the range extends above
2948              * Latin1.  This is because each code point in it has to be
2949              * processed here individually to get its native translation */
2950
2951             if (! dorange) {
2952
2953                 /* Here, we don't think we're in a range.  If the new character
2954                  * is not a hyphen; or if it is a hyphen, but it's too close to
2955                  * either edge to indicate a range, then it's a regular
2956                  * character. */
2957                 if (*s != '-' || s >= send - 1 || s == start) {
2958
2959                     /* A regular character.  Process like any other, but first
2960                      * clear any flags */
2961                     didrange = FALSE;
2962                     dorange = FALSE;
2963 #ifdef EBCDIC
2964                     non_portable_endpoint = 0;
2965                     backslash_N = 0;
2966 #endif
2967                     /* The tests here for being above Latin1 and similar ones
2968                      * in the following 'else' suffice to find all such
2969                      * occurences in the constant, except those added by a
2970                      * backslash escape sequence, like \x{100}.  Mostly, those
2971                      * set 'has_above_latin1' as appropriate */
2972                     if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
2973                         has_above_latin1 = TRUE;
2974                     }
2975
2976                     /* Drops down to generic code to process current byte */
2977                 }
2978                 else {  /* Is a '-' in the context where it means a range */
2979                     if (didrange) { /* Something like y/A-C-Z// */
2980                         Perl_croak(aTHX_ "Ambiguous range in transliteration"
2981                                          " operator");
2982                     }
2983
2984                     dorange = TRUE;
2985
2986                     s++;    /* Skip past the hyphen */
2987
2988                     /* d now points to where the end-range character will be
2989                      * placed.  Save it so won't have to go finding it later,
2990                      * and drop down to get that character.  (Actually we
2991                      * instead save the offset, to handle the case where a
2992                      * realloc in the meantime could change the actual
2993                      * pointer).  We'll finish processing the range the next
2994                      * time through the loop */
2995                     offset_to_max = d - SvPVX_const(sv);
2996
2997                     if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
2998                         has_above_latin1 = TRUE;
2999                     }
3000
3001                     /* Drops down to generic code to process current byte */
3002                 }
3003             }  /* End of not a range */
3004             else {
3005                 /* Here we have parsed a range.  Now must handle it.  At this
3006                  * point:
3007                  * 'sv' is a SV* that contains the output string we are
3008                  *      constructing.  The final two characters in that string
3009                  *      are the range start and range end, in order.
3010                  * 'd'  points to just beyond the range end in the 'sv' string,
3011                  *      where we would next place something
3012                  * 'offset_to_max' is the offset in 'sv' at which the character
3013                  *      (the range's maximum end point) before 'd'  begins.
3014                  */
3015                 char * max_ptr = SvPVX(sv) + offset_to_max;
3016                 char * min_ptr;
3017                 IV range_min;
3018                 IV range_max;   /* last character in range */
3019                 STRLEN grow;
3020                 Size_t offset_to_min = 0;
3021                 Size_t extras = 0;
3022 #ifdef EBCDIC
3023                 bool convert_unicode;
3024                 IV real_range_max = 0;
3025 #endif
3026                 /* Get the code point values of the range ends. */
3027                 if (has_utf8) {
3028                     /* We know the utf8 is valid, because we just constructed
3029                      * it ourselves in previous loop iterations */
3030                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3031                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3032                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3033
3034                     /* This compensates for not all code setting
3035                      * 'has_above_latin1', so that we don't skip stuff that
3036                      * should be executed */
3037                     if (range_max > 255) {
3038                         has_above_latin1 = TRUE;
3039                     }
3040                 }
3041                 else {
3042                     min_ptr = max_ptr - 1;
3043                     range_min = * (U8*) min_ptr;
3044                     range_max = * (U8*) max_ptr;
3045                 }
3046
3047                 /* If the range is just a single code point, like tr/a-a/.../,
3048                  * that code point is already in the output, twice.  We can
3049                  * just back up over the second instance and avoid all the rest
3050                  * of the work.  But if it is a variant character, it's been
3051                  * counted twice, so decrement.  (This unlikely scenario is
3052                  * special cased, like the one for a range of 2 code points
3053                  * below, only because the main-line code below needs a range
3054                  * of 3 or more to work without special casing.  Might as well
3055                  * get it out of the way now.) */
3056                 if (UNLIKELY(range_max == range_min)) {
3057                     d = max_ptr;
3058                     if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3059                         utf8_variant_count--;
3060                     }
3061                     goto range_done;
3062                 }
3063
3064 #ifdef EBCDIC
3065                 /* On EBCDIC platforms, we may have to deal with portable
3066                  * ranges.  These happen if at least one range endpoint is a
3067                  * Unicode value (\N{...}), or if the range is a subset of
3068                  * [A-Z] or [a-z], and both ends are literal characters,
3069                  * like 'A', and not like \x{C1} */
3070                 convert_unicode =
3071                                cBOOL(backslash_N)   /* \N{} forces Unicode,
3072                                                        hence portable range */
3073                     || (     ! non_portable_endpoint
3074                         && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3075                            || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3076                 if (convert_unicode) {
3077
3078                     /* Special handling is needed for these portable ranges.
3079                      * They are defined to be in Unicode terms, which includes
3080                      * all the Unicode code points between the end points.
3081                      * Convert to Unicode to get the Unicode range.  Later we
3082                      * will convert each code point in the range back to
3083                      * native.  */
3084                     range_min = NATIVE_TO_UNI(range_min);
3085                     range_max = NATIVE_TO_UNI(range_max);
3086                 }
3087 #endif
3088
3089                 if (range_min > range_max) {
3090 #ifdef EBCDIC
3091                     if (convert_unicode) {
3092                         /* Need to convert back to native for meaningful
3093                          * messages for this platform */
3094                         range_min = UNI_TO_NATIVE(range_min);
3095                         range_max = UNI_TO_NATIVE(range_max);
3096                     }
3097 #endif
3098                     /* Use the characters themselves for the error message if
3099                      * ASCII printables; otherwise some visible representation
3100                      * of them */
3101                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3102                         Perl_croak(aTHX_
3103                          "Invalid range \"%c-%c\" in transliteration operator",
3104                          (char)range_min, (char)range_max);
3105                     }
3106 #ifdef EBCDIC
3107                     else if (convert_unicode) {
3108         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3109                         Perl_croak(aTHX_
3110                            "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3111                            UVXf "}\" in transliteration operator",
3112                            range_min, range_max);
3113                     }
3114 #endif
3115                     else {
3116         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3117                         Perl_croak(aTHX_
3118                            "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3119                            " in transliteration operator",
3120                            range_min, range_max);
3121                     }
3122                 }
3123
3124                 /* If the range is exactly two code points long, they are
3125                  * already both in the output */
3126                 if (UNLIKELY(range_min + 1 == range_max)) {
3127                     goto range_done;
3128                 }
3129
3130                 /* Here the range contains at least 3 code points */
3131
3132                 if (has_utf8) {
3133
3134                     /* If everything in the transliteration is below 256, we
3135                      * can avoid special handling later.  A translation table
3136                      * for each of those bytes is created by op.c.  So we
3137                      * expand out all ranges to their constituent code points.
3138                      * But if we've encountered something above 255, the
3139                      * expanding won't help, so skip doing that.  On EBCDIC,
3140                      * though, we may still have to look at each character
3141                      * below 256 when converting to/from Unicode values */
3142                     if (   has_above_latin1
3143 #ifdef EBCDIC
3144                         && (range_min > 255 || ! convert_unicode)
3145 #endif
3146                     ) {
3147                         /* Move the high character one byte to the right; then
3148                          * insert between it and the range begin, an illegal
3149                          * byte which serves to indicate this is a range (using
3150                          * a '-' would be ambiguous). */
3151                         char *e = d++;
3152                         while (e-- > max_ptr) {
3153                             *(e + 1) = *e;
3154                         }
3155                         *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
3156                         goto range_done;
3157                     }
3158
3159                     /* Here, we're going to expand out the range.  For EBCDIC
3160                      * the range can extend above 255 (not so in ASCII), so
3161                      * for EBCDIC, split it into the parts above and below
3162                      * 255/256 */
3163 #ifdef EBCDIC
3164                     if (range_max > 255) {
3165                         real_range_max = range_max;
3166                         range_max = 255;
3167                     }
3168 #endif
3169                 }
3170
3171                 /* Here we need to expand out the string to contain each
3172                  * character in the range.  Grow the output to handle this.
3173                  * For non-UTF8, we need a byte for each code point in the
3174                  * range, minus the three that we've already allocated for: the
3175                  * hyphen, the min, and the max.  For UTF-8, we need this
3176                  * plus an extra byte for each code point that occupies two
3177                  * bytes (is variant) when in UTF-8 (except we've already
3178                  * allocated for the end points, including if they are
3179                  * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3180                  * platforms, it's easy to calculate a precise number.  To
3181                  * start, we count the variants in the range, which we need
3182                  * elsewhere in this function anyway.  (For the case where it
3183                  * isn't easy to calculate, 'extras' has been initialized to 0,
3184                  * and the calculation is done in a loop further down.) */
3185 #ifdef EBCDIC
3186                 if (convert_unicode)
3187 #endif
3188                 {
3189                     /* This is executed unconditionally on ASCII, and for
3190                      * Unicode ranges on EBCDIC.  Under these conditions, all
3191                      * code points above a certain value are variant; and none
3192                      * under that value are.  We just need to find out how much
3193                      * of the range is above that value.  We don't count the
3194                      * end points here, as they will already have been counted
3195                      * as they were parsed. */
3196                     if (range_min >= UTF_CONTINUATION_MARK) {
3197
3198                         /* The whole range is made up of variants */
3199                         extras = (range_max - 1) - (range_min + 1) + 1;
3200                     }
3201                     else if (range_max >= UTF_CONTINUATION_MARK) {
3202
3203                         /* Only the higher portion of the range is variants */
3204                         extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3205                     }
3206
3207                     utf8_variant_count += extras;
3208                 }
3209
3210                 /* The base growth is the number of code points in the range,
3211                  * not including the endpoints, which have already been sized
3212                  * for (and output).  We don't subtract for the hyphen, as it
3213                  * has been parsed but not output, and the SvGROW below is
3214                  * based only on what's been output plus what's left to parse.
3215                  * */
3216                 grow = (range_max - 1) - (range_min + 1) + 1;
3217
3218                 if (has_utf8) {
3219 #ifdef EBCDIC
3220                     /* In some cases in EBCDIC, we haven't yet calculated a
3221                      * precise amount needed for the UTF-8 variants.  Just
3222                      * assume the worst case, that everything will expand by a
3223                      * byte */
3224                     if (! convert_unicode) {
3225                         grow *= 2;
3226                     }
3227                     else
3228 #endif
3229                     {
3230                         /* Otherwise we know exactly how many variants there
3231                          * are in the range. */
3232                         grow += extras;
3233                     }
3234                 }
3235
3236                 /* Grow, but position the output to overwrite the range min end
3237                  * point, because in some cases we overwrite that */
3238                 SvCUR_set(sv, d - SvPVX_const(sv));
3239                 offset_to_min = min_ptr - SvPVX_const(sv);
3240
3241                 /* See Note on sizing above. */
3242                 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3243                                              + (send - s)
3244                                              + grow
3245                                              + 1 /* Trailing NUL */ );
3246
3247                 /* Now, we can expand out the range. */
3248 #ifdef EBCDIC
3249                 if (convert_unicode) {
3250                     SSize_t i;
3251
3252                     /* Recall that the min and max are now in Unicode terms, so
3253                      * we have to convert each character to its native
3254                      * equivalent */
3255                     if (has_utf8) {
3256                         for (i = range_min; i <= range_max; i++) {
3257                             append_utf8_from_native_byte(
3258                                                     LATIN1_TO_NATIVE((U8) i),
3259                                                     (U8 **) &d);
3260                         }
3261                     }
3262                     else {
3263                         for (i = range_min; i <= range_max; i++) {
3264                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3265                         }
3266                     }
3267                 }
3268                 else
3269 #endif
3270                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3271                 {
3272                     /* Here, no conversions are necessary, which means that the
3273                      * first character in the range is already in 'd' and
3274                      * valid, so we can skip overwriting it */
3275                     if (has_utf8) {
3276                         SSize_t i;
3277                         d += UTF8SKIP(d);
3278                         for (i = range_min + 1; i <= range_max; i++) {
3279                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3280                         }
3281                     }
3282                     else {
3283                         SSize_t i;
3284                         d++;
3285                         assert(range_min + 1 <= range_max);
3286                         for (i = range_min + 1; i < range_max; i++) {
3287 #ifdef EBCDIC
3288                             /* In this case on EBCDIC, we haven't calculated
3289                              * the variants.  Do it here, as we go along */
3290                             if (! UVCHR_IS_INVARIANT(i)) {
3291                                 utf8_variant_count++;
3292                             }
3293 #endif
3294                             *d++ = (char)i;
3295                         }
3296
3297                         /* The range_max is done outside the loop so as to
3298                          * avoid having to special case not incrementing
3299                          * 'utf8_variant_count' on EBCDIC (it's already been
3300                          * counted when originally parsed) */
3301                         *d++ = (char) range_max;
3302                     }
3303                 }
3304
3305 #ifdef EBCDIC
3306                 /* If the original range extended above 255, add in that
3307                  * portion. */
3308                 if (real_range_max) {
3309                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3310                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3311                     if (real_range_max > 0x100) {
3312                         if (real_range_max > 0x101) {
3313                             *d++ = (char) ILLEGAL_UTF8_BYTE;
3314                         }
3315                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3316                     }
3317                 }
3318 #endif
3319
3320               range_done:
3321                 /* mark the range as done, and continue */
3322                 didrange = TRUE;
3323                 dorange = FALSE;
3324 #ifdef EBCDIC
3325                 non_portable_endpoint = 0;
3326                 backslash_N = 0;
3327 #endif
3328                 continue;
3329             } /* End of is a range */
3330         } /* End of transliteration.  Joins main code after these else's */
3331         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3332             char *s1 = s-1;
3333             int esc = 0;
3334             while (s1 >= start && *s1-- == '\\')
3335                 esc = !esc;
3336             if (!esc)
3337                 in_charclass = TRUE;
3338         }
3339         else if (*s == ']' && PL_lex_inpat && in_charclass) {
3340             char *s1 = s-1;
3341             int esc = 0;
3342             while (s1 >= start && *s1-- == '\\')
3343                 esc = !esc;
3344             if (!esc)
3345                 in_charclass = FALSE;
3346         }
3347             /* Copy regexp comments /(?#comment)/ through verbatim, except
3348              * for the last char, which will be done separately.  Stop on
3349              * (?{..}) and friends */
3350         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3351             if (s[2] == '#') {
3352                 while (s+1 < send && *s != ')')
3353                     *d++ = *s++;
3354             }
3355             else if (!PL_lex_casemods
3356                      && (    s[2] == '{' /* This should match regcomp.c */
3357                          || (s[2] == '?' && s[3] == '{')))
3358             {
3359                 break;
3360             }
3361         }
3362             /* likewise skip #-initiated comments in //x patterns */
3363         else if (*s == '#'
3364                  && PL_lex_inpat
3365                  && !in_charclass
3366                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3367         {
3368             while (s < send && *s != '\n')
3369                 *d++ = *s++;
3370         }
3371             /* no further processing of single-quoted regex */
3372         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3373             goto default_action;
3374
3375             /* check for embedded arrays
3376              * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3377              */
3378         else if (*s == '@' && s[1]) {
3379             if (UTF
3380                ? isIDFIRST_utf8_safe(s+1, send)
3381                : isWORDCHAR_A(s[1]))
3382             {
3383                 break;
3384             }
3385             if (strchr(":'{$", s[1]))
3386                 break;
3387             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3388                 break; /* in regexp, neither @+ nor @- are interpolated */
3389         }
3390             /* check for embedded scalars.  only stop if we're sure it's a
3391              * variable.  */
3392         else if (*s == '$') {
3393             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3394                 break;
3395             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3396                 if (s[1] == '\\') {
3397                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3398                                    "Possible unintended interpolation of $\\ in regex");
3399                 }
3400                 break;          /* in regexp, $ might be tail anchor */
3401             }
3402         }
3403
3404         /* End of else if chain - OP_TRANS rejoin rest */
3405
3406         if (UNLIKELY(s >= send)) {
3407             assert(s == send);
3408             break;
3409         }
3410
3411         /* backslashes */
3412         if (*s == '\\' && s+1 < send) {
3413             char* e;    /* Can be used for ending '}', etc. */
3414
3415             s++;
3416
3417             /* warn on \1 - \9 in substitution replacements, but note that \11
3418              * is an octal; and \19 is \1 followed by '9' */
3419             if (PL_lex_inwhat == OP_SUBST
3420                 && !PL_lex_inpat
3421                 && isDIGIT(*s)
3422                 && *s != '0'
3423                 && !isDIGIT(s[1]))
3424             {
3425                 /* diag_listed_as: \%d better written as $%d */
3426                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3427                 *--s = '$';
3428                 break;
3429             }
3430
3431             /* string-change backslash escapes */
3432             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3433                 --s;
3434                 break;
3435             }
3436             /* In a pattern, process \N, but skip any other backslash escapes.
3437              * This is because we don't want to translate an escape sequence
3438              * into a meta symbol and have the regex compiler use the meta
3439              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3440              * in spite of this, we do have to process \N here while the proper
3441              * charnames handler is in scope.  See bugs #56444 and #62056.
3442              *
3443              * There is a complication because \N in a pattern may also stand
3444              * for 'match a non-nl', and not mean a charname, in which case its
3445              * processing should be deferred to the regex compiler.  To be a
3446              * charname it must be followed immediately by a '{', and not look
3447              * like \N followed by a curly quantifier, i.e., not something like
3448              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3449              * quantifier */
3450             else if (PL_lex_inpat
3451                     && (*s != 'N'
3452                         || s[1] != '{'
3453                         || regcurly(s + 1)))
3454             {
3455                 *d++ = '\\';
3456                 goto default_action;
3457             }
3458
3459             switch (*s) {
3460             default:
3461                 {
3462                     if ((isALPHANUMERIC(*s)))
3463                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3464                                        "Unrecognized escape \\%c passed through",
3465                                        *s);
3466                     /* default action is to copy the quoted character */
3467                     goto default_action;
3468                 }
3469
3470             /* eg. \132 indicates the octal constant 0132 */
3471             case '0': case '1': case '2': case '3':
3472             case '4': case '5': case '6': case '7':
3473                 {
3474                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3475                     STRLEN len = 3;
3476                     uv = grok_oct(s, &len, &flags, NULL);
3477                     s += len;
3478                     if (len < 3 && s < send && isDIGIT(*s)
3479                         && ckWARN(WARN_MISC))
3480                     {
3481                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3482                                     "%s", form_short_octal_warning(s, len));
3483                     }
3484                 }
3485                 goto NUM_ESCAPE_INSERT;
3486
3487             /* eg. \o{24} indicates the octal constant \024 */
3488             case 'o':
3489                 {
3490                     const char* error;
3491
3492                     bool valid = grok_bslash_o(&s, &uv, &error,
3493                                                TRUE, /* Output warning */
3494                                                FALSE, /* Not strict */
3495                                                TRUE, /* Output warnings for
3496                                                          non-portables */
3497                                                UTF);
3498                     if (! valid) {
3499                         yyerror(error);
3500                         uv = 0; /* drop through to ensure range ends are set */
3501                     }
3502                     goto NUM_ESCAPE_INSERT;
3503                 }
3504
3505             /* eg. \x24 indicates the hex constant 0x24 */
3506             case 'x':
3507                 {
3508                     const char* error;
3509
3510                     bool valid = grok_bslash_x(&s, &uv, &error,
3511                                                TRUE, /* Output warning */
3512                                                FALSE, /* Not strict */
3513                                                TRUE,  /* Output warnings for
3514                                                          non-portables */
3515                                                UTF);
3516                     if (! valid) {
3517                         yyerror(error);
3518                         uv = 0; /* drop through to ensure range ends are set */
3519                     }
3520                 }
3521
3522               NUM_ESCAPE_INSERT:
3523                 /* Insert oct or hex escaped character. */
3524
3525                 /* Here uv is the ordinal of the next character being added */
3526                 if (UVCHR_IS_INVARIANT(uv)) {
3527                     *d++ = (char) uv;
3528                 }
3529                 else {
3530                     if (!has_utf8 && uv > 255) {
3531
3532                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3533                          * If we've only seen invariants so far, all we have to
3534                          * do is turn on the flag */
3535                         if (utf8_variant_count == 0) {
3536                             SvUTF8_on(sv);
3537                         }
3538                         else {
3539                             SvCUR_set(sv, d - SvPVX_const(sv));
3540                             SvPOK_on(sv);
3541                             *d = '\0';
3542
3543                             sv_utf8_upgrade_flags_grow(
3544                                            sv,
3545                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3546
3547                                            /* Since we're having to grow here,
3548                                             * make sure we have enough room for
3549                                             * this escape and a NUL, so the
3550                                             * code immediately below won't have
3551                                             * to actually grow again */
3552                                           UVCHR_SKIP(uv)
3553                                         + (STRLEN)(send - s) + 1);
3554                             d = SvPVX(sv) + SvCUR(sv);
3555                         }
3556
3557                         has_above_latin1 = TRUE;
3558                         has_utf8 = TRUE;
3559                     }
3560
3561                     if (! has_utf8) {
3562                         *d++ = (char)uv;
3563                         utf8_variant_count++;
3564                     }
3565                     else {
3566                        /* Usually, there will already be enough room in 'sv'
3567                         * since such escapes are likely longer than any UTF-8
3568                         * sequence they can end up as.  This isn't the case on
3569                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3570                         * UTF-8 for it contains 14.  And, we have to allow for
3571                         * a trailing NUL.  It probably can't happen on ASCII
3572                         * platforms, but be safe.  See Note on sizing above. */
3573                         const STRLEN needed = d - SvPVX(sv)
3574                                             + UVCHR_SKIP(uv)
3575                                             + (send - s)
3576                                             + 1;
3577                         if (UNLIKELY(needed > SvLEN(sv))) {
3578                             SvCUR_set(sv, d - SvPVX_const(sv));
3579                             d = SvCUR(sv) + SvGROW(sv, needed);
3580                         }
3581
3582                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3583                         if (PL_lex_inwhat == OP_TRANS
3584                             && PL_parser->lex_sub_op)
3585                         {
3586                             PL_parser->lex_sub_op->op_private |=
3587                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3588                                              : OPpTRANS_TO_UTF);
3589                         }
3590                     }
3591                 }
3592 #ifdef EBCDIC
3593                 non_portable_endpoint++;
3594 #endif
3595                 continue;
3596
3597             case 'N':
3598                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3599                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3600                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3601                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3602                  * convenience all three forms are referred to as "named
3603                  * characters" below.
3604                  *
3605                  * For patterns, \N also can mean to match a non-newline.  Code
3606                  * before this 'switch' statement should already have handled
3607                  * this situation, and hence this code only has to deal with
3608                  * the named character cases.
3609                  *
3610                  * For non-patterns, the named characters are converted to
3611                  * their string equivalents.  In patterns, named characters are
3612                  * not converted to their ultimate forms for the same reasons
3613                  * that other escapes aren't.  Instead, they are converted to
3614                  * the \N{U+...} form to get the value from the charnames that
3615                  * is in effect right now, while preserving the fact that it
3616                  * was a named character, so that the regex compiler knows
3617                  * this.
3618                  *
3619                  * The structure of this section of code (besides checking for
3620                  * errors and upgrading to utf8) is:
3621                  *    If the named character is of the form \N{U+...}, pass it
3622                  *      through if a pattern; otherwise convert the code point
3623                  *      to utf8
3624                  *    Otherwise must be some \N{NAME}: convert to
3625                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3626                  *
3627                  * Transliteration is an exception.  The conversion to utf8 is
3628                  * only done if the code point requires it to be representable.
3629                  *
3630                  * Here, 's' points to the 'N'.  If we are being called on a
3631                  * pattern, the check below for a following '{' cannot fail, as
3632                  * we already know from a test above that the next character is
3633                  * one.  A non-pattern \N must mean 'named character', which
3634                  * requires braces */
3635                 s++;
3636                 if (*s != '{') {
3637                     yyerror("Missing braces on \\N{}");
3638                     *d++ = '\0';
3639                     continue;
3640                 }
3641                 s++;
3642
3643                 /* If there is no matching '}', it is an error. */
3644                 if (! (e = strchr(s, '}'))) {
3645                     if (! PL_lex_inpat) {
3646                         yyerror("Missing right brace on \\N{}");
3647                     } else {
3648                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3649                     }
3650                     yyquit(); /* Have exhausted the input. */
3651                 }
3652
3653                 /* Here it looks like a named character */
3654
3655                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3656                     s += 2;         /* Skip to next char after the 'U+' */
3657                     if (PL_lex_inpat) {
3658
3659                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3660                         /* Check the syntax.  */
3661                         const char *orig_s;
3662                         orig_s = s - 5;
3663                         if (!isXDIGIT(*s)) {
3664                           bad_NU:
3665                             yyerror(
3666                                 "Invalid hexadecimal number in \\N{U+...}"
3667                             );
3668                             s = e + 1;
3669                             *d++ = '\0';
3670                             continue;
3671                         }
3672                         while (++s < e) {
3673                             if (isXDIGIT(*s))
3674                                 continue;
3675                             else if ((*s == '.' || *s == '_')
3676                                   && isXDIGIT(s[1]))
3677                                 continue;
3678                             goto bad_NU;
3679                         }
3680
3681                         /* Pass everything through unchanged.
3682                          * +1 is for the '}' */
3683                         Copy(orig_s, d, e - orig_s + 1, char);
3684                         d += e - orig_s + 1;
3685                     }
3686                     else {  /* Not a pattern: convert the hex to string */
3687                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3688                                 | PERL_SCAN_SILENT_ILLDIGIT
3689                                 | PERL_SCAN_DISALLOW_PREFIX;
3690                         STRLEN len = e - s;
3691                         uv = grok_hex(s, &len, &flags, NULL);
3692                         if (len == 0 || (len != (STRLEN)(e - s)))
3693                             goto bad_NU;
3694
3695                          /* For non-tr///, if the destination is not in utf8,
3696                           * unconditionally recode it to be so.  This is
3697                           * because \N{} implies Unicode semantics, and scalars
3698                           * have to be in utf8 to guarantee those semantics.
3699                           * tr/// doesn't care about Unicode rules, so no need
3700                           * there to upgrade to UTF-8 for small enough code
3701                           * points */
3702                         if (! has_utf8 && (   uv > 0xFF
3703                                            || PL_lex_inwhat != OP_TRANS))
3704                         {
3705                             /* See Note on sizing above.  */
3706                             const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3707
3708                             SvCUR_set(sv, d - SvPVX_const(sv));
3709                             SvPOK_on(sv);
3710                             *d = '\0';
3711
3712                             if (utf8_variant_count == 0) {
3713                                 SvUTF8_on(sv);
3714                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3715                             }
3716                             else {
3717                                 sv_utf8_upgrade_flags_grow(
3718                                                sv,
3719                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3720                                                extra);
3721                                 d = SvPVX(sv) + SvCUR(sv);
3722                             }
3723
3724                             has_utf8 = TRUE;
3725                             has_above_latin1 = TRUE;
3726                         }
3727
3728                         /* Add the (Unicode) code point to the output. */
3729                         if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3730                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3731                         }
3732                         else {
3733                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3734                         }
3735                     }
3736                 }
3737                 else /* Here is \N{NAME} but not \N{U+...}. */
3738                      if ((res = get_and_check_backslash_N_name(s, e)))
3739                 {
3740                     STRLEN len;
3741                     const char *str = SvPV_const(res, len);
3742                     if (PL_lex_inpat) {
3743
3744                         if (! len) { /* The name resolved to an empty string */
3745                             Copy("\\N{}", d, 4, char);
3746                             d += 4;
3747                         }
3748                         else {
3749                             /* In order to not lose information for the regex
3750                             * compiler, pass the result in the specially made
3751                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3752                             * the code points in hex of each character
3753                             * returned by charnames */
3754
3755                             const char *str_end = str + len;
3756                             const STRLEN off = d - SvPVX_const(sv);
3757
3758                             if (! SvUTF8(res)) {
3759                                 /* For the non-UTF-8 case, we can determine the
3760                                  * exact length needed without having to parse
3761                                  * through the string.  Each character takes up
3762                                  * 2 hex digits plus either a trailing dot or
3763                                  * the "}" */
3764                                 const char initial_text[] = "\\N{U+";
3765                                 const STRLEN initial_len = sizeof(initial_text)
3766                                                            - 1;
3767                                 d = off + SvGROW(sv, off
3768                                                     + 3 * len
3769
3770                                                     /* +1 for trailing NUL */
3771                                                     + initial_len + 1
3772
3773                                                     + (STRLEN)(send - e));
3774                                 Copy(initial_text, d, initial_len, char);
3775                                 d += initial_len;
3776                                 while (str < str_end) {
3777                                     char hex_string[4];
3778                                     int len =
3779                                         my_snprintf(hex_string,
3780                                                   sizeof(hex_string),
3781                                                   "%02X.",
3782
3783                                                   /* The regex compiler is
3784                                                    * expecting Unicode, not
3785                                                    * native */
3786                                                   NATIVE_TO_LATIN1(*str));
3787                                     PERL_MY_SNPRINTF_POST_GUARD(len,
3788                                                            sizeof(hex_string));
3789                                     Copy(hex_string, d, 3, char);
3790                                     d += 3;
3791                                     str++;
3792                                 }
3793                                 d--;    /* Below, we will overwrite the final
3794                                            dot with a right brace */
3795                             }
3796                             else {
3797                                 STRLEN char_length; /* cur char's byte length */
3798
3799                                 /* and the number of bytes after this is
3800                                  * translated into hex digits */
3801                                 STRLEN output_length;
3802
3803                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3804                                  * for max('U+', '.'); and 1 for NUL */
3805                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3806
3807                                 /* Get the first character of the result. */
3808                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3809                                                         len,
3810                                                         &char_length,
3811                                                         UTF8_ALLOW_ANYUV);
3812                                 /* Convert first code point to Unicode hex,
3813                                  * including the boiler plate before it. */
3814                                 output_length =
3815                                     my_snprintf(hex_string, sizeof(hex_string),
3816                                              "\\N{U+%X",
3817                                              (unsigned int) NATIVE_TO_UNI(uv));
3818
3819                                 /* Make sure there is enough space to hold it */
3820                                 d = off + SvGROW(sv, off
3821                                                     + output_length
3822                                                     + (STRLEN)(send - e)
3823                                                     + 2);       /* '}' + NUL */
3824                                 /* And output it */
3825                                 Copy(hex_string, d, output_length, char);
3826                                 d += output_length;
3827
3828                                 /* For each subsequent character, append dot and
3829                                 * its Unicode code point in hex */
3830                                 while ((str += char_length) < str_end) {
3831                                     const STRLEN off = d - SvPVX_const(sv);
3832                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3833                                                             str_end - str,
3834                                                             &char_length,
3835                                                             UTF8_ALLOW_ANYUV);
3836                                     output_length =
3837                                         my_snprintf(hex_string,
3838                                              sizeof(hex_string),
3839                                              ".%X",
3840                                              (unsigned int) NATIVE_TO_UNI(uv));
3841
3842                                     d = off + SvGROW(sv, off
3843                                                         + output_length
3844                                                         + (STRLEN)(send - e)
3845                                                         + 2);   /* '}' +  NUL */
3846                                     Copy(hex_string, d, output_length, char);
3847                                     d += output_length;
3848                                 }
3849                             }
3850
3851                             *d++ = '}'; /* Done.  Add the trailing brace */
3852                         }
3853                     }
3854                     else { /* Here, not in a pattern.  Convert the name to a
3855                             * string. */
3856
3857                         if (PL_lex_inwhat == OP_TRANS) {
3858                             str = SvPV_const(res, len);
3859                             if (len > ((SvUTF8(res))
3860                                        ? UTF8SKIP(str)
3861                                        : 1U))
3862                             {
3863                                 yyerror(Perl_form(aTHX_
3864                                     "%.*s must not be a named sequence"
3865                                     " in transliteration operator",
3866                                         /*  +1 to include the "}" */
3867                                     (int) (e + 1 - start), start));
3868                                 *d++ = '\0';
3869                                 goto end_backslash_N;
3870                             }
3871
3872                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
3873                                 has_above_latin1 = TRUE;
3874                             }
3875
3876                         }
3877                         else if (! SvUTF8(res)) {
3878                             /* Make sure \N{} return is UTF-8.  This is because
3879                              * \N{} implies Unicode semantics, and scalars have
3880                              * to be in utf8 to guarantee those semantics; but
3881                              * not needed in tr/// */
3882                             sv_utf8_upgrade_flags(res, 0);
3883                             str = SvPV_const(res, len);
3884                         }
3885
3886                          /* Upgrade destination to be utf8 if this new
3887                           * component is */
3888                         if (! has_utf8 && SvUTF8(res)) {
3889                             /* See Note on sizing above.  */
3890                             const STRLEN extra = len + (send - s) + 1;
3891
3892                             SvCUR_set(sv, d - SvPVX_const(sv));
3893                             SvPOK_on(sv);
3894                             *d = '\0';
3895
3896                             if (utf8_variant_count == 0) {
3897                                 SvUTF8_on(sv);
3898                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3899                             }
3900                             else {
3901                                 sv_utf8_upgrade_flags_grow(sv,
3902                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3903                                                 extra);
3904                                 d = SvPVX(sv) + SvCUR(sv);
3905                             }
3906                             has_utf8 = TRUE;
3907                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3908
3909                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3910                              * set correctly here). */
3911                             const STRLEN extra = len + (send - e) + 1;
3912                             const STRLEN off = d - SvPVX_const(sv);
3913                             d = off + SvGROW(sv, off + extra);
3914                         }
3915                         Copy(str, d, len, char);
3916                         d += len;
3917                     }
3918
3919                     SvREFCNT_dec(res);
3920
3921                 } /* End \N{NAME} */
3922
3923               end_backslash_N:
3924 #ifdef EBCDIC
3925                 backslash_N++; /* \N{} is defined to be Unicode */
3926 #endif
3927                 s = e + 1;  /* Point to just after the '}' */
3928                 continue;
3929
3930             /* \c is a control character */
3931             case 'c':
3932                 s++;
3933                 if (s < send) {
3934                     *d++ = grok_bslash_c(*s, 1);
3935                 }
3936                 else {
3937                     yyerror("Missing control char name in \\c");
3938                     yyquit();   /* Are at end of input, no sense continuing */
3939                 }
3940 #ifdef EBCDIC
3941                 non_portable_endpoint++;
3942 #endif
3943                 break;
3944
3945             /* printf-style backslashes, formfeeds, newlines, etc */
3946             case 'b':
3947                 *d++ = '\b';
3948                 break;
3949             case 'n':
3950                 *d++ = '\n';
3951                 break;
3952             case 'r':
3953                 *d++ = '\r';
3954                 break;
3955             case 'f':
3956                 *d++ = '\f';
3957                 break;
3958             case 't':
3959                 *d++ = '\t';
3960                 break;
3961             case 'e':
3962                 *d++ = ESC_NATIVE;
3963                 break;
3964             case 'a':
3965                 *d++ = '\a';
3966                 break;
3967             } /* end switch */
3968
3969             s++;
3970             continue;
3971         } /* end if (backslash) */
3972
3973     default_action:
3974         /* Just copy the input to the output, though we may have to convert
3975          * to/from UTF-8.
3976          *
3977          * If the input has the same representation in UTF-8 as not, it will be
3978          * a single byte, and we don't care about UTF8ness; just copy the byte */
3979         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
3980             *d++ = *s++;
3981         }
3982         else if (! this_utf8 && ! has_utf8) {
3983             /* If neither source nor output is UTF-8, is also a single byte,
3984              * just copy it; but this byte counts should we later have to
3985              * convert to UTF-8 */
3986             *d++ = *s++;
3987             utf8_variant_count++;
3988         }
3989         else if (this_utf8 && has_utf8) {   /* Both UTF-8, can just copy */
3990             const STRLEN len = UTF8SKIP(s);
3991
3992             /* We expect the source to have already been checked for
3993              * malformedness */
3994             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
3995
3996             Copy(s, d, len, U8);
3997             d += len;
3998             s += len;
3999         }
4000         else { /* UTF8ness matters and doesn't match, need to convert */
4001             STRLEN len = 1;
4002             const UV nextuv   = (this_utf8)
4003                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
4004                                 : (UV) ((U8) *s);
4005             STRLEN need = UVCHR_SKIP(nextuv);
4006
4007             if (!has_utf8) {
4008                 SvCUR_set(sv, d - SvPVX_const(sv));
4009                 SvPOK_on(sv);
4010                 *d = '\0';
4011
4012                 /* See Note on sizing above. */
4013                 need += (STRLEN)(send - s) + 1;
4014
4015                 if (utf8_variant_count == 0) {
4016                     SvUTF8_on(sv);
4017                     d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4018                 }
4019                 else {
4020                     sv_utf8_upgrade_flags_grow(sv,
4021                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4022                                                need);
4023                     d = SvPVX(sv) + SvCUR(sv);
4024                 }
4025                 has_utf8 = TRUE;
4026             } else if (need > len) {
4027                 /* encoded value larger than old, may need extra space (NOTE:
4028                  * SvCUR() is not set correctly here).   See Note on sizing
4029                  * above.  */
4030                 const STRLEN extra = need + (send - s) + 1;
4031                 const STRLEN off = d - SvPVX_const(sv);
4032                 d = off + SvGROW(sv, off + extra);
4033             }
4034             s += len;
4035
4036             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
4037         }
4038     } /* while loop to process each character */
4039
4040     /* terminate the string and set up the sv */
4041     *d = '\0';
4042     SvCUR_set(sv, d - SvPVX_const(sv));
4043     if (SvCUR(sv) >= SvLEN(sv))
4044         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
4045                    " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
4046
4047     SvPOK_on(sv);
4048     if (has_utf8) {
4049         SvUTF8_on(sv);
4050         if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
4051             PL_parser->lex_sub_op->op_private |=
4052                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
4053         }
4054     }
4055
4056     /* shrink the sv if we allocated more than we used */
4057     if (SvCUR(sv) + 5 < SvLEN(sv)) {
4058         SvPV_shrink_to_cur(sv);
4059     }
4060
4061     /* return the substring (via pl_yylval) only if we parsed anything */
4062     if (s > start) {
4063         char *s2 = start;
4064         for (; s2 < s; s2++) {
4065             if (*s2 == '\n')
4066                 COPLINE_INC_WITH_HERELINES;
4067         }
4068         SvREFCNT_inc_simple_void_NN(sv);
4069         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4070             && ! PL_parser->lex_re_reparsing)
4071         {
4072             const char *const key = PL_lex_inpat ? "qr" : "q";
4073             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4074             const char *type;
4075             STRLEN typelen;
4076
4077             if (PL_lex_inwhat == OP_TRANS) {
4078                 type = "tr";
4079                 typelen = 2;
4080             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4081                 type = "s";
4082                 typelen = 1;
4083             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4084                 type = "q";
4085                 typelen = 1;
4086             } else  {
4087                 type = "qq";
4088                 typelen = 2;
4089             }
4090
4091             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4092                                 type, typelen);
4093         }
4094         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4095     }
4096     LEAVE_with_name("scan_const");
4097     return s;
4098 }
4099
4100 /* S_intuit_more
4101  * Returns TRUE if there's more to the expression (e.g., a subscript),
4102  * FALSE otherwise.
4103  *
4104  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4105  *
      * a non-zero PL_lex_brackets returns TRUE without looking at the text
4106  * ->[ and ->{ return TRUE
4107  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
      *   ("->@" followed directly by the NUL that ends the buffer is not one
      *   of these; s[3] is checked for NUL because strchr() would otherwise
      *   match the terminator of its own string argument)
4108  * { and [ outside a pattern are always subscripts, so return TRUE
4109  * if we're outside a pattern and it's not { or [, then return FALSE
4110  * if we're in a pattern and the first char is a {
4111  *   {4,5} (any digits around the comma) returns FALSE
      *   (the decision is whatever regcurly() says)
4112  * if we're in a pattern and the first char is a [
4113  *   [] returns FALSE
      *   [^ returns FALSE
      *   [ with no ] anywhere after it returns TRUE
4114  *   [SOMETHING] has a funky algorithm to decide whether it's a
4115  *      character class or not.  It has to deal with things like
4116  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
      *      The algorithm keeps a running 'weight': evidence for a character
      *      class adds to it, evidence for a subscript subtracts from it, and
      *      a final weight >= 0 returns FALSE (character class).
4117  * anything else returns TRUE
4118  */
4119
4120 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4121
4122 STATIC int
4123 S_intuit_more(pTHX_ char *s)
4124 {
4125     PERL_ARGS_ASSERT_INTUIT_MORE;
4126
4127     if (PL_lex_brackets)
4128         return TRUE;
4129     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4130         return TRUE;
         /* Postfix dereference inside an interpolating construct.  s[3] must be
          * tested for NUL before the strchr(): strchr() considers the
          * terminating NUL part of the string it searches, so without the
          * test "->@" at the very end of the buffer would return TRUE. */
4131     if (*s == '-' && s[1] == '>'
4132      && FEATURE_POSTDEREF_QQ_IS_ENABLED
4133      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4134         ||(s[2] == '@' && s[3] && strchr("*[{",s[3])) ))
4135         return TRUE;
4136     if (*s != '{' && *s != '[')
4137         return FALSE;
4138     if (!PL_lex_inpat)
4139         return TRUE;
4140
4141     /* In a pattern, so maybe we have {n,m}. */
4142     if (*s == '{') {
4143         if (regcurly(s)) {
4144             return FALSE;
4145         }
4146         return TRUE;
4147     }
4148
4149     /* On the other hand, maybe we have a character class */
4150
4151     s++;
4152     if (*s == ']' || *s == '^')
4153         return FALSE;
4154     else {
4155         /* this is terrifying, and it works */
4156         int weight;
             /* seen[c] counts how often byte c has occurred so far in the
              * bracketed text (a backslash is counted under index 254) */
4157         char seen[256];
4158         const char * const send = strchr(s,']');
             /* un_char is the current byte, last_un_char the previous one; 255
              * stands for "nothing before this" and 254 for a backslash */
4159         unsigned char un_char, last_un_char;
4160         char tmpbuf[sizeof PL_tokenbuf * 4];
4161
4162         if (!send)              /* has to be an expression */
4163             return TRUE;
4164         weight = 2;             /* let's weigh the evidence */
4165
             /* First character: a leading '$' or a one- or two-digit number that
              * fills the whole bracket looks like a subscript */
4166         if (*s == '$')
4167             weight -= 3;
4168         else if (isDIGIT(*s)) {
4169             if (s[1] != ']') {
4170                 if (isDIGIT(s[1]) && s[2] == ']')
4171                     weight -= 10;
4172             }
4173             else
4174                 weight -= 100;
4175         }
4176         Zero(seen,256,char);
4177         un_char = 255;
             /* Every byte examined here lies before send, i.e. before the first
              * ']' found by strchr(), so none of them is NUL */
4178         for (; s < send; s++) {
4179             last_un_char = un_char;
4180             un_char = (unsigned char)*s;
4181             switch (*s) {
4182             case '@':
4183             case '&':
4184             case '$':
                     /* a repeated sigil counts against a character class */
4185                 weight -= seen[un_char] * 10;
4186                 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4187                     int len;
                         /* sigil followed by a word character: extract the
                          * identifier; one that gv_fetchpvn_flags() finds counts
                          * much more heavily than one it does not */
4188                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4189                     len = (int)strlen(tmpbuf);
4190                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4191                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4192                         weight -= 100;
4193                     else
4194                         weight -= 10;
4195                 }
4196                 else if (*s == '$'
4197                          && s[1]
4198                          && strchr("[#!%*<>()-=",s[1]))
4199                 {
                         /* '$' plus one of the listed punctuation characters;
                          * s[1] is not ']' here, so s[2] is still at or before
                          * send and cannot be NUL */
4200                     if (/*{*/ strchr("])} =",s[2]))
4201                         weight -= 10;
4202                     else
4203                         weight -= 1;
4204                 }
4205                 break;
4206             case '\\':
4207                 un_char = 254;
4208                 if (s[1]) {
4209                     if (strchr("wds]",s[1]))
4210                         weight += 100;
4211                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4212                         weight += 1;
4213                     else if (strchr("rnftbxcav",s[1]))
4214                         weight += 40;
4215                     else if (isDIGIT(s[1])) {
4216                         weight += 40;
                             /* step over the whole run of digits */
4217                         while (s[1] && isDIGIT(s[1]))
4218                             s++;
4219                     }
4220                 }
4221                 else
4222                     weight += 100;
4223                 break;
4224             case '-':
                     /* things that look like the ends of a range favour a
                      * character class */
4225                 if (s[1] == '\\')
4226                     weight += 50;
4227                 if (strchr("aA01! ",last_un_char))
4228                     weight += 30;
4229                 if (strchr("zZ79~",s[1]))
4230                     weight += 30;
4231                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4232                     weight -= 5;        /* cope with negative subscript */
4233                 break;
4234             default:
                     /* a run of letters that does not directly follow a word
                      * character or a sigil is tested as a keyword */
4235                 if (!isWORDCHAR(last_un_char)
4236                     && !(last_un_char == '$' || last_un_char == '@'
4237                          || last_un_char == '&')
4238                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4239                     char *d = s;
4240                     while (isALPHA(*s))
4241                         s++;
4242                     if (keyword(d, s - d, 0))
4243                         weight -= 150;
4244                 }
                     /* a byte one greater than its predecessor adds weight; a
                      * byte already seen subtracts its count */
4245                 if (un_char == last_un_char + 1)
4246                     weight += 5;
4247                 weight -= seen[un_char];
4248                 break;
4249             }
4250             seen[un_char]++;
4251         }
4252         if (weight >= 0)        /* probably a character class */
4253             return FALSE;
4254     }
4255
4256     return TRUE;
4257 }
4258
4259 /*
4260  * S_intuit_method
4261  *
4262  * Does all the checking to disambiguate
4263  *   foo bar
4264  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4265  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4266  *
4267  * First argument is the stuff after the first token, e.g. "bar".
      * ioname, if non-NULL, is looked up as a glob to see whether foo is a
      * filehandle.  cv, if non-NULL, has its prototype inspected, and its mere
      * presence suppresses some of the method guesses below.
4268  *
4269  * Not a method if foo is a filehandle.
4270  * Not a method if foo is a subroutine prototyped to take a filehandle.
4271  * Not a method if it's really "Foo $bar"
4272  * Method if it's "foo $bar"
      *   (only when no cv was passed in)
4273  * Not a method if it's really "print foo $bar"
      *   (likewise for "say")
4274  * Method if it's really "foo package::" (interpreted as package->foo)
4275  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4276  * Not a method if bar is a filehandle or package, but is quoted with
4277  *   =>
      *
      * When a method is recognised the function also sets PL_bufptr and
      * PL_expect, and for a bareword "bar" it queues a BAREWORD constant token
      * via force_next().
4278  */
4279
4280 STATIC int
4281 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4282 {
4283     char *s = start + (*start == '$');  /* step over a leading '$' */
4284     char tmpbuf[sizeof PL_tokenbuf];
4285     STRLEN len;
4286     GV* indirgv;
4287         /* Mustn't actually add anything to a symbol table.
4288            But also don't want to "initialise" any placeholder
4289            constants that might already be there into full
4290            blown PVGVs with attached PVCV.  */
4291     GV * const gv =
4292         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4293
4294     PERL_ARGS_ASSERT_INTUIT_METHOD;
4295
         /* foo is a real glob with an IO slot: treat it as a filehandle */
4296     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4297             return 0;
         /* foo has a prototype whose first entry, after any spaces and
          * semicolons, is '*' */
4298     if (cv && SvPOK(cv)) {
4299         const char *proto = CvPROTO(cv);
4300         if (proto) {
4301             while (*proto && (isSPACE(*proto) || *proto == ';'))
4302                 proto++;
4303             if (*proto == '*')
4304                 return 0;
4305         }
4306     }
4307
4308     if (*start == '$') {
             /* Remember start as an offset so that PL_bufptr can be rebuilt
              * from the buffer's current address after skipspace().
              * NOTE(review): presumably skipspace() can move PL_linestr's
              * buffer -- confirm. */
4309         SSize_t start_off = start - SvPVX(PL_linestr);
4310         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4311             || isUPPER(*PL_tokenbuf))
4312             return 0;
4313         /* this could be $# */
4314         if (isSPACE(*s))
4315             s = skipspace(s);
4316         PL_bufptr = SvPVX(PL_linestr) + start_off;
4317         PL_expect = XREF;
4318         return *s == '(' ? FUNCMETH : METHOD;
4319     }
4320
4321     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4322     /* start is the beginning of the possible filehandle/object,
4323      * and s is the end of it
4324      * tmpbuf is a copy of it (but with single quotes as double colons)
4325      */
4326
4327     if (!keyword(tmpbuf, len, 0)) {
             /* "bar::" - drop the trailing "::" and go straight to emitting
              * the bareword, skipping the symbol table checks and the "=>"
              * test */
4328         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4329             len -= 2;
4330             tmpbuf[len] = '\0';
4331             goto bare_package;
4332         }
4333         indirgv = gv_fetchpvn_flags(tmpbuf, len,
4334                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4335                                     SVt_PVCV);
             /* bar has a symbol table entry that is either not a glob or is a
              * glob with a CV: not a method call */
4336         if (indirgv && SvTYPE(indirgv) != SVt_NULL
4337          && (!isGV(indirgv) || GvCVu(indirgv)))
4338             return 0;
4339         /* filehandle or package name makes it a method */
4340         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4341             s = skipspace(s);
4342             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4343                 return 0;       /* no assumptions -- "=>" quotes bareword */
4344       bare_package:
                 /* queue bar as a bareword constant for the next token and
                  * leave PL_bufptr at s, the text just after it */
4345             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4346                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4347             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4348             PL_expect = XTERM;
4349             force_next(BAREWORD);
4350             PL_bufptr = s;
4351             return *s == '(' ? FUNCMETH : METHOD;
4352         }
4353     }
4354     return 0;
4355 }
4356
4357 /* Encoded script support. filter_add() effectively inserts a
4358  * 'pre-processing' function into the current source input stream.
4359  * Note that the filter function only applies to the current source file
4360  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4361  *
4362  * The datasv parameter (which may be NULL) can be used to pass
4363  * private data to this instance of the filter. The filter function
4364  * can recover the SV using the FILTER_DATA macro and use it to
4365  * store private buffers and state information.
4366  *
4367  * The supplied datasv parameter is upgraded to a PVIO type
4368  * and the IoDIRP/IoANY field is used to store the function pointer,
4369  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4370  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4371  * private use must be set using malloc'd pointers.
      *
      * Returns datasv (a freshly created SV when NULL was passed in), or NULL
      * when funcp is NULL or there is no PL_parser.  Croaks if the parser has
      * LEX_IGNORE_UTF8_HINTS set.
      *
      * The new filter is stored at index 0 of PL_rsfp_filters, in front of any
      * filters already there.
4372  */
4373
4374 SV *
4375 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4376 {
4377     if (!funcp)
4378         return NULL;
4379
4380     if (!PL_parser)
4381         return NULL;
4382
4383     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4384         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4385
4386     if (!PL_rsfp_filters)
4387         PL_rsfp_filters = newAV();
4388     if (!datasv)
4389         datasv = newSV(0);
4390     SvUPGRADE(datasv, SVt_PVIO);
4391     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4392     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4393     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4394                           FPTR2DPTR(void *, IoANY(datasv)),
4395                           SvPV_nolen(datasv)));
         /* make room at the front of the array and put the new filter there */
4396     av_unshift(PL_rsfp_filters, 1);
4397     av_store(PL_rsfp_filters, 0, datasv) ;
         /* First filter added while parsing under LEX_EVALBYTES with unread
          * text still in the buffer: cut the line buffer after the next
          * newline.  The parser keeps a copy of everything up to and including
          * that newline; the original buffer is appended to PL_rsfp_filters.
          * If no newline is found nothing is changed. */
4398     if (
4399         !PL_parser->filtered
4400      && PL_parser->lex_flags & LEX_EVALBYTES
4401      && PL_bufptr < PL_bufend
4402     ) {
4403         const char *s = PL_bufptr;
4404         while (s < PL_bufend) {
4405             if (*s == '\n') {
4406                 SV *linestr = PL_parser->linestr;
4407                 char *buf = SvPVX(linestr);
                     /* Record every parser pointer as an offset into the old
                      * buffer; this has to happen before linestr is replaced
                      * below.  last_uni and last_lop may be NULL, in which
                      * case they are left alone. */
4408                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4409                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4410                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4411                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4412                 STRLEN const last_uni_pos =
4413                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4414                 STRLEN const last_lop_pos =
4415                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
                     /* the old buffer goes on the end of the filter array */
4416                 av_push(PL_rsfp_filters, linestr);
                     /* new line buffer: copy of the old one up to and including
                      * the newline (s is advanced past the newline here) */
4417                 PL_parser->linestr =
4418                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
                     /* re-point the parser at the same offsets in the copy */
4419                 buf = SvPVX(PL_parser->linestr);
4420                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4421                 PL_parser->bufptr = buf + bufptr_pos;
4422                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4423                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4424                 PL_parser->linestart = buf + linestart_pos;
4425                 if (PL_parser->last_uni)
4426                     PL_parser->last_uni = buf + last_uni_pos;
4427                 if (PL_parser->last_lop)
4428                     PL_parser->last_lop = buf + last_lop_pos;
                     /* Reuse the old buffer's length fields as a read cursor:
                      * SvLEN becomes the end of the text and SvCUR the position
                      * just past the newline.  SvLEN must be set first, while
                      * SvCUR still holds the old length.  The non-PVIO branch
                      * of Perl_filter_read() reads from SvEND() up to
                      * SvPVX() + SvLEN() of such an entry. */
4429                 SvLEN(linestr) = SvCUR(linestr);
4430                 SvCUR(linestr) = s-SvPVX(linestr);
4431                 PL_parser->filtered = 1;
4432                 break;
4433             }
4434             s++;
4435         }
4436     }
4437     return(datasv);
4438 }
4439
4440
4441 /* Delete the filter for funcp when it sits in the last slot of
4442  * PL_rsfp_filters.  Does nothing if there is no parser or no filter at all;
4443  * dies if the last slot holds some other function. */
4444 void
4445 Perl_filter_del(pTHX_ filter_t funcp)
4446 {
4447     SV *top_data;
4448
4449     PERL_ARGS_ASSERT_FILTER_DEL;
4450
4451 #ifdef DEBUGGING
4452     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4453                           FPTR2DPTR(void*, funcp)));
4454 #endif
4455     if (!(PL_parser && PL_rsfp_filters && AvFILLp(PL_rsfp_filters) >= 0))
4456         return;
4457     top_data = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4458     if (IoANY(top_data) == FPTR2DPTR(void *, funcp)) {
4459         sv_free(av_pop(PL_rsfp_filters));  /* usual case: just pop it off */
4460     }
4461     else {  /* searching for and clearing another entry is not implemented */
4462         Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4463     }
4464 }
4465
4466
4467 /* Invoke the idxth filter function for the current rsfp.        */
4468 /* maxlen 0 = read one text line */
4469 I32
4470 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4471 {
4472     filter_t funcp;
4473     SV *datasv = NULL;
4474     /* This API is bad. It should have been using unsigned int for maxlen.
4475        Not sure if we want to change the API, but if not we should sanity
4476        check the value here.  */
4477     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4478
4479     PERL_ARGS_ASSERT_FILTER_READ;
4480
4481     if (!PL_parser || !PL_rsfp_filters)
4482         return -1;
4483     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4484         /* Provide a default input filter to make life easy.    */
4485         /* Note that we append to the line. This is handy.      */
4486         DEBUG_P(PerlIO_printf(Perl_debug_log,
4487                               "filter_read %d: from rsfp\n", idx));
4488         if (correct_length) {
4489             /* Want a block */
4490             int len ;
4491             const int old_len = SvCUR(buf_sv);
4492
4493             /* ensure buf_sv is large enough */
4494             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4495             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4496                                    correct_length)) <= 0) {
4497                 if (PerlIO_error(PL_rsfp))
4498                     return -1;          /* error */
4499                 else
4500                     return 0 ;          /* end of file */
4501             }
4502             SvCUR_set(buf_sv, old_len + len) ;
4503             SvPVX(buf_sv)[old_len + len] = '\0';
4504         } else {
4505             /* Want a line */
4506             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4507                 if (PerlIO_error(PL_rsfp))
4508                     return -1;          /* error */
4509                 else
4510                     return 0 ;          /* end of file */
4511             }
4512         }
4513         return SvCUR(buf_sv);
4514     }
4515     /* Skip this filter slot if filter has been deleted */
4516     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4517         DEBUG_P(PerlIO_printf(Perl_debug_log,
4518                               "filter_read %d: skipped (filter deleted)\n",
4519                               idx));
4520         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4521     }
4522     if (SvTYPE(datasv) != SVt_PVIO) {
4523         if (correct_length) {
4524             /* Want a block */
4525             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4526             if (!remainder) return 0; /* eof */
4527             if (correct_length > remainder) correct_length = remainder;
4528             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4529             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4530         } else {
4531             /* Want a line */
4532             const char *s = SvEND(datasv);
4533             const char *send = SvPVX(datasv) + SvLEN(datasv);
4534             while (s < send) {
4535                 if (*s == '\n') {
4536                     s++;
4537                     break;
4538                 }
4539                 s++;
4540             }
4541             if (s == send) return 0; /* eof */
4542             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4543             SvCUR_set(datasv, s-SvPVX(datasv));
4544         }
4545         return SvCUR(buf_sv);
4546     }
4547     /* Get function pointer hidden within datasv        */
4548     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4549     DEBUG_P(PerlIO_printf(Perl_debug_log,
4550                           "filter_read %d: via function %p (%s)\n",
4551                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4552     /* Call function. The function is expected to       */
4553     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4554     /* Return: <0:error, =0:eof, >0:not eof             */
4555     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4556 }
4557
4558 STATIC char *
4559 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4560 {
4561     PERL_ARGS_ASSERT_FILTER_GETS;
4562
4563 #ifdef PERL_CR_FILTER
4564     if (!PL_rsfp_filters) {
4565         filter_add(S_cr_textfilter,NULL);
4566     }
4567 #endif
4568     if (PL_rsfp_filters) {
4569         if (!append)
4570             SvCUR_set(sv, 0);   /* start with empty line        */
4571         if (FILTER_READ(0, sv, 0) > 0)
4572             return ( SvPVX(sv) ) ;
4573         else
4574             return NULL ;
4575     }
4576     else
4577         return (sv_gets(sv, PL_rsfp, append));
4578 }
4579
4580 STATIC HV *
4581 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4582 {
4583     GV *gv;
4584
4585     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4586
4587     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4588         return PL_curstash;
4589
4590     if (len > 2
4591         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4592         && (gv = gv_fetchpvn_flags(pkgname,
4593                                    len,
4594                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4595     {
4596         return GvHV(gv);                        /* Foo:: */
4597     }
4598
4599     /* use constant CLASS => 'MyClass' */
4600     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4601     if (gv && GvCV(gv)) {
4602         SV * const sv = cv_const_sv(GvCV(gv));
4603         if (sv)
4604             return gv_stashsv(sv, 0);
4605     }
4606
4607     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4608 }
4609
4610
4611 STATIC char *
4612 S_tokenize_use(pTHX_ int is_use, char *s) {
4613     PERL_ARGS_ASSERT_TOKENIZE_USE;
4614
4615     if (PL_expect != XSTATE)
4616         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4617                     is_use ? "use" : "no"));
4618     PL_expect = XTERM;
4619     s = skipspace(s);
4620     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4621         s = force_version(s, TRUE);
4622         if (*s == ';' || *s == '}'
4623                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4624             NEXTVAL_NEXTTOKE.opval = NULL;
4625             force_next(BAREWORD);
4626         }
4627         else if (*s == 'v') {
4628             s = force_word(s,BAREWORD,FALSE,TRUE);
4629             s = force_version(s, FALSE);
4630         }
4631     }
4632     else {
4633         s = force_word(s,BAREWORD,FALSE,TRUE);
4634         s = force_version(s, FALSE);
4635     }
4636     pl_yylval.ival = is_use;
4637     return s;
4638 }
#ifdef DEBUGGING
    /* Printable names for the values of PL_expect, indexed by that value.
     * The DEBUG_T trace in yylex() prints them with the format "X%s", so
     * each entry is stored WITHOUT its leading 'X'. */
    static const char* const exp_name[] =
        { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
          "ATTRTERM", "TERMBLOCK", "BLOCKTERM", "POSTDEREF",
          "SIGVAR", "TERMORDORDOR"
        };
#endif
4646
#define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
/*
 * Return true if the len-byte word at p is one of
 *     m  s  y  q  tr  qq  qw  qx  qr
 * and false for anything else.
 *
 * strchr() treats the terminating NUL of the set string as a member, so
 * a NUL byte in the word is rejected explicitly before each lookup.
 */
STATIC bool
S_word_takes_any_delimiter(char *p, STRLEN len)
{
    return (len == 1 && p[0] && strchr("msyq", p[0]))
            || (len == 2
                && ((p[0] == 't' && p[1] == 'r')
                    || (p[0] == 'q' && p[1] && strchr("qwxr", p[1]))));
}
4656
4657 static void
4658 S_check_scalar_slice(pTHX_ char *s)
4659 {
4660     s++;
4661     while (SPACE_OR_TAB(*s)) s++;
4662     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4663                                                              PL_bufend,
4664                                                              UTF))
4665     {
4666         return;
4667     }
4668     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4669            || (*s && strchr(" \t$#+-'\"", *s)))
4670     {
4671         s += UTF ? UTF8SKIP(s) : 1;
4672     }
4673     if (*s == '}' || *s == ']')
4674         pl_yylval.ival = OPpSLICEWARNING;
4675 }
4676
4677 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4678 static void
4679 S_lex_token_boundary(pTHX)
4680 {
4681     PL_oldoldbufptr = PL_oldbufptr;
4682     PL_oldbufptr = PL_bufptr;
4683 }
4684
4685 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4686 static char *
4687 S_vcs_conflict_marker(pTHX_ char *s)
4688 {
4689     lex_token_boundary();
4690     PL_bufptr = s;
4691     yyerror("Version control conflict marker");
4692     while (s < PL_bufend && *s != '\n')
4693         s++;
4694     return s;
4695 }
4696
4697 /*
4698   yylex
4699
4700   Works out what to call the token just pulled out of the input
4701   stream.  The yacc parser takes care of taking the ops we return and
4702   stitching them into a tree.
4703
4704   Returns:
4705     The type of the next token
4706
4707   Structure:
4708       Check if we have already built the token; if so, use it.
4709       Switch based on the current state:
4710           - if we have a case modifier in a string, deal with that
4711           - handle other cases of interpolation inside a string
4712           - scan the next line if we are inside a format
4713       In the normal state, switch on the next character:
4714           - default:
4715             if alphabetic, go to key lookup
4716             unrecognized character - croak
4717           - 0/4/26: handle end-of-line or EOF
4718           - cases for whitespace
4719           - \n and #: handle comments and line numbers
4720           - various operators, brackets and sigils
4721           - numbers
4722           - quotes
4723           - 'v': vstrings (or go to key lookup)
4724           - 'x' repetition operator (or go to key lookup)
4725           - other ASCII alphanumerics (key lookup begins here):
4726               word before => ?
4727               keyword plugin
4728               scan built-in keyword (but do nothing with it yet)
4729               check for statement label
4730               check for lexical subs
4731                   goto just_a_word if there is one
4732               see whether built-in keyword is overridden
4733               switch on keyword number:
4734                   - default: just_a_word:
4735                       not a built-in keyword; handle bareword lookup
4736                       disambiguate between method and sub call
4737                       fall back to bareword
4738                   - cases for built-in keywords
4739 */
4740
4741
4742 int
4743 Perl_yylex(pTHX)
4744 {
4745     dVAR;
4746     char *s = PL_bufptr;
4747     char *d;
4748     STRLEN len;
4749     bool bof = FALSE;
4750     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4751     U8 formbrack = 0;
4752     U32 fake_eof = 0;
4753
4754     /* orig_keyword, gvp, and gv are initialized here because
4755      * jump to the label just_a_word_zero can bypass their
4756      * initialization later. */
4757     I32 orig_keyword = 0;
4758     GV *gv = NULL;
4759     GV **gvp = NULL;
4760
4761     if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
4762         const U8* first_bad_char_loc;
4763         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
4764                                                         PL_bufend - PL_bufptr,
4765                                                         &first_bad_char_loc)))
4766         {
4767             _force_out_malformed_utf8_message(first_bad_char_loc,
4768                                               (U8 *) PL_bufend,
4769                                               0,
4770                                               1 /* 1 means die */ );
4771             NOT_REACHED; /* NOTREACHED */
4772         }
4773         PL_parser->recheck_utf8_validity = FALSE;
4774     }
4775     DEBUG_T( {
4776         SV* tmp = newSVpvs("");
4777         PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
4778             (IV)CopLINE(PL_curcop),
4779             lex_state_names[PL_lex_state],
4780             exp_name[PL_expect],
4781             pv_display(tmp, s, strlen(s), 0, 60));
4782         SvREFCNT_dec(tmp);
4783     } );
4784
4785     /* when we've already built the next token, just pull it out of the queue */
4786     if (PL_nexttoke) {
4787         PL_nexttoke--;
4788         pl_yylval = PL_nextval[PL_nexttoke];
4789         {
4790             I32 next_type;
4791             next_type = PL_nexttype[PL_nexttoke];
4792             if (next_type & (7<<24)) {
4793                 if (next_type & (1<<24)) {
4794                     if (PL_lex_brackets > 100)
4795                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4796                     PL_lex_brackstack[PL_lex_brackets++] =
4797                         (char) ((next_type >> 16) & 0xff);
4798                 }
4799                 if (next_type & (2<<24))
4800                     PL_lex_allbrackets++;
4801                 if (next_type & (4<<24))
4802                     PL_lex_allbrackets--;
4803                 next_type &= 0xffff;
4804             }
4805             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4806         }
4807     }
4808
4809     switch (PL_lex_state) {
4810     case LEX_NORMAL:
4811     case LEX_INTERPNORMAL:
4812         break;
4813
4814     /* interpolated case modifiers like \L \U, including \Q and \E.
4815        when we get here, PL_bufptr is at the \
4816     */
4817     case LEX_INTERPCASEMOD:
4818 #ifdef DEBUGGING
4819         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4820             Perl_croak(aTHX_
4821                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4822                        PL_bufptr, PL_bufend, *PL_bufptr);
4823 #endif
4824         /* handle \E or end of string */
4825         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4826             /* if at a \E */
4827             if (PL_lex_casemods) {
4828                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4829                 PL_lex_casestack[PL_lex_casemods] = '\0';
4830
4831                 if (PL_bufptr != PL_bufend
4832                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4833                         || oldmod == 'F')) {
4834                     PL_bufptr += 2;
4835                     PL_lex_state = LEX_INTERPCONCAT;
4836                 }
4837                 PL_lex_allbrackets--;
4838                 return REPORT(')');
4839             }
4840             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4841                /* Got an unpaired \E */
4842                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4843                         "Useless use of \\E");
4844             }
4845             if (PL_bufptr != PL_bufend)
4846                 PL_bufptr += 2;
4847             PL_lex_state = LEX_INTERPCONCAT;
4848             return yylex();
4849         }
4850         else {
4851             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4852               "### Saw case modifier\n"); });
4853             s = PL_bufptr + 1;
4854             if (s[1] == '\\' && s[2] == 'E') {
4855                 PL_bufptr = s + 3;
4856                 PL_lex_state = LEX_INTERPCONCAT;
4857                 return yylex();
4858             }
4859             else {
4860                 I32 tmp;
4861                 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4862                     tmp = *s, *s = s[2], s[2] = (char)tmp;      /* misordered... */
4863                 if ((*s == 'L' || *s == 'U' || *s == 'F')
4864                     && (strpbrk(PL_lex_casestack, "LUF")))
4865                 {
4866                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4867                     PL_lex_allbrackets--;
4868                     return REPORT(')');
4869                 }
4870                 if (PL_lex_casemods > 10)
4871                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4872                 PL_lex_casestack[PL_lex_casemods++] = *s;
4873                 PL_lex_casestack[PL_lex_casemods] = '\0';
4874                 PL_lex_state = LEX_INTERPCONCAT;
4875                 NEXTVAL_NEXTTOKE.ival = 0;
4876                 force_next((2<<24)|'(');
4877                 if (*s == 'l')
4878                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4879                 else if (*s == 'u')
4880                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4881                 else if (*s == 'L')
4882                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4883                 else if (*s == 'U')
4884                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4885                 else if (*s == 'Q')
4886                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4887                 else if (*s == 'F')
4888                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4889                 else
4890                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4891                 PL_bufptr = s + 1;
4892             }
4893             force_next(FUNC);
4894             if (PL_lex_starts) {
4895                 s = PL_bufptr;
4896                 PL_lex_starts = 0;
4897                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4898                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4899                     TOKEN(',');
4900                 else
4901                     AopNOASSIGN(OP_CONCAT);
4902             }
4903             else
4904                 return yylex();
4905         }
4906
4907     case LEX_INTERPPUSH:
4908         return REPORT(sublex_push());
4909
4910     case LEX_INTERPSTART:
4911         if (PL_bufptr == PL_bufend)
4912             return REPORT(sublex_done());
4913         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4914               "### Interpolated variable\n"); });
4915         PL_expect = XTERM;
4916         /* for /@a/, we leave the joining for the regex engine to do
4917          * (unless we're within \Q etc) */
4918         PL_lex_dojoin = (*PL_bufptr == '@'
4919                             && (!PL_lex_inpat || PL_lex_casemods));
4920         PL_lex_state = LEX_INTERPNORMAL;
4921         if (PL_lex_dojoin) {
4922             NEXTVAL_NEXTTOKE.ival = 0;
4923             force_next(',');
4924             force_ident("\"", '$');
4925             NEXTVAL_NEXTTOKE.ival = 0;
4926             force_next('$');
4927             NEXTVAL_NEXTTOKE.ival = 0;
4928             force_next((2<<24)|'(');
4929             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4930             force_next(FUNC);
4931         }
4932         /* Convert (?{...}) and friends to 'do {...}' */
4933         if (PL_lex_inpat && *PL_bufptr == '(') {
4934             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4935             PL_bufptr += 2;
4936             if (*PL_bufptr != '{')
4937                 PL_bufptr++;
4938             PL_expect = XTERMBLOCK;
4939             force_next(DO);
4940         }
4941
4942         if (PL_lex_starts++) {
4943             s = PL_bufptr;
4944             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4945             if (!PL_lex_casemods && PL_lex_inpat)
4946                 TOKEN(',');
4947             else
4948                 AopNOASSIGN(OP_CONCAT);
4949         }
4950         return yylex();
4951
4952     case LEX_INTERPENDMAYBE:
4953         if (intuit_more(PL_bufptr)) {
4954             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4955             break;
4956         }
4957         /* FALLTHROUGH */
4958
4959     case LEX_INTERPEND:
4960         if (PL_lex_dojoin) {
4961             const U8 dojoin_was = PL_lex_dojoin;
4962             PL_lex_dojoin = FALSE;
4963             PL_lex_state = LEX_INTERPCONCAT;
4964             PL_lex_allbrackets--;
4965             return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
4966         }
4967         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4968             && SvEVALED(PL_lex_repl))
4969         {
4970             if (PL_bufptr != PL_bufend)
4971                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4972             PL_lex_repl = NULL;
4973         }
4974         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
4975            re_eval_str.  If the here-doc body’s length equals the previous
4976            value of re_eval_start, re_eval_start will now be null.  So
4977            check re_eval_str as well. */
4978         if (PL_parser->lex_shared->re_eval_start
4979          || PL_parser->lex_shared->re_eval_str) {
4980             SV *sv;
4981             if (*PL_bufptr != ')')
4982                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4983             PL_bufptr++;
4984             /* having compiled a (?{..}) expression, return the original
4985              * text too, as a const */
4986             if (PL_parser->lex_shared->re_eval_str) {
4987                 sv = PL_parser->lex_shared->re_eval_str;
4988                 PL_parser->lex_shared->re_eval_str = NULL;
4989                 SvCUR_set(sv,
4990                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4991                 SvPV_shrink_to_cur(sv);
4992             }
4993             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4994                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
4995             NEXTVAL_NEXTTOKE.opval =
4996                     newSVOP(OP_CONST, 0,
4997                                  sv);
4998             force_next(THING);
4999             PL_parser->lex_shared->re_eval_start = NULL;
5000             PL_expect = XTERM;
5001             return REPORT(',');
5002         }
5003
5004         /* FALLTHROUGH */
5005     case LEX_INTERPCONCAT:
5006 #ifdef DEBUGGING
5007         if (PL_lex_brackets)
5008             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5009                        (long) PL_lex_brackets);
5010 #endif
5011         if (PL_bufptr == PL_bufend)
5012             return REPORT(sublex_done());
5013
5014         /* m'foo' still needs to be parsed for possible (?{...}) */
5015         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5016             SV *sv = newSVsv(PL_linestr);
5017             sv = tokeq(sv);
5018             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
5019             s = PL_bufend;
5020         }
5021         else {
5022             int save_error_count = PL_error_count;
5023
5024             s = scan_const(PL_bufptr);
5025
5026             /* Set flag if this was a pattern and there were errors.  op.c will
5027              * refuse to compile a pattern with this flag set.  Otherwise, we
5028              * could get segfaults, etc. */
5029             if (PL_lex_inpat && PL_error_count > save_error_count) {
5030                 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
5031             }
5032             if (*s == '\\')
5033                 PL_lex_state = LEX_INTERPCASEMOD;
5034             else
5035                 PL_lex_state = LEX_INTERPSTART;
5036         }
5037
5038         if (s != PL_bufptr) {
5039             NEXTVAL_NEXTTOKE = pl_yylval;
5040             PL_expect = XTERM;
5041             force_next(THING);
5042             if (PL_lex_starts++) {
5043                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5044                 if (!PL_lex_casemods && PL_lex_inpat)
5045                     TOKEN(',');
5046                 else
5047                     AopNOASSIGN(OP_CONCAT);
5048             }
5049             else {
5050                 PL_bufptr = s;
5051                 return yylex();
5052             }
5053         }
5054
5055         return yylex();
5056     case LEX_FORMLINE:
5057         s = scan_formline(PL_bufptr);
5058         if (!PL_lex_formbrack)
5059         {
5060             formbrack = 1;
5061             goto rightbracket;
5062         }
5063         PL_bufptr = s;
5064         return yylex();
5065     }
5066
5067     /* We really do *not* want PL_linestr ever becoming a COW. */
5068     assert (!SvIsCOW(PL_linestr));
5069     s = PL_bufptr;
5070     PL_oldoldbufptr = PL_oldbufptr;
5071     PL_oldbufptr = s;
5072     PL_parser->saw_infix_sigil = 0;
5073
5074     if (PL_in_my == KEY_sigvar) {
5075         /* we expect the sigil and optional var name part of a
5076          * signature element here. Since a '$' is not necessarily
5077          * followed by a var name, handle it specially here; the general
5078          * yylex code would otherwise try to interpret whatever follows
5079          * as a var; e.g. ($, ...) would be seen as the var '$,'
5080          */
5081
5082         U8 sigil;
5083
5084         s = skipspace(s);
5085         sigil = *s++;
5086         PL_bufptr = s; /* for error reporting */
5087         switch (sigil) {
5088         case '$':
5089         case '@':
5090         case '%':
5091             /* spot stuff that looks like an prototype */
5092             if (strchr("$:@%&*;\\[]", *s)) {
5093                 yyerror("Illegal character following sigil in a subroutine signature");
5094                 break;
5095             }
5096             /* '$#' is banned, while '$ # comment' isn't */
5097             if (*s == '#') {
5098                 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5099                 break;
5100             }
5101             s = skipspace(s);
5102             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5103                 char *dest = PL_tokenbuf + 1;
5104                 /* read var name, including sigil, into PL_tokenbuf */
5105                 PL_tokenbuf[0] = sigil;
5106                 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5107                     0, cBOOL(UTF), FALSE);
5108                 *dest = '\0';
5109                 assert(PL_tokenbuf[1]); /* we have a variable name */
5110                 NEXTVAL_NEXTTOKE.ival = sigil;
5111                 force_next('p'); /* force a signature pending identifier */
5112             }
5113             else
5114                 PL_in_my = 0;
5115             PL_expect = XOPERATOR;
5116             break;
5117
5118         case ')':
5119             PL_expect = XBLOCK;
5120             break;
5121         case ',': /* handle ($a,,$b) */
5122             break;
5123
5124         default:
5125             PL_in_my = 0;
5126             yyerror("A signature parameter must start with '$', '@' or '%'");
5127             /* very crude error recovery: skip to likely next signature
5128              * element */
5129             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5130                 s++;
5131             break;
5132         }
5133         TOKEN(sigil);
5134     }
5135
5136   retry:
5137     switch (*s) {
5138     default:
5139         if (UTF) {
5140             if (isIDFIRST_utf8_safe(s, PL_bufend)) {
5141                 goto keylookup;
5142             }
5143         }
5144         else if (isALNUMC(*s)) {
5145             goto keylookup;
5146         }
5147     {
5148         SV *dsv = newSVpvs_flags("", SVs_TEMP);
5149         const char *c;
5150         if (UTF) {
5151             STRLEN skiplen = UTF8SKIP(s);
5152             STRLEN stravail = PL_bufend - s;
5153             c = sv_uni_display(dsv, newSVpvn_flags(s,
5154                                                    skiplen > stravail ? stravail : skiplen,
5155                                                    SVs_TEMP | SVf_UTF8),
5156                                10, UNI_DISPLAY_ISPRINT);
5157         }
5158         else {
5159             c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5160         }
5161         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5162         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5163             d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)PL_linestart) : s - UNRECOGNIZED_PRECEDE_COUNT;
5164         } else {
5165             d = PL_linestart;
5166         }
5167         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
5168                           UTF8fARG(UTF, (s - d), d),
5169                          (int) len + 1);
5170     }
5171     case 4:
5172     case 26:
5173         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
5174     case 0:
5175         if ((!PL_rsfp || PL_lex_inwhat)
5176          && (!PL_parser->filtered || s+1 < PL_bufend)) {
5177             PL_last_uni = 0;
5178             PL_last_lop = 0;
5179             if (PL_lex_brackets
5180                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
5181             {
5182                 yyerror((const char *)
5183                         (PL_lex_formbrack
5184                          ? "Format not terminated"
5185                          : "Missing right curly or square bracket"));
5186             }
5187             DEBUG_T( { PerlIO_printf(Perl_debug_log,
5188                         "### Tokener got EOF\n");
5189             } );
5190             TOKEN(0);
5191         }
5192         if (s++ < PL_bufend)
5193             goto retry;                 /* ignore stray nulls */
5194         PL_last_uni = 0;
5195         PL_last_lop = 0;
5196         if (!PL_in_eval && !PL_preambled) {
5197             PL_preambled = TRUE;
5198             if (PL_perldb) {
5199                 /* Generate a string of Perl code to load the debugger.
5200                  * If PERL5DB is set, it will return the contents of that,
5201                  * otherwise a compile-time require of perl5db.pl.  */
5202
5203                 const char * const pdb = PerlEnv_getenv("PERL5DB");
5204
5205                 if (pdb) {
5206                     sv_setpv(PL_linestr, pdb);
5207                     sv_catpvs(PL_linestr,";");
5208                 } else {
5209                     SETERRNO(0,SS_NORMAL);
5210                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5211                 }
5212                 PL_parser->preambling = CopLINE(PL_curcop);
5213             } else
5214                 SvPVCLEAR(PL_linestr);
5215             if (PL_preambleav) {
5216                 SV **svp = AvARRAY(PL_preambleav);
5217                 SV **const end = svp + AvFILLp(PL_preambleav);
5218                 while(svp <= end) {
5219                     sv_catsv(PL_linestr, *svp);
5220                     ++svp;
5221                     sv_catpvs(PL_linestr, ";");
5222                 }
5223                 sv_free(MUTABLE_SV(PL_preambleav));
5224                 PL_preambleav = NULL;
5225             }
5226             if (PL_minus_E)
5227                 sv_catpvs(PL_linestr,
5228                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5229             if (PL_minus_n || PL_minus_p) {
5230                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5231                 if (PL_minus_l)
5232                     sv_catpvs(PL_linestr,"chomp;");
5233                 if (PL_minus_a) {
5234                     if (PL_minus_F) {
5235                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5236                              || *PL_splitstr == '"')
5237                               && strchr(PL_splitstr + 1, *PL_splitstr))
5238                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5239                         else {
5240                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5241                                bytes can be used as quoting characters.  :-) */
5242                             const char *splits = PL_splitstr;
5243                             sv_catpvs(PL_linestr, "our @F=split(q\0");
5244                             do {
5245                                 /* Need to \ \s  */
5246                                 if (*splits == '\\')
5247                                     sv_catpvn(PL_linestr, splits, 1);
5248                                 sv_catpvn(PL_linestr, splits, 1);
5249                             } while (*splits++);
5250                             /* This loop will embed the trailing NUL of
5251                                PL_linestr as the last thing it does before
5252                                terminating.  */
5253                             sv_catpvs(PL_linestr, ");");
5254                         }
5255                     }
5256                     else
5257                         sv_catpvs(PL_linestr,"our @F=split(' ');");
5258                 }
5259             }
5260             sv_catpvs(PL_linestr, "\n");
5261             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5262             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5263             PL_last_lop = PL_last_uni = NULL;
5264             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
5265                 update_debugger_info(PL_linestr, NULL, 0);
5266             goto retry;
5267         }
5268         do {
5269             fake_eof = 0;
5270             bof = cBOOL(PL_rsfp);
5271             if (0) {
5272               fake_eof:
5273                 fake_eof = LEX_FAKE_EOF;
5274             }
5275             PL_bufptr = PL_bufend;
5276             COPLINE_INC_WITH_HERELINES;
5277             if (!lex_next_chunk(fake_eof)) {
5278                 CopLINE_dec(PL_curcop);
5279                 s = PL_bufptr;
5280                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
5281             }
5282             CopLINE_dec(PL_curcop);
5283             s = PL_bufptr;
5284             /* If it looks like the start of a BOM or raw UTF-16,
5285              * check if it in fact is. */
5286             if (bof && PL_rsfp
5287                 && (*s == 0
5288                     || *(U8*)s == BOM_UTF8_FIRST_BYTE
5289                         || *(U8*)s >= 0xFE
5290                         || s[1] == 0))
5291             {
5292                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5293                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5294 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5295                 /* offset may include swallowed CR */
5296                 if (!bof)
5297                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5298 #endif
5299                 if (bof) {
5300                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5301                     s = swallow_bom((U8*)s);
5302                 }
5303             }
5304             if (PL_parser->in_pod) {
5305                 /* Incest with pod. */
5306                 if (*s == '=' && strEQs(s, "=cut") && !isALPHA(s[4])) {
5307                     SvPVCLEAR(PL_linestr);
5308                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5309                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5310                     PL_last_lop = PL_last_uni = NULL;
5311                     PL_parser->in_pod = 0;
5312                 }
5313             }
5314             if (PL_rsfp || PL_parser->filtered)
5315                 incline(s);
5316         } while (PL_parser->in_pod);
5317         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5318         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5319         PL_last_lop = PL_last_uni = NULL;
5320         if (CopLINE(PL_curcop) == 1) {
5321             while (s < PL_bufend && isSPACE(*s))
5322                 s++;
5323             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5324                 s++;
5325             d = NULL;
5326             if (!PL_in_eval) {
5327                 if (*s == '#' && *(s+1) == '!')
5328                     d = s + 2;
5329 #ifdef ALTERNATE_SHEBANG
5330                 else {
5331                     static char const as[] = ALTERNATE_SHEBANG;
5332                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5333                         d = s + (sizeof(as) - 1);
5334                 }
5335 #endif /* ALTERNATE_SHEBANG */
5336             }
5337             if (d) {
5338                 char *ipath;
5339                 char *ipathend;
5340
5341                 while (isSPACE(*d))
5342                     d++;
5343                 ipath = d;
5344                 while (*d && !isSPACE(*d))
5345                     d++;
5346                 ipathend = d;
5347
5348 #ifdef ARG_ZERO_IS_SCRIPT
5349                 if (ipathend > ipath) {
5350                     /*
5351                      * HP-UX (at least) sets argv[0] to the script name,
5352                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5353                      * at least, set argv[0] to the basename of the Perl
5354                      * interpreter. So, having found "#!", we'll set it right.
5355                      */
5356                     SV* copfilesv = CopFILESV(PL_curcop);
5357                     if (copfilesv) {
5358                         SV * const x =
5359                             GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5360                                              SVt_PV)); /* $^X */
5361                         assert(SvPOK(x) || SvGMAGICAL(x));
5362                         if (sv_eq(x, copfilesv)) {
5363                             sv_setpvn(x, ipath, ipathend - ipath);
5364                             SvSETMAGIC(x);
5365                         }
5366                         else {
5367                             STRLEN blen;
5368                             STRLEN llen;
5369                             const char *bstart = SvPV_const(copfilesv, blen);
5370                             const char * const lstart = SvPV_const(x, llen);
5371                             if (llen < blen) {
5372                                 bstart += blen - llen;
5373                                 if (strnEQ(bstart, lstart, llen) &&     bstart[-1] == '/') {
5374                                     sv_setpvn(x, ipath, ipathend - ipath);
5375                                     SvSETMAGIC(x);
5376                                 }
5377                             }
5378                         }
5379                     }
5380                     else {
5381                         /* Anything to do if no copfilesv? */
5382                     }
5383                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5384                 }
5385 #endif /* ARG_ZERO_IS_SCRIPT */
5386
5387                 /*
5388                  * Look for options.
5389                  */
5390                 d = instr(s,"perl -");
5391                 if (!d) {
5392                     d = instr(s,"perl");
5393 #if defined(DOSISH)
5394                     /* avoid getting into infinite loops when shebang
5395                      * line contains "Perl" rather than "perl" */
5396                     if (!d) {
5397                         for (d = ipathend-4; d >= ipath; --d) {
5398                             if (isALPHA_FOLD_EQ(*d, 'p')
5399                                 && !ibcmp(d, "perl", 4))
5400                             {
5401                                 break;
5402                             }
5403                         }
5404                         if (d < ipath)
5405                             d = NULL;
5406                     }
5407 #endif
5408                 }
5409 #ifdef ALTERNATE_SHEBANG
5410                 /*
5411                  * If the ALTERNATE_SHEBANG on this system starts with a
5412                  * character that can be part of a Perl expression, then if
5413                  * we see it but not "perl", we're probably looking at the
5414                  * start of Perl code, not a request to hand off to some
5415                  * other interpreter.  Similarly, if "perl" is there, but
5416                  * not in the first 'word' of the line, we assume the line
5417                  * contains the start of the Perl program.
5418                  */
5419                 if (d && *s != '#') {
5420                     const char *c = ipath;
5421                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5422                         c++;
5423                     if (c < d)
5424                         d = NULL;       /* "perl" not in first word; ignore */
5425                     else
5426                         *s = '#';       /* Don't try to parse shebang line */
5427                 }
5428 #endif /* ALTERNATE_SHEBANG */
5429                 if (!d
5430                     && *s == '#'
5431                     && ipathend > ipath
5432                     && !PL_minus_c
5433                     && !instr(s,"indir")
5434                     && instr(PL_origargv[0],"perl"))
5435                 {
5436                     dVAR;
5437                     char **newargv;
5438
5439                     *ipathend = '\0';
5440                     s = ipathend + 1;
5441                     while (s < PL_bufend && isSPACE(*s))
5442                         s++;
5443                     if (s < PL_bufend) {
5444                         Newx(newargv,PL_origargc+3,char*);
5445                         newargv[1] = s;
5446                         while (s < PL_bufend && !isSPACE(*s))
5447                             s++;
5448                         *s = '\0';
5449                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5450                     }
5451                     else
5452                         newargv = PL_origargv;
5453                     newargv[0] = ipath;
5454                     PERL_FPU_PRE_EXEC
5455                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5456                     PERL_FPU_POST_EXEC
5457                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5458                 }
5459                 if (d) {
5460                     while (*d && !isSPACE(*d))
5461                         d++;
5462                     while (SPACE_OR_TAB(*d))
5463                         d++;
5464
5465                     if (*d++ == '-') {
5466                         const bool switches_done = PL_doswitches;
5467                         const U32 oldpdb = PL_perldb;
5468                         const bool oldn = PL_minus_n;
5469                         const bool oldp = PL_minus_p;
5470                         const char *d1 = d;
5471
5472                         do {
5473                             bool baduni = FALSE;
5474                             if (*d1 == 'C') {
5475                                 const char *d2 = d1 + 1;
5476                                 if (parse_unicode_opts((const char **)&d2)
5477                                     != PL_unicode)
5478                                     baduni = TRUE;
5479                             }
5480                             if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
5481                                 const char * const m = d1;
5482                                 while (*d1 && !isSPACE(*d1))
5483                                     d1++;
5484                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5485                                       (int)(d1 - m), m);
5486                             }
5487                             d1 = moreswitches(d1);
5488                         } while (d1);
5489                         if (PL_doswitches && !switches_done) {
5490                             int argc = PL_origargc;
5491                             char **argv = PL_origargv;
5492                             do {
5493                                 argc--,argv++;
5494                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5495                             init_argv_symbols(argc,argv);
5496                         }
5497                         if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
5498                             || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5499                               /* if we have already added "LINE: while (<>) {",
5500                                  we must not do it again */
5501                         {
5502                             SvPVCLEAR(PL_linestr);
5503                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5504                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5505                             PL_last_lop = PL_last_uni = NULL;
5506                             PL_preambled = FALSE;
5507                             if (PERLDB_LINE_OR_SAVESRC)
5508                                 (void)gv_fetchfile(PL_origfilename);
5509                             goto retry;
5510                         }
5511                     }
5512                 }
5513             }
5514         }
5515         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5516             PL_lex_state = LEX_FORMLINE;
5517             force_next(FORMRBRACK);
5518             TOKEN(';');
5519         }
5520         goto retry;
5521     case '\r':
5522 #ifdef PERL_STRICT_CR
5523         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5524         Perl_croak(aTHX_
5525       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5526 #endif
5527     case ' ': case '\t': case '\f': case '\v':
5528         s++;
5529         goto retry;
5530     case '#':
5531     case '\n':
5532         if (PL_lex_state != LEX_NORMAL
5533             || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
5534         {
5535             const bool in_comment = *s == '#';
5536             if (*s == '#' && s == PL_linestart && PL_in_eval
5537              && !PL_rsfp && !PL_parser->filtered) {
5538                 /* handle eval qq[#line 1 "foo"\n ...] */
5539                 CopLINE_dec(PL_curcop);
5540                 incline(s);
5541             }
5542             d = s;
5543             while (d < PL_bufend && *d != '\n')
5544                 d++;
5545             if (d < PL_bufend)
5546                 d++;
5547             else if (d > PL_bufend)
5548                 /* Found by Ilya: feed random input to Perl. */
5549                 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5550                            d, PL_bufend);
5551             s = d;
5552             if (in_comment && d == PL_bufend
5553                 && PL_lex_state == LEX_INTERPNORMAL
5554                 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5555                 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5556             else
5557                 incline(s);
5558             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5559                 PL_lex_state = LEX_FORMLINE;
5560                 force_next(FORMRBRACK);
5561                 TOKEN(';');
5562             }
5563         }
5564         else {
5565             while (s < PL_bufend && *s != '\n')
5566                 s++;
5567             if (s < PL_bufend)
5568                 {
5569                     s++;
5570                     if (s < PL_bufend)
5571                         incline(s);
5572                 }
5573             else if (s > PL_bufend)
5574                 /* Found by Ilya: feed random input to Perl. */
5575                 Perl_croak(aTHX_ "panic: input overflow");
5576         }
5577         goto retry;
5578     case '-':
5579         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5580             I32 ftst = 0;
5581             char tmp;
5582
5583             s++;
5584             PL_bufptr = s;
5585             tmp = *s++;
5586
5587             while (s < PL_bufend && SPACE_OR_TAB(*s))
5588                 s++;
5589
5590             if (strEQs(s,"=>")) {
5591                 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5592                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5593                 OPERATOR('-');          /* unary minus */
5594             }
5595             switch (tmp) {
5596             case 'r': ftst = OP_FTEREAD;        break;
5597             case 'w': ftst = OP_FTEWRITE;       break;
5598             case 'x': ftst = OP_FTEEXEC;        break;
5599             case 'o': ftst = OP_FTEOWNED;       break;
5600             case 'R': ftst = OP_FTRREAD;        break;
5601             case 'W': ftst = OP_FTRWRITE;       break;
5602             case 'X': ftst = OP_FTREXEC;        break;
5603             case 'O': ftst = OP_FTROWNED;       break;
5604             case 'e': ftst = OP_FTIS;           break;
5605             case 'z': ftst = OP_FTZERO;         break;
5606             case 's': ftst = OP_FTSIZE;         break;
5607             case 'f': ftst = OP_FTFILE;         break;
5608             case 'd': ftst = OP_FTDIR;          break;
5609             case 'l': ftst = OP_FTLINK;         break;
5610             case 'p': ftst = OP_FTPIPE;         break;
5611             case 'S': ftst = OP_FTSOCK;         break;
5612             case 'u': ftst = OP_FTSUID;         break;
5613             case 'g': ftst = OP_FTSGID;         break;
5614             case 'k': ftst = OP_FTSVTX;         break;
5615             case 'b': ftst = OP_FTBLK;          break;
5616             case 'c': ftst = OP_FTCHR;          break;
5617             case 't': ftst = OP_FTTTY;          break;
5618             case 'T': ftst = OP_FTTEXT;         break;
5619             case 'B': ftst = OP_FTBINARY;       break;
5620             case 'M': case 'A': case 'C':
5621                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5622                 switch (tmp) {
5623                 case 'M': ftst = OP_FTMTIME;    break;
5624                 case 'A': ftst = OP_FTATIME;    break;
5625                 case 'C': ftst = OP_FTCTIME;    break;
5626                 default:                        break;
5627                 }
5628                 break;
5629             default:
5630                 break;
5631             }
5632             if (ftst) {
5633                 PL_last_uni = PL_oldbufptr;
5634                 PL_last_lop_op = (OPCODE)ftst;
5635                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5636                         "### Saw file test %c\n", (int)tmp);
5637                 } );
5638                 FTST(ftst);
5639             }
5640             else {
5641                 /* Assume it was a minus followed by a one-letter named
5642                  * subroutine call (or a -bareword), then. */
5643                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5644                         "### '-%c' looked like a file test but was not\n",
5645                         (int) tmp);
5646                 } );
5647                 s = --PL_bufptr;
5648             }
5649         }
5650         {
5651             const char tmp = *s++;
5652             if (*s == tmp) {
5653                 s++;
5654                 if (PL_expect == XOPERATOR)
5655                     TERM(POSTDEC);
5656                 else
5657                     OPERATOR(PREDEC);
5658             }
5659             else if (*s == '>') {
5660                 s++;
5661                 s = skipspace(s);
5662                 if (((*s == '$' || *s == '&') && s[1] == '*')
5663                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
5664                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5665                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5666                  )
5667                 {
5668                     PL_expect = XPOSTDEREF;
5669                     TOKEN(ARROW);
5670                 }
5671                 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5672                     s = force_word(s,METHOD,FALSE,TRUE);
5673                     TOKEN(ARROW);
5674                 }
5675                 else if (*s == '$')
5676                     OPERATOR(ARROW);
5677                 else
5678                     TERM(ARROW);
5679             }
5680             if (PL_expect == XOPERATOR) {
5681                 if (*s == '='
5682                     && !PL_lex_allbrackets
5683                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5684                 {
5685                     s--;
5686                     TOKEN(0);
5687                 }
5688                 Aop(OP_SUBTRACT);
5689             }
5690             else {
5691                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5692                     check_uni();
5693                 OPERATOR('-');          /* unary minus */
5694             }
5695         }
5696
5697     case '+':
5698         {
5699             const char tmp = *s++;
5700             if (*s == tmp) {
5701                 s++;
5702                 if (PL_expect == XOPERATOR)
5703                     TERM(POSTINC);
5704                 else
5705                     OPERATOR(PREINC);
5706             }
5707             if (PL_expect == XOPERATOR) {
5708                 if (*s == '='
5709                     && !PL_lex_allbrackets
5710                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5711                 {
5712                     s--;
5713                     TOKEN(0);
5714                 }
5715                 Aop(OP_ADD);
5716             }
5717             else {
5718                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5719                     check_uni();
5720                 OPERATOR('+');
5721             }
5722         }
5723
5724     case '*':
5725         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5726         if (PL_expect != XOPERATOR) {
5727             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5728             PL_expect = XOPERATOR;
5729             force_ident(PL_tokenbuf, '*');
5730             if (!*PL_tokenbuf)
5731                 PREREF('*');
5732             TERM('*');
5733         }
5734         s++;
5735         if (*s == '*') {
5736             s++;
5737             if (*s == '=' && !PL_lex_allbrackets
5738                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5739             {
5740                 s -= 2;
5741                 TOKEN(0);
5742             }
5743             PWop(OP_POW);
5744         }
5745         if (*s == '='
5746             && !PL_lex_allbrackets
5747             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5748         {
5749             s--;
5750             TOKEN(0);
5751         }
5752         PL_parser->saw_infix_sigil = 1;
5753         Mop(OP_MULTIPLY);
5754
5755     case '%':
5756     {
5757         if (PL_expect == XOPERATOR) {
5758             if (s[1] == '='
5759                 && !PL_lex_allbrackets
5760                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5761             {
5762                 TOKEN(0);
5763             }
5764             ++s;
5765             PL_parser->saw_infix_sigil = 1;
5766             Mop(OP_MODULO);
5767         }
5768         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5769         PL_tokenbuf[0] = '%';
5770         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5771         pl_yylval.ival = 0;
5772         if (!PL_tokenbuf[1]) {
5773             PREREF('%');
5774         }
5775         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5776             if (*s == '[')
5777                 PL_tokenbuf[0] = '@';
5778         }
5779         PL_expect = XOPERATOR;
5780         force_ident_maybe_lex('%');
5781         TERM('%');
5782     }
5783     case '^':
5784         d = s;
5785         bof = FEATURE_BITWISE_IS_ENABLED;
5786         if (bof && s[1] == '.')
5787             s++;
5788         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5789                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5790         {
5791             s = d;
5792             TOKEN(0);
5793         }
5794         s++;
5795         BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5796     case '[':
5797         if (PL_lex_brackets > 100)
5798             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5799         PL_lex_brackstack[PL_lex_brackets++] = 0;
5800         PL_lex_allbrackets++;
5801         {
5802             const char tmp = *s++;
5803             OPERATOR(tmp);
5804         }
5805     case '~':
5806         if (s[1] == '~'
5807             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5808         {
5809             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5810                 TOKEN(0);
5811             s += 2;
5812             Perl_ck_warner_d(aTHX_
5813                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5814                 "Smartmatch is experimental");
5815             Eop(OP_SMARTMATCH);
5816         }
5817         s++;
5818         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5819             s++;
5820             BCop(OP_SCOMPLEMENT);
5821         }
5822         BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5823     case ',':
5824         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5825             TOKEN(0);
5826         s++;
5827         OPERATOR(',');
5828     case ':':
5829         if (s[1] == ':') {
5830             len = 0;
5831             goto just_a_word_zero_gv;
5832         }
5833         s++;
5834         {
5835         OP *attrs;
5836
5837         switch (PL_expect) {
5838         case XOPERATOR:
5839             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5840                 break;
5841             PL_bufptr = s;      /* update in case we back off */
5842             if (*s == '=') {
5843                 Perl_croak(aTHX_
5844                            "Use of := for an empty attribute list is not allowed");
5845             }
5846             goto grabattrs;
5847         case XATTRBLOCK:
5848             PL_expect = XBLOCK;
5849             goto grabattrs;
5850         case XATTRTERM:
5851             PL_expect = XTERMBLOCK;
5852          grabattrs:
5853             s = skipspace(s);
5854             attrs = NULL;
5855             while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5856                 I32 tmp;
5857                 SV *sv;
5858                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5859                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5860                     if (tmp < 0) tmp = -tmp;
5861                     switch (tmp) {
5862                     case KEY_or:
5863                     case KEY_and:
5864                     case KEY_for:
5865                     case KEY_foreach:
5866                     case KEY_unless:
5867                     case KEY_if:
5868                     case KEY_while:
5869                     case KEY_until:
5870                         goto got_attrs;
5871                     default:
5872                         break;
5873                     }
5874                 }
5875                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5876                 if (*d == '(') {
5877                     d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5878                     if (!d) {
5879                         if (attrs)
5880                             op_free(attrs);
5881                         sv_free(sv);
5882                         Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5883                     }
5884                     COPLINE_SET_FROM_MULTI_END;
5885                 }
5886                 if (PL_lex_stuff) {
5887                     sv_catsv(sv, PL_lex_stuff);
5888                     attrs = op_append_elem(OP_LIST, attrs,
5889                                         newSVOP(OP_CONST, 0, sv));
5890                     SvREFCNT_dec_NN(PL_lex_stuff);
5891                     PL_lex_stuff = NULL;
5892                 }
5893                 else {
5894                     /* NOTE: any CV attrs applied here need to be part of
5895                        the CVf_BUILTIN_ATTRS define in cv.h! */
5896                     if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5897                         sv_free(sv);
5898                         CvLVALUE_on(PL_compcv);
5899                     }
5900                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5901                         sv_free(sv);
5902                         CvMETHOD_on(PL_compcv);
5903                     }
5904                     else if (!PL_in_my && len == 5
5905                           && strnEQ(SvPVX(sv), "const", len))
5906                     {
5907                         sv_free(sv);
5908                         Perl_ck_warner_d(aTHX_
5909                             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5910                            ":const is experimental"
5911                         );
5912                         CvANONCONST_on(PL_compcv);
5913                         if (!CvANON(PL_compcv))
5914                             yyerror(":const is not permitted on named "
5915                                     "subroutines");
5916                     }
5917                     /* After we've set the flags, it could be argued that
5918                        we don't need to do the attributes.pm-based setting
5919                        process, and shouldn't bother appending recognized
5920                        flags.  To experiment with that, uncomment the
5921                        following "else".  (Note that's already been
5922                        uncommented.  That keeps the above-applied built-in
5923                        attributes from being intercepted (and possibly
5924                        rejected) by a package's attribute routines, but is
5925                        justified by the performance win for the common case
5926                        of applying only built-in attributes.) */
5927                     else
5928                         attrs = op_append_elem(OP_LIST, attrs,
5929                                             newSVOP(OP_CONST, 0,
5930                                                     sv));
5931                 }
5932                 s = skipspace(d);
5933                 if (*s == ':' && s[1] != ':')
5934                     s = skipspace(s+1);
5935                 else if (s == d)
5936                     break;      /* require real whitespace or :'s */
5937                 /* XXX losing whitespace on sequential attributes here */
5938             }
5939             {
5940                 if (*s != ';'
5941                     && *s != '}'
5942                     && !(PL_expect == XOPERATOR
5943                          ? (*s == '=' ||  *s == ')')
5944                          : (*s == '{' ||  *s == '(')))
5945                 {
5946                     const char q = ((*s == '\'') ? '"' : '\'');
5947                     /* If here for an expression, and parsed no attrs, back
5948                        off. */
5949                     if (PL_expect == XOPERATOR && !attrs) {
5950                         s = PL_bufptr;
5951                         break;
5952                     }
5953                     /* MUST advance bufptr here to avoid bogus "at end of line"
5954                        context messages from yyerror().
5955                     */
5956                     PL_bufptr = s;
5957                     yyerror( (const char *)
5958                              (*s
5959                               ? Perl_form(aTHX_ "Invalid separator character "
5960                                           "%c%c%c in attribute list", q, *s, q)
5961                               : "Unterminated attribute list" ) );
5962                     if (attrs)
5963                         op_free(attrs);
5964                     OPERATOR(':');
5965                 }
5966             }
5967         got_attrs:
5968             if (attrs) {
5969                 NEXTVAL_NEXTTOKE.opval = attrs;
5970                 force_next(THING);
5971             }
5972             TOKEN(COLONATTR);
5973         }
5974         }
5975         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5976             s--;
5977             TOKEN(0);
5978         }
5979         PL_lex_allbrackets--;
5980         OPERATOR(':');
5981     case '(':
5982         s++;
5983         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5984             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5985         else
5986             PL_expect = XTERM;
5987         s = skipspace(s);
5988         PL_lex_allbrackets++;
5989         TOKEN('(');
5990     case ';':
5991         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5992             TOKEN(0);
5993         CLINE;
5994         s++;
5995         PL_expect = XSTATE;
5996         TOKEN(';');
5997     case ')':
5998         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5999             TOKEN(0);
6000         s++;
6001         PL_lex_allbrackets--;
6002         s = skipspace(s);
6003         if (*s == '{')
6004             PREBLOCK(')');
6005         TERM(')');
6006     case ']':
6007         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6008             TOKEN(0);
6009         s++;
6010         if (PL_lex_brackets <= 0)
6011             /* diag_listed_as: Unmatched right %s bracket */
6012             yyerror("Unmatched right square bracket");
6013         else
6014             --PL_lex_brackets;
6015         PL_lex_allbrackets--;
6016         if (PL_lex_state == LEX_INTERPNORMAL) {
6017             if (PL_lex_brackets == 0) {
6018                 if (*s == '-' && s[1] == '>')
6019                     PL_lex_state = LEX_INTERPENDMAYBE;
6020                 else if (*s != '[' && *s != '{')
6021                     PL_lex_state = LEX_INTERPEND;
6022             }
6023         }
6024         TERM(']');
6025     case '{':
6026         s++;
6027       leftbracket:
6028         if (PL_lex_brackets > 100) {
6029             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6030         }
6031         switch (PL_expect) {
6032         case XTERM:
6033         case XTERMORDORDOR:
6034             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6035             PL_lex_allbrackets++;
6036             OPERATOR(HASHBRACK);
6037         case XOPERATOR:
6038             while (s < PL_bufend && SPACE_OR_TAB(*s))
6039                 s++;
6040             d = s;
6041             PL_tokenbuf[0] = '\0';
6042             if (d < PL_bufend && *d == '-') {
6043                 PL_tokenbuf[0] = '-';
6044                 d++;
6045                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6046                     d++;
6047             }
6048             if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6049                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6050                               FALSE, &len);
6051                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6052                     d++;
6053                 if (*d == '}') {
6054                     const char minus = (PL_tokenbuf[0] == '-');
6055                     s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6056                     if (minus)
6057                         force_next('-');
6058                 }
6059             }
6060             /* FALLTHROUGH */
6061         case XATTRTERM:
6062         case XTERMBLOCK:
6063             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6064             PL_lex_allbrackets++;
6065             PL_expect = XSTATE;
6066             break;
6067         case XATTRBLOCK:
6068         case XBLOCK:
6069             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6070             PL_lex_allbrackets++;
6071             PL_expect = XSTATE;
6072             break;
6073         case XBLOCKTERM:
6074             PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6075             PL_lex_allbrackets++;
6076             PL_expect = XSTATE;
6077             break;
6078         default: {
6079                 const char *t;
6080                 if (PL_oldoldbufptr == PL_last_lop)
6081                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6082                 else
6083                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6084                 PL_lex_allbrackets++;
6085                 s = skipspace(s);
6086                 if (*s == '}') {
6087                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6088                         PL_expect = XTERM;
6089                         /* This hack is to get the ${} in the message. */
6090                         PL_bufptr = s+1;
6091                         yyerror("syntax error");
6092                         break;
6093                     }
6094                     OPERATOR(HASHBRACK);
6095                 }
6096                 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6097                     /* ${...} or @{...} etc., but not print {...}
6098                      * Skip the disambiguation and treat this as a block.
6099                      */
6100                     goto block_expectation;
6101                 }
6102                 /* This hack serves to disambiguate a pair of curlies
6103                  * as being a block or an anon hash.  Normally, expectation
6104                  * determines that, but in cases where we're not in a
6105                  * position to expect anything in particular (like inside
6106                  * eval"") we have to resolve the ambiguity.  This code
6107                  * covers the case where the first term in the curlies is a
6108                  * quoted string.  Most other cases need to be explicitly
6109                  * disambiguated by prepending a "+" before the opening
6110                  * curly in order to force resolution as an anon hash.
6111                  *
6112                  * XXX should probably propagate the outer expectation
6113                  * into eval"" to rely less on this hack, but that could
6114                  * potentially break current behavior of eval"".
6115                  * GSAR 97-07-21
6116                  */
6117                 t = s;
6118                 if (*s == '\'' || *s == '"' || *s == '`') {
6119                     /* common case: get past first string, handling escapes */
6120                     for (t++; t < PL_bufend && *t != *s;)
6121                         if (*t++ == '\\')
6122                             t++;
6123                     t++;
6124                 }
6125                 else if (*s == 'q') {
6126                     if (++t < PL_bufend
6127                         && (!isWORDCHAR(*t)
6128                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6129                                 && !isWORDCHAR(*t))))
6130                     {
6131                         /* skip q//-like construct */
6132                         const char *tmps;
6133                         char open, close, term;
6134                         I32 brackets = 1;
6135
6136                         while (t < PL_bufend && isSPACE(*t))
6137                             t++;
6138                         /* check for q => */
6139                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6140                             OPERATOR(HASHBRACK);
6141                         }
6142                         term = *t;
6143                         open = term;
6144                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6145                             term = tmps[5];
6146                         close = term;
6147                         if (open == close)
6148                             for (t++; t < PL_bufend; t++) {
6149                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6150                                     t++;
6151                                 else if (*t == open)
6152                                     break;
6153                             }
6154                         else {
6155                             for (t++; t < PL_bufend; t++) {
6156                                 if (*t == '\\' && t+1 < PL_bufend)
6157                                     t++;
6158                                 else if (*t == close && --brackets <= 0)
6159                                     break;
6160                                 else if (*t == open)
6161                                     brackets++;
6162                             }
6163                         }
6164                         t++;
6165                     }
6166                     else
6167                         /* skip plain q word */
6168                         while (   t < PL_bufend
6169                                && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6170                         {
6171                             t += UTF ? UTF8SKIP(t) : 1;
6172                         }
6173                 }
6174                 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6175                     t += UTF ? UTF8SKIP(t) : 1;
6176                     while (   t < PL_bufend
6177                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6178                     {
6179                         t += UTF ? UTF8SKIP(t) : 1;
6180                     }
6181                 }
6182                 while (t < PL_bufend && isSPACE(*t))
6183                     t++;
6184                 /* if comma follows first term, call it an anon hash */
6185                 /* XXX it could be a comma expression with loop modifiers */
6186                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6187                                    || (*t == '=' && t[1] == '>')))
6188                     OPERATOR(HASHBRACK);
6189                 if (PL_expect == XREF)
6190                 {
6191                   block_expectation:
6192                     /* If there is an opening brace or 'sub:', treat it
6193                        as a term to make ${{...}}{k} and &{sub:attr...}
6194                        dwim.  Otherwise, treat it as a statement, so
6195                        map {no strict; ...} works.
6196                      */
6197                     s = skipspace(s);
6198                     if (*s == '{') {
6199                         PL_expect = XTERM;
6200                         break;
6201                     }
6202                     if (strEQs(s, "sub")) {
6203                         d = s + 3;
6204                         d = skipspace(d);
6205                         if (*d == ':') {
6206                             PL_expect = XTERM;
6207                             break;
6208                         }
6209                     }
6210                     PL_expect = XSTATE;
6211                 }
6212                 else {
6213                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6214                     PL_expect = XSTATE;
6215                 }
6216             }
6217             break;
6218         }
6219         pl_yylval.ival = CopLINE(PL_curcop);
6220         PL_copline = NOLINE;   /* invalidate current command line number */
6221         TOKEN(formbrack ? '=' : '{');
6222     case '}':
6223         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6224             TOKEN(0);
6225       rightbracket:
6226         s++;
6227         if (PL_lex_brackets <= 0)
6228             /* diag_listed_as: Unmatched right %s bracket */
6229             yyerror("Unmatched right curly bracket");
6230         else
6231             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6232         PL_lex_allbrackets--;
6233         if (PL_lex_state == LEX_INTERPNORMAL) {
6234             if (PL_lex_brackets == 0) {
6235                 if (PL_expect & XFAKEBRACK) {
6236                     PL_expect &= XENUMMASK;
6237                     PL_lex_state = LEX_INTERPEND;
6238                     PL_bufptr = s;
6239                     return yylex();     /* ignore fake brackets */
6240                 }
6241                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6242                  && SvEVALED(PL_lex_repl))
6243                     PL_lex_state = LEX_INTERPEND;
6244                 else if (*s == '-' && s[1] == '>')
6245                     PL_lex_state = LEX_INTERPENDMAYBE;
6246                 else if (*s != '[' && *s != '{')
6247                     PL_lex_state = LEX_INTERPEND;
6248             }
6249         }
6250         if (PL_expect & XFAKEBRACK) {
6251             PL_expect &= XENUMMASK;
6252             PL_bufptr = s;
6253             return yylex();             /* ignore fake brackets */
6254         }
6255         force_next(formbrack ? '.' : '}');
6256         if (formbrack) LEAVE;
6257         if (formbrack == 2) { /* means . where arguments were expected */
6258             force_next(';');
6259             TOKEN(FORMRBRACK);
6260         }
6261         TOKEN(';');
6262     case '&':
6263         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6264         s++;
6265         if (*s++ == '&') {
6266             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6267                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6268                 s -= 2;
6269                 TOKEN(0);
6270             }
6271             AOPERATOR(ANDAND);
6272         }
6273         s--;
6274         if (PL_expect == XOPERATOR) {
6275             if (   PL_bufptr == PL_linestart
6276                 && ckWARN(WARN_SEMICOLON)
6277                 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6278             {
6279                 CopLINE_dec(PL_curcop);
6280                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6281                 CopLINE_inc(PL_curcop);
6282             }
6283             d = s;
6284             if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6285                 s++;
6286             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6287                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6288                 s = d;
6289                 s--;
6290                 TOKEN(0);
6291             }
6292             if (d == s) {
6293                 PL_parser->saw_infix_sigil = 1;
6294                 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6295             }
6296             else
6297                 BAop(OP_SBIT_AND);
6298         }
6299
6300         PL_tokenbuf[0] = '&';
6301         s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6302         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6303         if (PL_tokenbuf[1]) {
6304             force_ident_maybe_lex('&');
6305         }
6306         else
6307             PREREF('&');
6308         TERM('&');
6309
6310     case '|':
6311         s++;
6312         if (*s++ == '|') {
6313             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6314                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6315                 s -= 2;
6316                 TOKEN(0);
6317             }
6318             AOPERATOR(OROR);
6319         }
6320         s--;
6321         d = s;
6322         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6323             s++;
6324         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6325                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6326             s = d - 1;
6327             TOKEN(0);
6328         }
6329         BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6330     case '=':
6331         s++;
6332         {
6333             const char tmp = *s++;
6334             if (tmp == '=') {
6335                 if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, "=====")) {
6336                     s = vcs_conflict_marker(s + 5);
6337                     goto retry;
6338                 }
6339                 if (!PL_lex_allbrackets
6340                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6341                 {
6342                     s -= 2;
6343                     TOKEN(0);
6344                 }
6345                 Eop(OP_EQ);
6346             }
6347             if (tmp == '>') {
6348                 if (!PL_lex_allbrackets
6349                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
6350                 {
6351                     s -= 2;
6352                     TOKEN(0);
6353                 }
6354                 OPERATOR(',');
6355             }
6356             if (tmp == '~')
6357                 PMop(OP_MATCH);
6358             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6359                 && strchr("+-*/%.^&|<",tmp))
6360                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6361                             "Reversed %c= operator",(int)tmp);
6362             s--;
6363             if (PL_expect == XSTATE
6364                 && isALPHA(tmp)
6365                 && (s == PL_linestart+1 || s[-2] == '\n') )
6366             {
6367                 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6368                     || PL_lex_state != LEX_NORMAL) {
6369                     d = PL_bufend;
6370                     while (s < d) {
6371                         if (*s++ == '\n') {
6372                             incline(s);
6373                             if (strEQs(s,"=cut")) {
6374                                 s = strchr(s,'\n');
6375                                 if (s)
6376                                     s++;
6377                                 else
6378                                     s = d;
6379                                 incline(s);
6380                                 goto retry;
6381                             }
6382                         }
6383                     }
6384                     goto retry;
6385                 }
6386                 s = PL_bufend;
6387                 PL_parser->in_pod = 1;
6388                 goto retry;
6389             }
6390         }
6391         if (PL_expect == XBLOCK) {
6392             const char *t = s;
6393 #ifdef PERL_STRICT_CR
6394             while (SPACE_OR_TAB(*t))
6395 #else
6396             while (SPACE_OR_TAB(*t) || *t == '\r')
6397 #endif
6398                 t++;
6399             if (*t == '\n' || *t == '#') {
6400                 formbrack = 1;
6401                 ENTER;
6402                 SAVEI8(PL_parser->form_lex_state);
6403                 SAVEI32(PL_lex_formbrack);
6404                 PL_parser->form_lex_state = PL_lex_state;
6405                 PL_lex_formbrack = PL_lex_brackets + 1;
6406                 goto leftbracket;
6407             }
6408         }
6409         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6410             s--;
6411             TOKEN(0);
6412         }
6413         pl_yylval.ival = 0;
6414         OPERATOR(ASSIGNOP);
6415     case '!':
6416         s++;
6417         {
6418             const char tmp = *s++;
6419             if (tmp == '=') {
6420                 /* was this !=~ where !~ was meant?
6421                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6422
6423                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6424                     const char *t = s+1;
6425
6426                     while (t < PL_bufend && isSPACE(*t))
6427                         ++t;
6428
6429                     if (*t == '/' || *t == '?'
6430                         || ((*t == 'm' || *t == 's' || *t == 'y')
6431                             && !isWORDCHAR(t[1]))
6432                         || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6433                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6434                                     "!=~ should be !~");
6435                 }
6436                 if (!PL_lex_allbrackets
6437                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6438                 {
6439                     s -= 2;
6440                     TOKEN(0);
6441                 }
6442                 Eop(OP_NE);
6443             }
6444             if (tmp == '~')
6445                 PMop(OP_NOT);
6446         }
6447         s--;
6448         OPERATOR('!');
6449     case '<':
6450         if (PL_expect != XOPERATOR) {
6451             if (s[1] != '<' && !strchr(s,'>'))
6452                 check_uni();
6453             if (s[1] == '<' && s[2] != '>') {
6454                 if ((s == PL_linestart || s[-1] == '\n') && strEQs(s+2, "<<<<<")) {
6455                     s = vcs_conflict_marker(s + 7);
6456                     goto retry;
6457                 }
6458                 s = scan_heredoc(s);
6459             }
6460             else
6461                 s = scan_inputsymbol(s);
6462             PL_expect = XOPERATOR;
6463             TOKEN(sublex_start());
6464         }
6465         s++;
6466         {
6467             char tmp = *s++;
6468             if (tmp == '<') {
6469                 if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, "<<<<<")) {
6470                     s = vcs_conflict_marker(s + 5);
6471                     goto retry;
6472                 }
6473                 if (*s == '=' && !PL_lex_allbrackets
6474                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6475                 {
6476                     s -= 2;
6477                     TOKEN(0);
6478                 }
6479                 SHop(OP_LEFT_SHIFT);
6480             }
6481             if (tmp == '=') {
6482                 tmp = *s++;
6483                 if (tmp == '>') {
6484                     if (!PL_lex_allbrackets
6485                         && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6486                     {
6487                         s -= 3;
6488                         TOKEN(0);
6489                     }
6490                     Eop(OP_NCMP);
6491                 }
6492                 s--;
6493                 if (!PL_lex_allbrackets
6494                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6495                 {
6496                     s -= 2;
6497                     TOKEN(0);
6498                 }
6499                 Rop(OP_LE);
6500             }
6501         }
6502         s--;
6503         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6504             s--;
6505             TOKEN(0);
6506         }
6507         Rop(OP_LT);
6508     case '>':
6509         s++;
6510         {
6511             const char tmp = *s++;
6512             if (tmp == '>') {
6513                 if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, ">>>>>")) {
6514                     s = vcs_conflict_marker(s + 5);
6515                     goto retry;
6516                 }
6517                 if (*s == '=' && !PL_lex_allbrackets
6518                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6519                 {
6520                     s -= 2;
6521                     TOKEN(0);
6522                 }
6523                 SHop(OP_RIGHT_SHIFT);
6524             }
6525             else if (tmp == '=') {
6526                 if (!PL_lex_allbrackets
6527                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6528                 {
6529                     s -= 2;
6530                     TOKEN(0);
6531                 }
6532                 Rop(OP_GE);
6533             }
6534         }
6535         s--;
6536         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6537             s--;
6538             TOKEN(0);
6539         }
6540         Rop(OP_GT);
6541
6542     case '$':
6543         CLINE;
6544
6545         if (PL_expect == XPOSTDEREF) {
6546             if (s[1] == '#') {
6547                 s++;
6548                 POSTDEREF(DOLSHARP);
6549             }
6550             POSTDEREF('$');
6551         }
6552
6553         if (   s[1] == '#'
6554             && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
6555                 || strchr("{$:+-@", s[2])))
6556         {
6557             PL_tokenbuf[0] = '@';
6558             s = scan_ident(s + 1, PL_tokenbuf + 1,
6559                            sizeof PL_tokenbuf - 1, FALSE);
6560             if (PL_expect == XOPERATOR) {
6561                 d = s;
6562                 if (PL_bufptr > s) {
6563                     d = PL_bufptr-1;
6564                     PL_bufptr = PL_oldbufptr;
6565                 }
6566                 no_op("Array length", d);
6567             }
6568             if (!PL_tokenbuf[1])
6569                 PREREF(DOLSHARP);
6570             PL_expect = XOPERATOR;
6571             force_ident_maybe_lex('#');
6572             TOKEN(DOLSHARP);
6573         }
6574
6575         PL_tokenbuf[0] = '$';
6576         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6577         if (PL_expect == XOPERATOR) {
6578             d = s;
6579             if (PL_bufptr > s) {
6580                 d = PL_bufptr-1;
6581                 PL_bufptr = PL_oldbufptr;
6582             }
6583             no_op("Scalar", d);
6584         }
6585         if (!PL_tokenbuf[1]) {
6586             if (s == PL_bufend)
6587                 yyerror("Final $ should be \\$ or $name");
6588             PREREF('$');
6589         }
6590
6591         d = s;
6592         {
6593             const char tmp = *s;
6594             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6595                 s = skipspace(s);
6596
6597             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6598                 && intuit_more(s)) {
6599                 if (*s == '[') {
6600                     PL_tokenbuf[0] = '@';
6601                     if (ckWARN(WARN_SYNTAX)) {
6602                         char *t = s+1;
6603
6604                         while (   isSPACE(*t)
6605                                || isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)
6606                                || *t == '$')
6607                         {
6608                             t += UTF ? UTF8SKIP(t) : 1;
6609                         }
6610                         if (*t++ == ',') {
6611                             PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6612                             while (t < PL_bufend && *t != ']')
6613                                 t++;
6614                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6615                                         "Multidimensional syntax %" UTF8f " not supported",
6616                                         UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
6617                         }
6618                     }
6619                 }
6620                 else if (*s == '{') {
6621                     char *t;
6622                     PL_tokenbuf[0] = '%';
6623                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
6624                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6625                         {
6626                             char tmpbuf[sizeof PL_tokenbuf];
6627                             do {
6628                                 t++;
6629                             } while (isSPACE(*t));
6630                             if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
6631                                 STRLEN len;
6632                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6633                                               &len);
6634                                 while (isSPACE(*t))
6635                                     t++;
6636                                 if (  *t == ';'
6637                                     && get_cvn_flags(tmpbuf, len, UTF
6638                                                                   ? SVf_UTF8
6639                                                                   : 0))
6640                                 {
6641                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6642                                         "You need to quote \"%" UTF8f "\"",
6643                                          UTF8fARG(UTF, len, tmpbuf));
6644                                 }
6645                             }
6646                         }
6647                 }
6648             }
6649
6650             PL_expect = XOPERATOR;
6651             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6652                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6653                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6654                     PL_expect = XOPERATOR;
6655                 else if (strchr("$@\"'`q", *s))
6656                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6657                 else if (   strchr("&*<%", *s)
6658                          && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
6659                 {
6660                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6661                 }
6662                 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6663                     char tmpbuf[sizeof PL_tokenbuf];
6664                     int t2;
6665                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6666                     if ((t2 = keyword(tmpbuf, len, 0))) {
6667                         /* binary operators exclude handle interpretations */
6668                         switch (t2) {
6669                         case -KEY_x:
6670                         case -KEY_eq:
6671                         case -KEY_ne:
6672                         case -KEY_gt:
6673                         case -KEY_lt:
6674                         case -KEY_ge:
6675                         case -KEY_le:
6676                         case -KEY_cmp:
6677                             break;
6678                         default:
6679                             PL_expect = XTERM;  /* e.g. print $fh length() */
6680                             break;
6681                         }
6682                     }
6683                     else {
6684                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6685                     }
6686                 }
6687                 else if (isDIGIT(*s))
6688                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6689                 else if (*s == '.' && isDIGIT(s[1]))
6690                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6691                 else if ((*s == '?' || *s == '-' || *s == '+')
6692                          && !isSPACE(s[1]) && s[1] != '=')
6693                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6694                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6695                          && s[1] != '/')
6696                     PL_expect = XTERM;          /* e.g. print $fh /.../
6697                                                    XXX except DORDOR operator
6698                                                 */
6699                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6700                          && s[2] != '=')
6701                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6702             }
6703         }
6704         force_ident_maybe_lex('$');
6705         TOKEN('$');
6706
6707     case '@':
6708         if (PL_expect == XPOSTDEREF)
6709             POSTDEREF('@');
6710         PL_tokenbuf[0] = '@';
6711         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6712         if (PL_expect == XOPERATOR) {
6713             d = s;
6714             if (PL_bufptr > s) {
6715                 d = PL_bufptr-1;
6716                 PL_bufptr = PL_oldbufptr;
6717             }
6718             no_op("Array", d);
6719         }
6720         pl_yylval.ival = 0;
6721         if (!PL_tokenbuf[1]) {
6722             PREREF('@');
6723         }
6724         if (PL_lex_state == LEX_NORMAL)
6725             s = skipspace(s);
6726         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6727             if (*s == '{')
6728                 PL_tokenbuf[0] = '%';
6729
6730             /* Warn about @ where they meant $. */
6731             if (*s == '[' || *s == '{') {
6732                 if (ckWARN(WARN_SYNTAX)) {
6733                     S_check_scalar_slice(aTHX_ s);
6734                 }
6735             }
6736         }
6737         PL_expect = XOPERATOR;
6738         force_ident_maybe_lex('@');
6739         TERM('@');
6740
6741      case '/':                  /* may be division, defined-or, or pattern */
6742         if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6743             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6744                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6745                 TOKEN(0);
6746             s += 2;
6747             AOPERATOR(DORDOR);
6748         }
6749         else if (PL_expect == XOPERATOR) {
6750             s++;
6751             if (*s == '=' && !PL_lex_allbrackets
6752                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6753             {
6754                 s--;
6755                 TOKEN(0);
6756             }
6757             Mop(OP_DIVIDE);
6758         }
6759         else {
6760             /* Disable warning on "study /blah/" */
6761             if (    PL_oldoldbufptr == PL_last_uni
6762                 && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6763                     || memNE(PL_last_uni, "study", 5)
6764                     || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6765              ))
6766                 check_uni();
6767             s = scan_pat(s,OP_MATCH);
6768             TERM(sublex_start());
6769         }
6770
6771      case '?':                  /* conditional */
6772         s++;
6773         if (!PL_lex_allbrackets
6774             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
6775         {
6776             s--;
6777             TOKEN(0);
6778         }
6779         PL_lex_allbrackets++;
6780         OPERATOR('?');
6781
6782     case '.':
6783         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6784 #ifdef PERL_STRICT_CR
6785             && s[1] == '\n'
6786 #else
6787             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6788 #endif
6789             && (s == PL_linestart || s[-1] == '\n') )
6790         {
6791             PL_expect = XSTATE;
6792             formbrack = 2; /* dot seen where arguments expected */
6793             goto rightbracket;
6794         }
6795         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6796             s += 3;
6797             OPERATOR(YADAYADA);
6798         }
6799         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6800             char tmp = *s++;
6801             if (*s == tmp) {
6802                 if (!PL_lex_allbrackets
6803                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
6804                 {
6805                     s--;
6806                     TOKEN(0);
6807                 }
6808                 s++;
6809                 if (*s == tmp) {
6810                     s++;
6811                     pl_yylval.ival = OPf_SPECIAL;
6812                 }
6813                 else
6814                     pl_yylval.ival = 0;
6815                 OPERATOR(DOTDOT);
6816             }
6817             if (*s == '=' && !PL_lex_allbrackets
6818                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6819             {
6820                 s--;
6821                 TOKEN(0);
6822             }
6823             Aop(OP_CONCAT);
6824         }
6825         /* FALLTHROUGH */
6826     case '0': case '1': case '2': case '3': case '4':
6827     case '5': case '6': case '7': case '8': case '9':
6828         s = scan_num(s, &pl_yylval);
6829         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6830         if (PL_expect == XOPERATOR)
6831             no_op("Number",s);
6832         TERM(THING);
6833
6834     case '\'':
6835         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6836         if (!s)
6837             missingterm(NULL);
6838         COPLINE_SET_FROM_MULTI_END;
6839         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6840         if (PL_expect == XOPERATOR) {
6841             no_op("String",s);
6842         }
6843         pl_yylval.ival = OP_CONST;
6844         TERM(sublex_start());
6845
6846     case '"':
6847         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6848         DEBUG_T( {
6849             if (s)
6850                 printbuf("### Saw string before %s\n", s);
6851             else
6852                 PerlIO_printf(Perl_debug_log,
6853                              "### Saw unterminated string\n");
6854         } );
6855         if (PL_expect == XOPERATOR) {
6856                 no_op("String",s);
6857         }
6858         if (!s)
6859             missingterm(NULL);
6860         pl_yylval.ival = OP_CONST;
6861         /* FIXME. I think that this can be const if char *d is replaced by
6862            more localised variables.  */
6863         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6864             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6865                 pl_yylval.ival = OP_STRINGIFY;
6866                 break;
6867             }
6868         }
6869         if (pl_yylval.ival == OP_CONST)
6870             COPLINE_SET_FROM_MULTI_END;
6871         TERM(sublex_start());
6872
6873     case '`':
6874         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6875         DEBUG_T( {
6876             if (s)
6877                 printbuf("### Saw backtick string before %s\n", s);
6878             else
6879                 PerlIO_printf(Perl_debug_log,
6880                              "### Saw unterminated backtick string\n");
6881         } );
6882         if (PL_expect == XOPERATOR)
6883             no_op("Backticks",s);
6884         if (!s)
6885             missingterm(NULL);
6886         pl_yylval.ival = OP_BACKTICK;
6887         TERM(sublex_start());
6888
6889     case '\\':
6890         s++;
6891         if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6892          && isDIGIT(*s))
6893             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6894                            *s, *s);
6895         if (PL_expect == XOPERATOR)
6896             no_op("Backslash",s);
6897         OPERATOR(REFGEN);
6898
6899     case 'v':
6900         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6901             char *start = s + 2;
6902             while (isDIGIT(*start) || *start == '_')
6903                 start++;
6904             if (*start == '.' && isDIGIT(start[1])) {
6905                 s = scan_num(s, &pl_yylval);
6906                 TERM(THING);
6907             }
6908             else if ((*start == ':' && start[1] == ':')
6909                   || (PL_expect == XSTATE && *start == ':'))
6910                 goto keylookup;
6911             else if (PL_expect == XSTATE) {
6912                 d = start;
6913                 while (d < PL_bufend && isSPACE(*d)) d++;
6914                 if (*d == ':') goto keylookup;
6915             }
6916             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6917             if (!isALPHA(*start) && (PL_expect == XTERM
6918                         || PL_expect == XREF || PL_expect == XSTATE
6919                         || PL_expect == XTERMORDORDOR)) {
6920                 GV *const gv = gv_fetchpvn_flags(s, start - s,
6921                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
6922                 if (!gv) {
6923                     s = scan_num(s, &pl_yylval);
6924                     TERM(THING);
6925                 }
6926             }
6927         }
6928         goto keylookup;
6929     case 'x':
6930         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6931             s++;
6932             Mop(OP_REPEAT);
6933         }
6934         goto keylookup;
6935
6936     case '_':
6937     case 'a': case 'A':
6938     case 'b': case 'B':
6939     case 'c': case 'C':
6940     case 'd': case 'D':
6941     case 'e': case 'E':
6942     case 'f': case 'F':
6943     case 'g': case 'G':
6944     case 'h': case 'H':
6945     case 'i': case 'I':
6946     case 'j': case 'J':
6947     case 'k': case 'K':
6948     case 'l': case 'L':
6949     case 'm': case 'M':
6950     case 'n': case 'N':
6951     case 'o': case 'O':
6952     case 'p': case 'P':
6953     case 'q': case 'Q':
6954     case 'r': case 'R':
6955     case 's': case 'S':
6956     case 't': case 'T':
6957     case 'u': case 'U':
6958               case 'V':
6959     case 'w': case 'W':
6960               case 'X':
6961     case 'y': case 'Y':
6962     case 'z': case 'Z':
6963
6964       keylookup: {
6965         bool anydelim;
6966         bool lex;
6967         I32 tmp;
6968         SV *sv;
6969         CV *cv;
6970         PADOFFSET off;
6971         OP *rv2cv_op;
6972
6973         lex = FALSE;
6974         orig_keyword = 0;
6975         off = 0;
6976         sv = NULL;
6977         cv = NULL;
6978         gv = NULL;
6979         gvp = NULL;
6980         rv2cv_op = NULL;
6981
6982         PL_bufptr = s;
6983         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6984
6985         /* Some keywords can be followed by any delimiter, including ':' */
6986         anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
6987
6988         /* x::* is just a word, unless x is "CORE" */
6989         if (!anydelim && *s == ':' && s[1] == ':') {
6990             if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6991             goto just_a_word;
6992         }
6993
6994         d = s;
6995         while (d < PL_bufend && isSPACE(*d))
6996                 d++;    /* no comments skipped here, or s### is misparsed */
6997
6998         /* Is this a word before a => operator? */
6999         if (*d == '=' && d[1] == '>') {
7000           fat_arrow:
7001             CLINE;
7002             pl_yylval.opval
7003                 = newSVOP(OP_CONST, 0,
7004                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7005             pl_yylval.opval->op_private = OPpCONST_BARE;
7006             TERM(BAREWORD);
7007         }
7008
7009         /* Check for plugged-in keyword */
7010         {
7011             OP *o;
7012             int result;
7013             char *saved_bufptr = PL_bufptr;
7014             PL_bufptr = s;
7015             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7016             s = PL_bufptr;
7017             if (result == KEYWORD_PLUGIN_DECLINE) {
7018                 /* not a plugged-in keyword */
7019                 PL_bufptr = saved_bufptr;
7020             } else if (result == KEYWORD_PLUGIN_STMT) {
7021                 pl_yylval.opval = o;
7022                 CLINE;
7023                 if (!PL_nexttoke) PL_expect = XSTATE;
7024                 return REPORT(PLUGSTMT);
7025             } else if (result == KEYWORD_PLUGIN_EXPR) {
7026                 pl_yylval.opval = o;
7027                 CLINE;
7028                 if (!PL_nexttoke) PL_expect = XOPERATOR;
7029                 return REPORT(PLUGEXPR);
7030             } else {
7031                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7032                                         PL_tokenbuf);
7033             }
7034         }
7035
7036         /* Check for built-in keyword */
7037         tmp = keyword(PL_tokenbuf, len, 0);
7038
7039         /* Is this a label? */
7040         if (!anydelim && PL_expect == XSTATE
7041               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7042             s = d + 1;
7043             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7044             pl_yylval.pval[len] = '\0';
7045             pl_yylval.pval[len+1] = UTF ? 1 : 0;
7046             CLINE;
7047             TOKEN(LABEL);
7048         }
7049
7050         /* Check for lexical sub */
7051         if (PL_expect != XOPERATOR) {
7052             char tmpbuf[sizeof PL_tokenbuf + 1];
7053             *tmpbuf = '&';
7054             Copy(PL_tokenbuf, tmpbuf+1, len, char);
7055             off = pad_findmy_pvn(tmpbuf, len+1, 0);
7056             if (off != NOT_IN_PAD) {
7057                 assert(off); /* we assume this is boolean-true below */
7058                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7059                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
7060                     HEK * const stashname = HvNAME_HEK(stash);
7061                     sv = newSVhek(stashname);
7062                     sv_catpvs(sv, "::");
7063                     sv_catpvn_flags(sv, PL_tokenbuf, len,
7064                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
7065                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7066                                     SVt_PVCV);
7067                     off = 0;
7068                     if (!gv) {
7069                         sv_free(sv);
7070                         sv = NULL;
7071                         goto just_a_word;
7072                     }
7073                 }
7074                 else {
7075                     rv2cv_op = newOP(OP_PADANY, 0);
7076                     rv2cv_op->op_targ = off;
7077                     cv = find_lexical_cv(off);
7078                 }
7079                 lex = TRUE;
7080                 goto just_a_word;
7081             }
7082             off = 0;
7083         }
7084
7085         if (tmp < 0) {                  /* second-class keyword? */
7086             GV *ogv = NULL;     /* override (winner) */
7087             GV *hgv = NULL;     /* hidden (loser) */
7088             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7089                 CV *cv;
7090                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7091                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
7092                                             SVt_PVCV))
7093                     && (cv = GvCVu(gv)))
7094                 {
7095                     if (GvIMPORTED_CV(gv))
7096                         ogv = gv;
7097                     else if (! CvMETHOD(cv))
7098                         hgv = gv;
7099                 }
7100                 if (!ogv
7101                     && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7102                                                               len, FALSE))
7103                     && (gv = *gvp)
7104                     && (isGV_with_GP(gv)
7105                         ? GvCVu(gv) && GvIMPORTED_CV(gv)
7106                         :   SvPCS_IMPORTED(gv)
7107                         && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7108                                                                  len, 0), 1)))
7109                 {
7110                     ogv = gv;
7111                 }
7112             }
7113             if (ogv) {
7114                 orig_keyword = tmp;
7115                 tmp = 0;                /* overridden by import or by GLOBAL */
7116             }
7117             else if (gv && !gvp
7118                      && -tmp==KEY_lock  /* XXX generalizable kludge */
7119                      && GvCVu(gv))
7120             {
7121                 tmp = 0;                /* any sub overrides "weak" keyword */
7122             }
7123             else {                      /* no override */
7124                 tmp = -tmp;
7125                 if (tmp == KEY_dump) {
7126                     Perl_ck_warner_d(aTHX_ packWARN2(WARN_MISC,WARN_DEPRECATED),
7127                                      "dump() better written as CORE::dump(). "
7128                                      "dump() will no longer be available "
7129                                      "in Perl 5.30");
7130                 }
7131                 gv = NULL;
7132                 gvp = 0;
7133                 if (hgv && tmp != KEY_x)        /* never ambiguous */
7134                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7135                                    "Ambiguous call resolved as CORE::%s(), "
7136                                    "qualify as such or use &",
7137                                    GvENAME(hgv));
7138             }
7139         }
7140
7141         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7142          && (!anydelim || *s != '#')) {
7143             /* no override, and not s### either; skipspace is safe here
7144              * check for => on following line */
7145             bool arrow;
7146             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7147             STRLEN   soff = s         - SvPVX(PL_linestr);
7148             s = peekspace(s);
7149             arrow = *s == '=' && s[1] == '>';
7150             PL_bufptr = SvPVX(PL_linestr) + bufoff;
7151             s         = SvPVX(PL_linestr) +   soff;
7152             if (arrow)
7153                 goto fat_arrow;
7154         }
7155
7156       reserved_word:
7157         switch (tmp) {
7158
7159             /* Trade off - by using this evil construction we can pull the
7160                variable gv into the block labelled keylookup. If not, then
7161                we have to give it function scope so that the goto from the
7162                earlier ':' case doesn't bypass the initialisation.  */
7163             just_a_word_zero_gv:
7164                 sv = NULL;
7165                 cv = NULL;
7166                 gv = NULL;
7167                 gvp = NULL;
7168                 rv2cv_op = NULL;
7169                 orig_keyword = 0;
7170                 lex = 0;
7171                 off = 0;
7172         default:                        /* not a keyword */
7173           just_a_word: {
7174                 int pkgname = 0;
7175                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7176                 bool safebw;
7177
7178
7179                 /* Get the rest if it looks like a package qualifier */
7180
7181                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7182                     STRLEN morelen;
7183                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7184                                   TRUE, &morelen);
7185                     if (!morelen)
7186                         Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7187                                 UTF8fARG(UTF, len, PL_tokenbuf),
7188                                 *s == '\'' ? "'" : "::");
7189                     len += morelen;
7190                     pkgname = 1;
7191                 }
7192
7193                 if (PL_expect == XOPERATOR) {
7194                     if (PL_bufptr == PL_linestart) {
7195                         CopLINE_dec(PL_curcop);
7196                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7197                         CopLINE_inc(PL_curcop);
7198                     }
7199                     else
7200                         no_op("Bareword",s);
7201                 }
7202
7203                 /* See if the name is "Foo::",
7204                    in which case Foo is a bareword
7205                    (and a package name). */
7206
7207                 if (len > 2
7208                     && PL_tokenbuf[len - 2] == ':'
7209                     && PL_tokenbuf[len - 1] == ':')
7210                 {
7211                     if (ckWARN(WARN_BAREWORD)
7212                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7213                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7214                                     "Bareword \"%" UTF8f
7215                                     "\" refers to nonexistent package",
7216                                     UTF8fARG(UTF, len, PL_tokenbuf));
7217                     len -= 2;
7218                     PL_tokenbuf[len] = '\0';
7219                     gv = NULL;
7220                     gvp = 0;
7221                     safebw = TRUE;
7222                 }
7223                 else {
7224                     safebw = FALSE;
7225                 }
7226
7227                 /* if we saw a global override before, get the right name */
7228
7229                 if (!sv)
7230                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7231                                                 len);
7232                 if (gvp) {
7233                     SV * const tmp_sv = sv;
7234                     sv = newSVpvs("CORE::GLOBAL::");
7235                     sv_catsv(sv, tmp_sv);
7236                     SvREFCNT_dec(tmp_sv);
7237                 }
7238
7239
7240                 /* Presume this is going to be a bareword of some sort. */
7241                 CLINE;
7242                 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
7243                 pl_yylval.opval->op_private = OPpCONST_BARE;
7244
7245                 /* And if "Foo::", then that's what it certainly is. */
7246                 if (safebw)
7247                     goto safe_bareword;
7248
7249                 if (!off)
7250                 {
7251                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7252                     const_op->op_private = OPpCONST_BARE;
7253                     rv2cv_op =
7254                         newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7255                     cv = lex
7256                         ? isGV(gv)
7257                             ? GvCV(gv)
7258                             : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
7259                                 ? (CV *)SvRV(gv)
7260                                 : ((CV *)gv)
7261                         : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
7262                 }
7263
7264                 /* Use this var to track whether intuit_method has been
7265                    called.  intuit_method returns 0 or > 255.  */
7266                 tmp = 1;
7267
7268                 /* See if it's the indirect object for a list operator. */
7269
7270                 if (PL_oldoldbufptr
7271                     && PL_oldoldbufptr < PL_bufptr
7272                     && (PL_oldoldbufptr == PL_last_lop
7273                         || PL_oldoldbufptr == PL_last_uni)
7274                     && /* NO SKIPSPACE BEFORE HERE! */
7275                        (PL_expect == XREF
7276                         || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7277                                                                == OA_FILEREF))
7278                 {
7279                     bool immediate_paren = *s == '(';
7280                     SSize_t s_off;
7281
7282                     /* (Now we can afford to cross potential line boundary.) */
7283                     s = skipspace(s);
7284
7285                     /* intuit_method() can indirectly call lex_next_chunk(),
7286                      * invalidating s
7287                      */
7288                     s_off = s - SvPVX(PL_linestr);
7289                     /* Two barewords in a row may indicate method call. */
7290                     if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7291                             || *s == '$')
7292                         && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7293                     {
7294                         /* the code at method: doesn't use s */
7295                         goto method;
7296                     }
7297                     s = SvPVX(PL_linestr) + s_off;
7298
7299                     /* If not a declared subroutine, it's an indirect object. */
7300                     /* (But it's an indir obj regardless for sort.) */
7301                     /* Also, if "_" follows a filetest operator, it's a bareword */
7302
7303                     if (
7304                         ( !immediate_paren && (PL_last_lop_op == OP_SORT
7305                          || (!cv
7306                              && (PL_last_lop_op != OP_MAPSTART
7307                                  && PL_last_lop_op != OP_GREPSTART))))
7308                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7309                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7310                                                             == OA_FILESTATOP))
7311                        )
7312                     {
7313                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7314                         goto bareword;
7315                     }
7316                 }
7317
7318                 PL_expect = XOPERATOR;
7319                 s = skipspace(s);
7320
7321                 /* Is this a word before a => operator? */
7322                 if (*s == '=' && s[1] == '>' && !pkgname) {
7323                     op_free(rv2cv_op);
7324                     CLINE;
7325                     if (gvp || (lex && !off)) {
7326                         assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
7327                         /* This is our own scalar, created a few lines
7328                            above, so this is safe. */
7329                         SvREADONLY_off(sv);
7330                         sv_setpv(sv, PL_tokenbuf);
7331                         if (UTF && !IN_BYTES
7332                          && is_utf8_string((U8*)PL_tokenbuf, len))
7333                               SvUTF8_on(sv);
7334                         SvREADONLY_on(sv);
7335                     }
7336                     TERM(BAREWORD);
7337                 }
7338
7339                 /* If followed by a paren, it's certainly a subroutine. */
7340                 if (*s == '(') {
7341                     CLINE;
7342                     if (cv) {
7343                         d = s + 1;
7344                         while (SPACE_OR_TAB(*d))
7345                             d++;
7346                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7347                             s = d + 1;
7348                             goto its_constant;
7349                         }
7350                     }
7351                     NEXTVAL_NEXTTOKE.opval =
7352                         off ? rv2cv_op : pl_yylval.opval;
7353                     if (off)
7354                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7355                     else op_free(rv2cv_op),        force_next(BAREWORD);
7356                     pl_yylval.ival = 0;
7357                     TOKEN('&');
7358                 }
7359
7360                 /* If followed by var or block, call it a method (unless sub) */
7361
7362                 if ((*s == '$' || *s == '{') && !cv) {
7363                     op_free(rv2cv_op);
7364                     PL_last_lop = PL_oldbufptr;
7365                     PL_last_lop_op = OP_METHOD;
7366                     if (!PL_lex_allbrackets
7367                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7368                     {
7369                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7370                     }
7371                     PL_expect = XBLOCKTERM;
7372                     PL_bufptr = s;
7373                     return REPORT(METHOD);
7374                 }
7375
7376                 /* If followed by a bareword, see if it looks like indir obj. */
7377
7378                 if (   tmp == 1
7379                     && !orig_keyword
7380                     && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7381                     && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7382                 {
7383                   method:
7384                     if (lex && !off) {
7385                         assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
7386                         SvREADONLY_off(sv);
7387                         sv_setpvn(sv, PL_tokenbuf, len);
7388                         if (UTF && !IN_BYTES
7389                          && is_utf8_string((U8*)PL_tokenbuf, len))
7390                             SvUTF8_on (sv);
7391                         else SvUTF8_off(sv);
7392                     }
7393                     op_free(rv2cv_op);
7394                     if (tmp == METHOD && !PL_lex_allbrackets
7395                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7396                     {
7397                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7398                     }
7399                     return REPORT(tmp);
7400                 }
7401
7402                 /* Not a method, so call it a subroutine (if defined) */
7403
7404                 if (cv) {
7405                     /* Check for a constant sub */
7406                     if ((sv = cv_const_sv_or_av(cv))) {
7407                   its_constant:
7408                         op_free(rv2cv_op);
7409                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7410                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7411                         if (SvTYPE(sv) == SVt_PVAV)
7412                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7413                                                       pl_yylval.opval);
7414                         else {
7415                             pl_yylval.opval->op_private = 0;
7416                             pl_yylval.opval->op_folded = 1;
7417                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7418                         }
7419                         TOKEN(BAREWORD);
7420                     }
7421
7422                     op_free(pl_yylval.opval);
7423                     pl_yylval.opval =
7424                         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7425                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7426                     PL_last_lop = PL_oldbufptr;
7427                     PL_last_lop_op = OP_ENTERSUB;
7428                     /* Is there a prototype? */
7429                     if (
7430                         SvPOK(cv))
7431                     {
7432                         STRLEN protolen = CvPROTOLEN(cv);
7433                         const char *proto = CvPROTO(cv);
7434                         bool optional;
7435                         proto = S_strip_spaces(aTHX_ proto, &protolen);
7436                         if (!protolen)
7437                             TERM(FUNC0SUB);
7438                         if ((optional = *proto == ';'))
7439                           do
7440                             proto++;
7441                           while (*proto == ';');
7442                         if (
7443                             (
7444                                 (
7445                                     *proto == '$' || *proto == '_'
7446                                  || *proto == '*' || *proto == '+'
7447                                 )
7448                              && proto[1] == '\0'
7449                             )
7450                          || (
7451                              *proto == '\\' && proto[1] && proto[2] == '\0'
7452                             )
7453                         )
7454                             UNIPROTO(UNIOPSUB,optional);
7455                         if (*proto == '\\' && proto[1] == '[') {
7456                             const char *p = proto + 2;
7457                             while(*p && *p != ']')
7458                                 ++p;
7459                             if(*p == ']' && !p[1])
7460                                 UNIPROTO(UNIOPSUB,optional);
7461                         }
7462                         if (*proto == '&' && *s == '{') {
7463                             if (PL_curstash)
7464                                 sv_setpvs(PL_subname, "__ANON__");
7465                             else
7466                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7467                             if (!PL_lex_allbrackets
7468                                 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7469                             {
7470                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7471                             }
7472                             PREBLOCK(LSTOPSUB);
7473                         }
7474                     }
7475                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7476                     PL_expect = XTERM;
7477                     force_next(off ? PRIVATEREF : BAREWORD);
7478                     if (!PL_lex_allbrackets
7479                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7480                     {
7481                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7482                     }
7483                     TOKEN(NOAMP);
7484                 }
7485
7486                 /* Call it a bare word */
7487
7488                 if (PL_hints & HINT_STRICT_SUBS)
7489                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7490                 else {
7491                 bareword:
7492                     /* after "print" and similar functions (corresponding to
7493                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7494                      * a filehandle should be subject to "strict subs".
7495                      * Likewise for the optional indirect-object argument to system
7496                      * or exec, which can't be a bareword */
7497                     if ((PL_last_lop_op == OP_PRINT
7498                             || PL_last_lop_op == OP_PRTF
7499                             || PL_last_lop_op == OP_SAY
7500                             || PL_last_lop_op == OP_SYSTEM
7501                             || PL_last_lop_op == OP_EXEC)
7502                             && (PL_hints & HINT_STRICT_SUBS))
7503                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7504                     if (lastchar != '-') {
7505                         if (ckWARN(WARN_RESERVED)) {
7506                             d = PL_tokenbuf;
7507                             while (isLOWER(*d))
7508                                 d++;
7509                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7510                             {
7511                                 /* PL_warn_reserved is constant */
7512                                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7513                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7514                                        PL_tokenbuf);
7515                                 GCC_DIAG_RESTORE;
7516                             }
7517                         }
7518                     }
7519                 }
7520                 op_free(rv2cv_op);
7521
7522             safe_bareword:
7523                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7524                  && saw_infix_sigil) {
7525                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7526                                      "Operator or semicolon missing before %c%" UTF8f,
7527                                      lastchar,
7528                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
7529                                               PL_tokenbuf));
7530                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7531                                      "Ambiguous use of %c resolved as operator %c",
7532                                      lastchar, lastchar);
7533                 }
7534                 TOKEN(BAREWORD);
7535             }
7536
7537         case KEY___FILE__:
7538             FUN0OP(
7539                 newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7540             );
7541
7542         case KEY___LINE__:
7543             FUN0OP(
7544                 newSVOP(OP_CONST, 0,
7545                     Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7546             );
7547
7548         case KEY___PACKAGE__:
7549             FUN0OP(
7550                 newSVOP(OP_CONST, 0,
7551                                         (PL_curstash
7552                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7553                                          : &PL_sv_undef))
7554             );
7555
7556         case KEY___DATA__:
7557         case KEY___END__: {
7558             GV *gv;
7559             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7560                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7561                                         ? PL_curstash
7562                                         : PL_defstash;
7563                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7564                 if (!isGV(gv))
7565                     gv_init(gv,stash,"DATA",4,0);
7566                 GvMULTI_on(gv);
7567                 if (!GvIO(gv))
7568                     GvIOp(gv) = newIO();
7569                 IoIFP(GvIOp(gv)) = PL_rsfp;
7570 #if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
7571                 {
7572                     const int fd = PerlIO_fileno(PL_rsfp);
7573                     if (fd >= 3) {
7574                         fcntl(fd,F_SETFD, FD_CLOEXEC);
7575                     }
7576                 }
7577 #endif
7578                 /* Mark this internal pseudo-handle as clean */
7579                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7580                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7581                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7582                 else
7583                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7584 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7585                 /* if the script was opened in binmode, we need to revert
7586                  * it to text mode for compatibility; but only if it has CRs
7587                  * XXX this is a questionable hack at best. */
7588                 if (PL_bufend-PL_bufptr > 2
7589                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7590                 {
7591                     Off_t loc = 0;
7592                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7593                         loc = PerlIO_tell(PL_rsfp);
7594                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7595                     }
7596 #ifdef NETWARE
7597                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7598 #else
7599                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7600 #endif  /* NETWARE */
7601                         if (loc > 0)
7602                             PerlIO_seek(PL_rsfp, loc, 0);
7603                     }
7604                 }
7605 #endif
7606 #ifdef PERLIO_LAYERS
7607                 if (!IN_BYTES) {
7608                     if (UTF)
7609                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7610                 }
7611 #endif
7612                 PL_rsfp = NULL;
7613             }
7614             goto fake_eof;
7615         }
7616
7617         case KEY___SUB__:
7618             FUN0OP(CvCLONE(PL_compcv)
7619                         ? newOP(OP_RUNCV, 0)
7620                         : newPVOP(OP_RUNCV,0,NULL));
7621
7622         case KEY_AUTOLOAD:
7623         case KEY_DESTROY:
7624         case KEY_BEGIN:
7625         case KEY_UNITCHECK:
7626         case KEY_CHECK:
7627         case KEY_INIT:
7628         case KEY_END:
7629             if (PL_expect == XSTATE) {
7630                 s = PL_bufptr;
7631                 goto really_sub;
7632             }
7633             goto just_a_word;
7634
7635         case_KEY_CORE:
7636             {
7637                 STRLEN olen = len;
7638                 d = s;
7639                 s += 2;
7640                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7641                 if ((*s == ':' && s[1] == ':')
7642                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7643                 {
7644                     s = d;
7645                     len = olen;
7646                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7647                     goto just_a_word;
7648                 }
7649                 if (!tmp)
7650                     Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
7651                                       UTF8fARG(UTF, len, PL_tokenbuf));
7652                 if (tmp < 0)
7653                     tmp = -tmp;
7654                 else if (tmp == KEY_require || tmp == KEY_do
7655                       || tmp == KEY_glob)
7656                     /* that's a way to remember we saw "CORE::" */
7657                     orig_keyword = tmp;
7658                 goto reserved_word;
7659             }
7660
7661         case KEY_abs:
7662             UNI(OP_ABS);
7663
7664         case KEY_alarm:
7665             UNI(OP_ALARM);
7666
7667         case KEY_accept:
7668             LOP(OP_ACCEPT,XTERM);
7669
7670         case KEY_and:
7671             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7672                 return REPORT(0);
7673             OPERATOR(ANDOP);
7674
7675         case KEY_atan2:
7676             LOP(OP_ATAN2,XTERM);
7677
7678         case KEY_bind:
7679             LOP(OP_BIND,XTERM);
7680
7681         case KEY_binmode:
7682             LOP(OP_BINMODE,XTERM);
7683
7684         case KEY_bless:
7685             LOP(OP_BLESS,XTERM);
7686
7687         case KEY_break:
7688             FUN0(OP_BREAK);
7689
7690         case KEY_chop:
7691             UNI(OP_CHOP);
7692
7693         case KEY_continue:
7694                     /* We have to disambiguate the two senses of
7695                       "continue". If the next token is a '{' then
7696                       treat it as the start of a continue block;
7697                       otherwise treat it as a control operator.
7698                      */
7699                     s = skipspace(s);
7700                     if (*s == '{')
7701             PREBLOCK(CONTINUE);
7702                     else
7703                         FUN0(OP_CONTINUE);
7704
7705         case KEY_chdir:
7706             /* may use HOME */
7707             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7708             UNI(OP_CHDIR);
7709
7710         case KEY_close:
7711             UNI(OP_CLOSE);
7712
7713         case KEY_closedir:
7714             UNI(OP_CLOSEDIR);
7715
7716         case KEY_cmp:
7717             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7718                 return REPORT(0);
7719             Eop(OP_SCMP);
7720
7721         case KEY_caller:
7722             UNI(OP_CALLER);
7723
7724         case KEY_crypt:
7725 #ifdef FCRYPT
7726             if (!PL_cryptseen) {
7727                 PL_cryptseen = TRUE;
7728                 init_des();
7729             }
7730 #endif
7731             LOP(OP_CRYPT,XTERM);
7732
7733         case KEY_chmod:
7734             LOP(OP_CHMOD,XTERM);
7735
7736         case KEY_chown:
7737             LOP(OP_CHOWN,XTERM);
7738
7739         case KEY_connect:
7740             LOP(OP_CONNECT,XTERM);
7741
7742         case KEY_chr:
7743             UNI(OP_CHR);
7744
7745         case KEY_cos:
7746             UNI(OP_COS);
7747
7748         case KEY_chroot:
7749             UNI(OP_CHROOT);
7750
7751         case KEY_default:
7752             PREBLOCK(DEFAULT);
7753
7754         case KEY_do:
7755             s = skipspace(s);
7756             if (*s == '{')
7757                 PRETERMBLOCK(DO);
7758             if (*s != '\'') {
7759                 *PL_tokenbuf = '&';
7760                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7761                               1, &len);
7762                 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7763                  && !keyword(PL_tokenbuf + 1, len, 0)) {
7764                     SSize_t off = s-SvPVX(PL_linestr);
7765                     d = skipspace(d);
7766                     s = SvPVX(PL_linestr)+off;
7767                     if (*d == '(') {
7768                         force_ident_maybe_lex('&');
7769                         s = d;
7770                     }
7771                 }
7772             }
7773             if (orig_keyword == KEY_do) {
7774                 orig_keyword = 0;
7775                 pl_yylval.ival = 1;
7776             }
7777             else
7778                 pl_yylval.ival = 0;
7779             OPERATOR(DO);
7780
7781         case KEY_die:
7782             PL_hints |= HINT_BLOCK_SCOPE;
7783             LOP(OP_DIE,XTERM);
7784
7785         case KEY_defined:
7786             UNI(OP_DEFINED);
7787
7788         case KEY_delete:
7789             UNI(OP_DELETE);
7790
7791         case KEY_dbmopen:
7792             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7793                               STR_WITH_LEN("NDBM_File::"),
7794                               STR_WITH_LEN("DB_File::"),
7795                               STR_WITH_LEN("GDBM_File::"),
7796                               STR_WITH_LEN("SDBM_File::"),
7797                               STR_WITH_LEN("ODBM_File::"),
7798                               NULL);
7799             LOP(OP_DBMOPEN,XTERM);
7800
7801         case KEY_dbmclose:
7802             UNI(OP_DBMCLOSE);
7803
7804         case KEY_dump:
7805             LOOPX(OP_DUMP);
7806
7807         case KEY_else:
7808             PREBLOCK(ELSE);
7809
7810         case KEY_elsif:
7811             pl_yylval.ival = CopLINE(PL_curcop);
7812             OPERATOR(ELSIF);
7813
7814         case KEY_eq:
7815             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7816                 return REPORT(0);
7817             Eop(OP_SEQ);
7818
7819         case KEY_exists:
7820             UNI(OP_EXISTS);
7821
7822         case KEY_exit:
7823             UNI(OP_EXIT);
7824
7825         case KEY_eval:
7826             s = skipspace(s);
7827             if (*s == '{') { /* block eval */
7828                 PL_expect = XTERMBLOCK;
7829                 UNIBRACK(OP_ENTERTRY);
7830             }
7831             else { /* string eval */
7832                 PL_expect = XTERM;
7833                 UNIBRACK(OP_ENTEREVAL);
7834             }
7835
7836         case KEY_evalbytes:
7837             PL_expect = XTERM;
7838             UNIBRACK(-OP_ENTEREVAL);
7839
7840         case KEY_eof:
7841             UNI(OP_EOF);
7842
7843         case KEY_exp:
7844             UNI(OP_EXP);
7845
7846         case KEY_each:
7847             UNI(OP_EACH);
7848
7849         case KEY_exec:
7850             LOP(OP_EXEC,XREF);
7851
7852         case KEY_endhostent:
7853             FUN0(OP_EHOSTENT);
7854
7855         case KEY_endnetent:
7856             FUN0(OP_ENETENT);
7857
7858         case KEY_endservent:
7859             FUN0(OP_ESERVENT);
7860
7861         case KEY_endprotoent:
7862             FUN0(OP_EPROTOENT);
7863
7864         case KEY_endpwent:
7865             FUN0(OP_EPWENT);
7866
7867         case KEY_endgrent:
7868             FUN0(OP_EGRENT);
7869
7870         case KEY_for:
7871         case KEY_foreach:
7872             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7873                 return REPORT(0);
7874             pl_yylval.ival = CopLINE(PL_curcop);
7875             s = skipspace(s);
7876             if (   PL_expect == XSTATE
7877                 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
7878             {
7879                 char *p = s;
7880                 SSize_t s_off = s - SvPVX(PL_linestr);
7881
7882                 if ((PL_bufend - p) >= 3
7883                     && strEQs(p, "my") && isSPACE(*(p + 2)))
7884                 {
7885                     p += 2;
7886                 }
7887                 else if ((PL_bufend - p) >= 4
7888                          && strEQs(p, "our") && isSPACE(*(p + 3)))
7889                     p += 3;
7890                 p = skipspace(p);
7891                 /* skip optional package name, as in "for my abc $x (..)" */
7892                 if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
7893                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7894                     p = skipspace(p);
7895                 }
7896                 if (*p != '$' && *p != '\\')
7897                     Perl_croak(aTHX_ "Missing $ on loop variable");
7898
7899                 /* The buffer may have been reallocated, update s */
7900                 s = SvPVX(PL_linestr) + s_off;
7901             }
7902             OPERATOR(FOR);
7903
7904         case KEY_formline:
7905             LOP(OP_FORMLINE,XTERM);
7906
7907         case KEY_fork:
7908             FUN0(OP_FORK);
7909
7910         case KEY_fc:
7911             UNI(OP_FC);
7912
7913         case KEY_fcntl:
7914             LOP(OP_FCNTL,XTERM);
7915
7916         case KEY_fileno:
7917             UNI(OP_FILENO);
7918
7919         case KEY_flock:
7920             LOP(OP_FLOCK,XTERM);
7921
7922         case KEY_gt:
7923             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7924                 return REPORT(0);
7925             Rop(OP_SGT);
7926
7927         case KEY_ge:
7928             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7929                 return REPORT(0);
7930             Rop(OP_SGE);
7931
7932         case KEY_grep:
7933             LOP(OP_GREPSTART, XREF);
7934
7935         case KEY_goto:
7936             LOOPX(OP_GOTO);
7937
7938         case KEY_gmtime:
7939             UNI(OP_GMTIME);
7940
7941         case KEY_getc:
7942             UNIDOR(OP_GETC);
7943
7944         case KEY_getppid:
7945             FUN0(OP_GETPPID);
7946
7947         case KEY_getpgrp:
7948             UNI(OP_GETPGRP);
7949
7950         case KEY_getpriority:
7951             LOP(OP_GETPRIORITY,XTERM);
7952
7953         case KEY_getprotobyname:
7954             UNI(OP_GPBYNAME);
7955
7956         case KEY_getprotobynumber:
7957             LOP(OP_GPBYNUMBER,XTERM);
7958
7959         case KEY_getprotoent:
7960             FUN0(OP_GPROTOENT);
7961
7962         case KEY_getpwent:
7963             FUN0(OP_GPWENT);
7964
7965         case KEY_getpwnam:
7966             UNI(OP_GPWNAM);
7967
7968         case KEY_getpwuid:
7969             UNI(OP_GPWUID);
7970
7971         case KEY_getpeername:
7972             UNI(OP_GETPEERNAME);
7973
7974         case KEY_gethostbyname:
7975             UNI(OP_GHBYNAME);
7976
7977         case KEY_gethostbyaddr:
7978             LOP(OP_GHBYADDR,XTERM);
7979
7980         case KEY_gethostent:
7981             FUN0(OP_GHOSTENT);
7982
7983         case KEY_getnetbyname:
7984             UNI(OP_GNBYNAME);
7985
7986         case KEY_getnetbyaddr:
7987             LOP(OP_GNBYADDR,XTERM);
7988
7989         case KEY_getnetent:
7990             FUN0(OP_GNETENT);
7991
7992         case KEY_getservbyname:
7993             LOP(OP_GSBYNAME,XTERM);
7994
7995         case KEY_getservbyport:
7996             LOP(OP_GSBYPORT,XTERM);
7997
7998         case KEY_getservent:
7999             FUN0(OP_GSERVENT);
8000
8001         case KEY_getsockname:
8002             UNI(OP_GETSOCKNAME);
8003
8004         case KEY_getsockopt:
8005             LOP(OP_GSOCKOPT,XTERM);
8006
8007         case KEY_getgrent:
8008             FUN0(OP_GGRENT);
8009
8010         case KEY_getgrnam:
8011             UNI(OP_GGRNAM);
8012
8013         case KEY_getgrgid:
8014             UNI(OP_GGRGID);
8015
8016         case KEY_getlogin:
8017             FUN0(OP_GETLOGIN);
8018
8019         case KEY_given:
8020             pl_yylval.ival = CopLINE(PL_curcop);
8021             Perl_ck_warner_d(aTHX_
8022                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8023                 "given is experimental");
8024             OPERATOR(GIVEN);
8025
8026         case KEY_glob:
8027             LOP(
8028              orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
8029              XTERM
8030             );
8031
8032         case KEY_hex:
8033             UNI(OP_HEX);
8034
8035         case KEY_if:
8036             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8037                 return REPORT(0);
8038             pl_yylval.ival = CopLINE(PL_curcop);
8039             OPERATOR(IF);
8040
8041         case KEY_index:
8042             LOP(OP_INDEX,XTERM);
8043
8044         case KEY_int:
8045             UNI(OP_INT);
8046
8047         case KEY_ioctl:
8048             LOP(OP_IOCTL,XTERM);
8049
8050         case KEY_join:
8051             LOP(OP_JOIN,XTERM);
8052
8053         case KEY_keys:
8054             UNI(OP_KEYS);
8055
8056         case KEY_kill:
8057             LOP(OP_KILL,XTERM);
8058
8059         case KEY_last:
8060             LOOPX(OP_LAST);
8061
8062         case KEY_lc:
8063             UNI(OP_LC);
8064
8065         case KEY_lcfirst:
8066             UNI(OP_LCFIRST);
8067
8068         case KEY_local:
8069             OPERATOR(LOCAL);
8070
8071         case KEY_length:
8072             UNI(OP_LENGTH);
8073
8074         case KEY_lt:
8075             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8076                 return REPORT(0);
8077             Rop(OP_SLT);
8078
8079         case KEY_le:
8080             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8081                 return REPORT(0);
8082             Rop(OP_SLE);
8083
8084         case KEY_localtime:
8085             UNI(OP_LOCALTIME);
8086
8087         case KEY_log:
8088             UNI(OP_LOG);
8089
8090         case KEY_link:
8091             LOP(OP_LINK,XTERM);
8092
8093         case KEY_listen:
8094             LOP(OP_LISTEN,XTERM);
8095
8096         case KEY_lock:
8097             UNI(OP_LOCK);
8098
8099         case KEY_lstat:
8100             UNI(OP_LSTAT);
8101
8102         case KEY_m:
8103             s = scan_pat(s,OP_MATCH);
8104             TERM(sublex_start());
8105
8106         case KEY_map:
8107             LOP(OP_MAPSTART, XREF);
8108
8109         case KEY_mkdir:
8110             LOP(OP_MKDIR,XTERM);
8111
8112         case KEY_msgctl:
8113             LOP(OP_MSGCTL,XTERM);
8114
8115         case KEY_msgget:
8116             LOP(OP_MSGGET,XTERM);
8117
8118         case KEY_msgrcv:
8119             LOP(OP_MSGRCV,XTERM);
8120
8121         case KEY_msgsnd:
8122             LOP(OP_MSGSND,XTERM);
8123
8124         case KEY_our:
8125         case KEY_my:
8126         case KEY_state:
8127             if (PL_in_my) {
8128                 PL_bufptr = s;
8129                 yyerror(Perl_form(aTHX_
8130                                   "Can't redeclare \"%s\" in \"%s\"",
8131                                    tmp      == KEY_my    ? "my" :
8132                                    tmp      == KEY_state ? "state" : "our",
8133                                    PL_in_my == KEY_my    ? "my" :
8134                                    PL_in_my == KEY_state ? "state" : "our"));
8135             }
8136             PL_in_my = (U16)tmp;
8137             s = skipspace(s);
8138             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8139                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8140                 if (len == 3 && strEQs(PL_tokenbuf, "sub"))
8141                     goto really_sub;
8142                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8143                 if (!PL_in_my_stash) {
8144                     char tmpbuf[1024];
8145                     int len;
8146                     PL_bufptr = s;
8147                     len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8148                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
8149                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8150                 }
8151             }
8152             else if (*s == '\\') {
8153                 if (!FEATURE_MYREF_IS_ENABLED)
8154                     Perl_croak(aTHX_ "The experimental declared_refs "
8155                                      "feature is not enabled");
8156                 Perl_ck_warner_d(aTHX_
8157                      packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
8158                     "Declaring references is experimental");
8159             }
8160             OPERATOR(MY);
8161
8162         case KEY_next:
8163             LOOPX(OP_NEXT);
8164
8165         case KEY_ne:
8166             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8167                 return REPORT(0);
8168             Eop(OP_SNE);
8169
8170         case KEY_no:
8171             s = tokenize_use(0, s);
8172             TOKEN(USE);
8173
8174         case KEY_not:
8175             if (*s == '(' || (s = skipspace(s), *s == '('))
8176                 FUN1(OP_NOT);
8177             else {
8178                 if (!PL_lex_allbrackets
8179                     && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8180                 {
8181                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8182                 }
8183                 OPERATOR(NOTOP);
8184             }
8185
8186         case KEY_open:
8187             s = skipspace(s);
8188             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8189                 const char *t;
8190                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8191                               &len);
8192                 for (t=d; isSPACE(*t);)
8193                     t++;
8194                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8195                     /* [perl #16184] */
8196                     && !(t[0] == '=' && t[1] == '>')
8197                     && !(t[0] == ':' && t[1] == ':')
8198                     && !keyword(s, d-s, 0)
8199                 ) {
8200                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8201                        "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8202                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8203                 }
8204             }
8205             LOP(OP_OPEN,XTERM);
8206
8207         case KEY_or:
8208             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8209                 return REPORT(0);
8210             pl_yylval.ival = OP_OR;
8211             OPERATOR(OROP);
8212
8213         case KEY_ord:
8214             UNI(OP_ORD);
8215
8216         case KEY_oct:
8217             UNI(OP_OCT);
8218
8219         case KEY_opendir:
8220             LOP(OP_OPEN_DIR,XTERM);
8221
8222         case KEY_print:
8223             checkcomma(s,PL_tokenbuf,"filehandle");
8224             LOP(OP_PRINT,XREF);
8225
8226         case KEY_printf:
8227             checkcomma(s,PL_tokenbuf,"filehandle");
8228             LOP(OP_PRTF,XREF);
8229
8230         case KEY_prototype:
8231             UNI(OP_PROTOTYPE);
8232
8233         case KEY_push:
8234             LOP(OP_PUSH,XTERM);
8235
8236         case KEY_pop:
8237             UNIDOR(OP_POP);
8238
8239         case KEY_pos:
8240             UNIDOR(OP_POS);
8241
8242         case KEY_pack:
8243             LOP(OP_PACK,XTERM);
8244
8245         case KEY_package:
8246             s = force_word(s,BAREWORD,FALSE,TRUE);
8247             s = skipspace(s);
8248             s = force_strict_version(s);
8249             PREBLOCK(PACKAGE);
8250
8251         case KEY_pipe:
8252             LOP(OP_PIPE_OP,XTERM);
8253
8254         case KEY_q:
8255             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8256             if (!s)
8257                 missingterm(NULL);
8258             COPLINE_SET_FROM_MULTI_END;
8259             pl_yylval.ival = OP_CONST;
8260             TERM(sublex_start());
8261
8262         case KEY_quotemeta:
8263             UNI(OP_QUOTEMETA);
8264
8265         case KEY_qw: {
8266             OP *words = NULL;
8267             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8268             if (!s)
8269                 missingterm(NULL);
8270             COPLINE_SET_FROM_MULTI_END;
8271             PL_expect = XOPERATOR;
8272             if (SvCUR(PL_lex_stuff)) {
8273                 int warned_comma = !ckWARN(WARN_QW);
8274                 int warned_comment = warned_comma;
8275                 d = SvPV_force(PL_lex_stuff, len);
8276                 while (len) {
8277                     for (; isSPACE(*d) && len; --len, ++d)
8278                         /**/;
8279                     if (len) {
8280                         SV *sv;
8281                         const char *b = d;
8282                         if (!warned_comma || !warned_comment) {
8283                             for (; !isSPACE(*d) && len; --len, ++d) {
8284                                 if (!warned_comma && *d == ',') {
8285                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8286                                         "Possible attempt to separate words with commas");
8287                                     ++warned_comma;
8288                                 }
8289                                 else if (!warned_comment && *d == '#') {
8290                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8291                                         "Possible attempt to put comments in qw() list");
8292                                     ++warned_comment;
8293                                 }
8294                             }
8295                         }
8296                         else {
8297                             for (; !isSPACE(*d) && len; --len, ++d)
8298                                 /**/;
8299                         }
8300                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8301                         words = op_append_elem(OP_LIST, words,
8302                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8303                     }
8304                 }
8305             }
8306             if (!words)
8307                 words = newNULLLIST();
8308             SvREFCNT_dec_NN(PL_lex_stuff);
8309             PL_lex_stuff = NULL;
8310             PL_expect = XOPERATOR;
8311             pl_yylval.opval = sawparens(words);
8312             TOKEN(QWLIST);
8313         }
8314
8315         case KEY_qq:
8316             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8317             if (!s)
8318                 missingterm(NULL);
8319             pl_yylval.ival = OP_STRINGIFY;
8320             if (SvIVX(PL_lex_stuff) == '\'')
8321                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8322             TERM(sublex_start());
8323
8324         case KEY_qr:
8325             s = scan_pat(s,OP_QR);
8326             TERM(sublex_start());
8327
8328         case KEY_qx:
8329             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8330             if (!s)
8331                 missingterm(NULL);
8332             pl_yylval.ival = OP_BACKTICK;
8333             TERM(sublex_start());
8334
8335         case KEY_return:
8336             OLDLOP(OP_RETURN);
8337
8338         case KEY_require:
8339             s = skipspace(s);
8340             if (isDIGIT(*s)) {
8341                 s = force_version(s, FALSE);
8342             }
8343             else if (*s != 'v' || !isDIGIT(s[1])
8344                     || (s = force_version(s, TRUE), *s == 'v'))
8345             {
8346                 *PL_tokenbuf = '\0';
8347                 s = force_word(s,BAREWORD,TRUE,TRUE);
8348                 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
8349                                            PL_tokenbuf + sizeof(PL_tokenbuf),
8350                                            UTF))
8351                 {
8352                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8353                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8354                 }
8355                 else if (*s == '<')
8356                     yyerror("<> at require-statement should be quotes");
8357             }
8358             if (orig_keyword == KEY_require) {
8359                 orig_keyword = 0;
8360                 pl_yylval.ival = 1;
8361             }
8362             else
8363                 pl_yylval.ival = 0;
8364             PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
8365             PL_bufptr = s;
8366             PL_last_uni = PL_oldbufptr;
8367             PL_last_lop_op = OP_REQUIRE;
8368             s = skipspace(s);
8369             return REPORT( (int)REQUIRE );
8370
8371         case KEY_reset:
8372             UNI(OP_RESET);
8373
8374         case KEY_redo:
8375             LOOPX(OP_REDO);
8376
8377         case KEY_rename:
8378             LOP(OP_RENAME,XTERM);
8379
8380         case KEY_rand:
8381             UNI(OP_RAND);
8382
8383         case KEY_rmdir:
8384             UNI(OP_RMDIR);
8385
8386         case KEY_rindex:
8387             LOP(OP_RINDEX,XTERM);
8388
8389         case KEY_read:
8390             LOP(OP_READ,XTERM);
8391
8392         case KEY_readdir:
8393             UNI(OP_READDIR);
8394
8395         case KEY_readline:
8396             UNIDOR(OP_READLINE);
8397
8398         case KEY_readpipe:
8399             UNIDOR(OP_BACKTICK);
8400
8401         case KEY_rewinddir:
8402             UNI(OP_REWINDDIR);
8403
8404         case KEY_recv:
8405             LOP(OP_RECV,XTERM);
8406
8407         case KEY_reverse:
8408             LOP(OP_REVERSE,XTERM);
8409
8410         case KEY_readlink:
8411             UNIDOR(OP_READLINK);
8412
8413         case KEY_ref:
8414             UNI(OP_REF);
8415
8416         case KEY_s:
8417             s = scan_subst(s);
8418             if (pl_yylval.opval)
8419                 TERM(sublex_start());
8420             else
8421                 TOKEN(1);       /* force error */
8422
8423         case KEY_say:
8424             checkcomma(s,PL_tokenbuf,"filehandle");
8425             LOP(OP_SAY,XREF);
8426
8427         case KEY_chomp:
8428             UNI(OP_CHOMP);
8429
8430         case KEY_scalar:
8431             UNI(OP_SCALAR);
8432
8433         case KEY_select:
8434             LOP(OP_SELECT,XTERM);
8435
8436         case KEY_seek:
8437             LOP(OP_SEEK,XTERM);
8438
8439         case KEY_semctl:
8440             LOP(OP_SEMCTL,XTERM);
8441
8442         case KEY_semget:
8443             LOP(OP_SEMGET,XTERM);
8444
8445         case KEY_semop:
8446             LOP(OP_SEMOP,XTERM);
8447
8448         case KEY_send:
8449             LOP(OP_SEND,XTERM);
8450
8451         case KEY_setpgrp:
8452             LOP(OP_SETPGRP,XTERM);
8453
8454         case KEY_setpriority:
8455             LOP(OP_SETPRIORITY,XTERM);
8456
8457         case KEY_sethostent:
8458             UNI(OP_SHOSTENT);
8459
8460         case KEY_setnetent:
8461             UNI(OP_SNETENT);
8462
8463         case KEY_setservent:
8464             UNI(OP_SSERVENT);
8465
8466         case KEY_setprotoent:
8467             UNI(OP_SPROTOENT);
8468
8469         case KEY_setpwent:
8470             FUN0(OP_SPWENT);
8471
8472         case KEY_setgrent:
8473             FUN0(OP_SGRENT);
8474
8475         case KEY_seekdir:
8476             LOP(OP_SEEKDIR,XTERM);
8477
8478         case KEY_setsockopt:
8479             LOP(OP_SSOCKOPT,XTERM);
8480
8481         case KEY_shift:
8482             UNIDOR(OP_SHIFT);
8483
8484         case KEY_shmctl:
8485             LOP(OP_SHMCTL,XTERM);
8486
8487         case KEY_shmget:
8488             LOP(OP_SHMGET,XTERM);
8489
8490         case KEY_shmread:
8491             LOP(OP_SHMREAD,XTERM);
8492
8493         case KEY_shmwrite:
8494             LOP(OP_SHMWRITE,XTERM);
8495
8496         case KEY_shutdown:
8497             LOP(OP_SHUTDOWN,XTERM);
8498
8499         case KEY_sin:
8500             UNI(OP_SIN);
8501
8502         case KEY_sleep:
8503             UNI(OP_SLEEP);
8504
8505         case KEY_socket:
8506             LOP(OP_SOCKET,XTERM);
8507
8508         case KEY_socketpair:
8509             LOP(OP_SOCKPAIR,XTERM);
8510
8511         case KEY_sort:
8512             checkcomma(s,PL_tokenbuf,"subroutine name");
8513             s = skipspace(s);
8514             PL_expect = XTERM;
8515             s = force_word(s,BAREWORD,TRUE,TRUE);
8516             LOP(OP_SORT,XREF);
8517
8518         case KEY_split:
8519             LOP(OP_SPLIT,XTERM);
8520
8521         case KEY_sprintf:
8522             LOP(OP_SPRINTF,XTERM);
8523
8524         case KEY_splice:
8525             LOP(OP_SPLICE,XTERM);
8526
8527         case KEY_sqrt:
8528             UNI(OP_SQRT);
8529
8530         case KEY_srand:
8531             UNI(OP_SRAND);
8532
8533         case KEY_stat:
8534             UNI(OP_STAT);
8535
8536         case KEY_study:
8537             UNI(OP_STUDY);
8538
8539         case KEY_substr:
8540             LOP(OP_SUBSTR,XTERM);
8541
8542         case KEY_format:
8543         case KEY_sub:
8544           really_sub:
8545             {
8546                 char * const tmpbuf = PL_tokenbuf + 1;
8547                 expectation attrful;
8548                 bool have_name, have_proto;
8549                 const int key = tmp;
8550                 SV *format_name = NULL;
8551
8552                 SSize_t off = s-SvPVX(PL_linestr);
8553                 s = skipspace(s);
8554                 d = SvPVX(PL_linestr)+off;
8555
8556                 if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
8557                     || *s == '\''
8558                     || (*s == ':' && s[1] == ':'))
8559                 {
8560
8561                     PL_expect = XBLOCK;
8562                     attrful = XATTRBLOCK;
8563                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8564                                   &len);
8565                     if (key == KEY_format)
8566                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8567                     *PL_tokenbuf = '&';
8568                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8569                      || pad_findmy_pvn(
8570                             PL_tokenbuf, len + 1, 0
8571                         ) != NOT_IN_PAD)
8572                         sv_setpvn(PL_subname, tmpbuf, len);
8573                     else {
8574                         sv_setsv(PL_subname,PL_curstname);
8575                         sv_catpvs(PL_subname,"::");
8576                         sv_catpvn(PL_subname,tmpbuf,len);
8577                     }
8578                     if (SvUTF8(PL_linestr))
8579                         SvUTF8_on(PL_subname);
8580                     have_name = TRUE;
8581
8582
8583                     s = skipspace(d);
8584                 }
8585                 else {
8586                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8587                     {
8588                         *d = '\0';
8589                         /* diag_listed_as: Missing name in "%s sub" */
8590                         Perl_croak(aTHX_
8591                                   "Missing name in \"%s\"", PL_bufptr);
8592                     }
8593                     PL_expect = XTERMBLOCK;
8594                     attrful = XATTRTERM;
8595                     sv_setpvs(PL_subname,"?");
8596                     have_name = FALSE;
8597                 }
8598
8599                 if (key == KEY_format) {
8600                     if (format_name) {
8601                         NEXTVAL_NEXTTOKE.opval
8602                             = newSVOP(OP_CONST,0, format_name);
8603                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8604                         force_next(BAREWORD);
8605                     }
8606                     PREBLOCK(FORMAT);
8607                 }
8608
8609                 /* Look for a prototype */
8610                 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8611                     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8612                     COPLINE_SET_FROM_MULTI_END;
8613                     if (!s)
8614                         Perl_croak(aTHX_ "Prototype not terminated");
8615                     (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8616                     have_proto = TRUE;
8617
8618                     s = skipspace(s);
8619                 }
8620                 else
8621                     have_proto = FALSE;
8622
8623                 if (*s == ':' && s[1] != ':')
8624                     PL_expect = attrful;
8625                 else if ((*s != '{' && *s != '(') && key != KEY_format) {
8626                     assert(key == KEY_sub || key == KEY_AUTOLOAD ||
8627                            key == KEY_DESTROY || key == KEY_BEGIN ||
8628                            key == KEY_UNITCHECK || key == KEY_CHECK ||
8629                            key == KEY_INIT || key == KEY_END ||
8630                            key == KEY_my || key == KEY_state ||
8631                            key == KEY_our);
8632                     if (!have_name)
8633                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8634                     else if (*s != ';' && *s != '}')
8635                         Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
8636                 }
8637
8638                 if (have_proto) {
8639                     NEXTVAL_NEXTTOKE.opval =
8640                         newSVOP(OP_CONST, 0, PL_lex_stuff);
8641                     PL_lex_stuff = NULL;
8642                     force_next(THING);
8643                 }
8644                 if (!have_name) {
8645                     if (PL_curstash)
8646                         sv_setpvs(PL_subname, "__ANON__");
8647                     else
8648                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8649                     TOKEN(ANONSUB);
8650                 }
8651                 force_ident_maybe_lex('&');
8652                 TOKEN(SUB);
8653             }
8654
8655         case KEY_system:
8656             LOP(OP_SYSTEM,XREF);
8657
8658         case KEY_symlink:
8659             LOP(OP_SYMLINK,XTERM);
8660
8661         case KEY_syscall:
8662             LOP(OP_SYSCALL,XTERM);
8663
8664         case KEY_sysopen:
8665             LOP(OP_SYSOPEN,XTERM);
8666
8667         case KEY_sysseek:
8668             LOP(OP_SYSSEEK,XTERM);
8669
8670         case KEY_sysread:
8671             LOP(OP_SYSREAD,XTERM);
8672
8673         case KEY_syswrite:
8674             LOP(OP_SYSWRITE,XTERM);
8675
8676         case KEY_tr:
8677         case KEY_y:
8678             s = scan_trans(s);
8679             TERM(sublex_start());
8680
8681         case KEY_tell:
8682             UNI(OP_TELL);
8683
8684         case KEY_telldir:
8685             UNI(OP_TELLDIR);
8686
8687         case KEY_tie:
8688             LOP(OP_TIE,XTERM);
8689
8690         case KEY_tied:
8691             UNI(OP_TIED);
8692
8693         case KEY_time:
8694             FUN0(OP_TIME);
8695
8696         case KEY_times:
8697             FUN0(OP_TMS);
8698
8699         case KEY_truncate:
8700             LOP(OP_TRUNCATE,XTERM);
8701
8702         case KEY_uc:
8703             UNI(OP_UC);
8704
8705         case KEY_ucfirst:
8706             UNI(OP_UCFIRST);
8707
8708         case KEY_untie:
8709             UNI(OP_UNTIE);
8710
8711         case KEY_until:
8712             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8713                 return REPORT(0);
8714             pl_yylval.ival = CopLINE(PL_curcop);
8715             OPERATOR(UNTIL);
8716
8717         case KEY_unless:
8718             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8719                 return REPORT(0);
8720             pl_yylval.ival = CopLINE(PL_curcop);
8721             OPERATOR(UNLESS);
8722
8723         case KEY_unlink:
8724             LOP(OP_UNLINK,XTERM);
8725
8726         case KEY_undef:
8727             UNIDOR(OP_UNDEF);
8728
8729         case KEY_unpack:
8730             LOP(OP_UNPACK,XTERM);
8731
8732         case KEY_utime:
8733             LOP(OP_UTIME,XTERM);
8734
8735         case KEY_umask:
8736             UNIDOR(OP_UMASK);
8737
8738         case KEY_unshift:
8739             LOP(OP_UNSHIFT,XTERM);
8740
8741         case KEY_use:
8742             s = tokenize_use(1, s);
8743             TOKEN(USE);
8744
8745         case KEY_values:
8746             UNI(OP_VALUES);
8747
8748         case KEY_vec:
8749             LOP(OP_VEC,XTERM);
8750
8751         case KEY_when:
8752             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8753                 return REPORT(0);
8754             pl_yylval.ival = CopLINE(PL_curcop);
8755             Perl_ck_warner_d(aTHX_
8756                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8757                 "when is experimental");
8758             OPERATOR(WHEN);
8759
8760         case KEY_while:
8761             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8762                 return REPORT(0);
8763             pl_yylval.ival = CopLINE(PL_curcop);
8764             OPERATOR(WHILE);
8765
8766         case KEY_warn:
8767             PL_hints |= HINT_BLOCK_SCOPE;
8768             LOP(OP_WARN,XTERM);
8769
8770         case KEY_wait:
8771             FUN0(OP_WAIT);
8772
8773         case KEY_waitpid:
8774             LOP(OP_WAITPID,XTERM);
8775
8776         case KEY_wantarray:
8777             FUN0(OP_WANTARRAY);
8778
8779         case KEY_write:
8780             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8781              * we use the same number on EBCDIC */
8782             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8783             UNI(OP_ENTERWRITE);
8784
8785         case KEY_x:
8786             if (PL_expect == XOPERATOR) {
8787                 if (*s == '=' && !PL_lex_allbrackets
8788                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8789                 {
8790                     return REPORT(0);
8791                 }
8792                 Mop(OP_REPEAT);
8793             }
8794             check_uni();
8795             goto just_a_word;
8796
8797         case KEY_xor:
8798             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8799                 return REPORT(0);
8800             pl_yylval.ival = OP_XOR;
8801             OPERATOR(OROP);
8802         }
8803     }}
8804 }
8805
8806 /*
8807   S_pending_ident
8808
8809   Looks up the identifier held in PL_tokenbuf (sigil first) in the pad
8810   or in a package, and leaves the resulting op in pl_yylval.opval.
8811   PL_in_my == KEY_sigvar indicates that this is a subroutine signature
8812   variable rather than a plain pad var (there is no is_sig parameter).
8813
8814   Returns:
8815     PRIVATEREF if this is a lexical name.
8816     BAREWORD   if this belongs to a package (including "our" names).
8817
8818   Structure:
8819       if we're in a my declaration
8820           report an error if they tried to say my($foo::bar)
8821           build the ops for a my() declaration
8822       if it's an access to a my() variable
8823           build ops for access to a my() variable
8824       if in a dq string, and they've said @foo and we can't find @foo
8825           warn
8826       build ops for a bareword
8827 */
8828
8829 static int
8830 S_pending_ident(pTHX)
8831 {
         /* tmp: pad offset obtained from allocmy() ("our") or from
          *      pad_findmy_pvn() (ordinary access).
          * pit: pl_yylval.ival narrowed to a char; it is compared against
          *      '&' and '@' further down. */
8832     PADOFFSET tmp = 0;
8833     const char pit = (char)pl_yylval.ival;
8834     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8835     /* All routes through this function want to know if there is a colon.  */
8836     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8837
8838     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8839           "### Pending identifier '%s'\n", PL_tokenbuf); });
8840
8841     /* if we're in a my(), we can't allow dynamics here.
8842        $foo'bar has already been turned into $foo::bar, so
8843        just check for colons.
8844
8845        if it's a legal name, the OP is a PADANY.
8846     */
8847     if (PL_in_my) {
8848         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
                 /* The error is only queued with yyerror_pv(); there is no
                  * early return, so allocmy() below runs either way. */
8849             if (has_colon)
8850                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8851                                   "%se %s in \"our\"",
8852                                   *PL_tokenbuf=='&' ?"subroutin":"variabl",
8853                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
                 /* Control then falls out of the PL_in_my block; the lookup
                  * section below reuses this tmp because PL_in_my is set. */
8854             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8855         }
8856         else {
                 /* Every other PL_in_my value (KEY_my, KEY_sigvar, otherwise
                  * reported as "state") builds the op here and returns. */
8857             OP *o;
8858             if (has_colon) {
8859                 /* "my" variable %s can't be in a package */
8860                 /* PL_no_myglob is constant */
8861                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8862                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8863                             PL_in_my == KEY_my ? "my" : "state",
8864                             *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8865                             PL_tokenbuf),
8866                             UTF ? SVf_UTF8 : 0);
8867                 GCC_DIAG_RESTORE;
8868             }
8869
8870             if (PL_in_my == KEY_sigvar) {
8871                 /* A signature 'padop' needs in addition, an op_first to
8872                  * point to a child sigdefelem, and an extra field to hold
8873                  * the signature index. We can achieve both by using an
8874                  * UNOP_AUX and (ab)using the op_aux field to hold the
8875                  * index. If we ever need more fields, use a real malloced
8876                  * aux struct instead.
8877                  */
8878                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
8879                                     INT2PTR(UNOP_AUX_item *,
8880                                         (PL_parser->sig_elems)));
                     /* The sigil selects the element kind: '$' scalar,
                      * '@' array, anything else hash. */
8881                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
8882                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
8883                                   :                         OPpARGELEM_HV);
8884             }
8885             else
8886                 o = newOP(OP_PADANY, 0);
8887             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8888                                                         UTF ? SVf_UTF8 : 0);
                 /* The sigvar state is reset once this variable has been
                  * allocated. */
8889             if (PL_in_my == KEY_sigvar)
8890                 PL_in_my = 0;
8891
8892             pl_yylval.opval = o;
8893             return PRIVATEREF;
8894         }
8895     }
8896
8897     /*
8898        build the ops for accesses to a my() variable.
8899     */
8900
8901     if (!has_colon) {
             /* Only search the pad when not declaring; in the "our" case tmp
              * already holds the offset returned by allocmy() above. */
8902         if (!PL_in_my)
8903             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8904                                  0);
8905         if (tmp != NOT_IN_PAD) {
8906             /* might be an "our" variable */
8907             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8908                 /* build ops for a bareword */
                     /* The constant's name is "<stash name>::<identifier>",
                      * with the sigil (PL_tokenbuf[0]) dropped. */
8909                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
8910                 HEK * const stashname = HvNAME_HEK(stash);
8911                 SV *  const sym = newSVhek(stashname);
8912                 sv_catpvs(sym, "::");
8913                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8914                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
8915                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
                     /* For anything but a sub ('&'), fetch the glob with the
                      * slot type implied by the sigil. */
8916                 if (pit != '&')
8917                   gv_fetchsv(sym,
8918                     GV_ADDMULTI,
8919                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8920                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8921                      : SVt_PVHV));
8922                 return BAREWORD;
8923             }
8924
                 /* A plain lexical: a PADANY op whose target is the pad
                  * offset just found. */
8925             pl_yylval.opval = newOP(OP_PADANY, 0);
8926             pl_yylval.opval->op_targ = tmp;
8927             return PRIVATEREF;
8928         }
8929     }
8930
8931     /*
8932        Whine if they've said @foo or @foo{key} in a doublequoted string,
8933        and @foo (or %foo) isn't a variable we can find in the symbol
8934        table.
8935     */
8936     if (ckWARN(WARN_AMBIGUOUS)
8937         && pit == '@'
8938         && PL_lex_state != LEX_NORMAL
8939         && !PL_lex_brackets)
8940     {
8941         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8942                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
8943                                          SVt_PVAV);
             /* Which slot must exist depends on the sigil actually stored in
              * PL_tokenbuf: the array slot for '@', otherwise the hash slot. */
8944         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8945            )
8946         {
8947             /* Downgraded from fatal to warning 20000522 mjd */
8948             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8949                         "Possible unintended interpolation of %" UTF8f
8950                         " in string",
8951                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8952         }
8953     }
8954
8955     /* build ops for a bareword */
         /* The constant holds the identifier without its sigil. */
8956     pl_yylval.opval = newSVOP(OP_CONST, 0,
8957                                    newSVpvn_flags(PL_tokenbuf + 1,
8958                                                       tokenbuf_len - 1,
8959                                                       UTF ? SVf_UTF8 : 0 ));
8960     pl_yylval.opval->op_private = OPpCONST_ENTERED;
         /* Except for subs ('&'), fetch the glob with GV_ADDMULTI under
          * PL_in_eval and GV_ADD otherwise, typed by the sigil. */
8961     if (pit != '&')
8962         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8963                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
8964                      | ( UTF ? SVf_UTF8 : 0 ),
8965                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8966                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8967                       : SVt_PVHV));
8968     return BAREWORD;
8969 }
8970
8971 STATIC void
8972 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8973 {
8974     PERL_ARGS_ASSERT_CHECKCOMMA;
8975
8976     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
8977         if (ckWARN(WARN_SYNTAX)) {
8978             int level = 1;
8979             const char *w;
8980             for (w = s+2; *w && level; w++) {
8981                 if (*w == '(')
8982                     ++level;
8983                 else if (*w == ')')
8984                     --level;
8985             }
8986             while (isSPACE(*w))
8987                 ++w;
8988             /* the list of chars below is for end of statements or
8989              * block / parens, boolean operators (&&, ||, //) and branch
8990              * constructs (or, and, if, until, unless, while, err, for).
8991              * Not a very solid hack... */
8992             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8993                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8994                             "%s (...) interpreted as function",name);
8995         }
8996     }
8997     while (s < PL_bufend && isSPACE(*s))
8998         s++;
8999     if (*s == '(')
9000         s++;
9001     while (s < PL_bufend && isSPACE(*s))
9002         s++;
9003     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9004         const char * const w = s;
9005         s += UTF ? UTF8SKIP(s) : 1;
9006         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9007             s += UTF ? UTF8SKIP(s) : 1;
9008         while (s < PL_bufend && isSPACE(*s))
9009             s++;
9010         if (*s == ',') {
9011             GV* gv;
9012             if (keyword(w, s - w, 0))
9013                 return;
9014
9015             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9016             if (gv && GvCVu(gv))
9017                 return;
9018             if (s - w <= 254) {
9019                 PADOFFSET off;
9020                 char tmpbuf[256];
9021                 Copy(w, tmpbuf+1, s - w, char);
9022                 *tmpbuf = '&';
9023                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9024                 if (off != NOT_IN_PAD) return;
9025             }
9026             Perl_croak(aTHX_ "No comma allowed after %s", what);
9027         }
9028     }
9029 }
9030
9031 /* S_new_constant(): do any overload::constant lookup.
9032
9033    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9034    Best used as sv=new_constant(..., sv, ...).
9035    If s, pv are NULL, calls subroutine with one argument,
9036    and <type> is used with error messages only.
9037    <type> is assumed to be well formed UTF-8

        Parameters:
          s, len        text of the constant; used to build <pv> when <pv> is
                        NULL, and in "Constant(...)" messages when <type> is
                        NULL
          key, keylen   name of the entry looked up in the hints hash
                        (GvHV(PL_hintgv)); a key starting with 'c' is
                        asserted to be "charnames"
          sv            the value to pass to the handler; it is mortalized
                        here (or released outright on the early charnames
                        exit)
          pv            optional first argument for the handler
          type, typelen passed to the handler as its last argument when a pv
                        argument is passed too; otherwise used only in
                        error messages

        The handler is called in scalar context with (pv, sv, type) when a
        pv is available, else with (sv) alone.

        Returns:
          - &PL_sv_undef for "charnames" when errors were already recorded;
          - sv (reference count incremented) when no handler is found or
            when the handler died outside an eval; an error is reported
            through yyerror in both cases;
          - otherwise the handler's result with its reference count
            incremented; an undefined result is additionally reported as
            an error.
     */
9038
9039 STATIC SV *
9040 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9041                SV *sv, SV *pv, const char *type, STRLEN typelen)
9042 {
9043     dSP;
9044     HV * table = GvHV(PL_hintgv);                /* ^H */
9045     SV *res;
9046     SV *errsv = NULL;
9047     SV **cvp;
9048     SV *cv, *typesv;
         /* The three pieces of the explanation used by the "report" code */
9049     const char *why1 = "", *why2 = "", *why3 = "";
9050
9051     PERL_ARGS_ASSERT_NEW_CONSTANT;
9052     /* We assume that this is true: */
9053     if (*key == 'c') { assert (strEQ(key, "charnames")); }
9054     assert(type || s);
9055
9056     /* charnames doesn't work well if there have been errors found */
9057     if (PL_error_count > 0 && *key == 'c')
9058     {
9059         SvREFCNT_dec_NN(sv);
9060         return &PL_sv_undef;
9061     }
9062
9063     sv_2mortal(sv);                     /* Parent created it permanently */
9064     if (!table
9065         || ! (PL_hints & HINT_LOCALIZE_HH)
9066         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9067         || ! SvOK(*cvp))
9068     {
9069         char *msg;
9070
9071         /* Here haven't found what we're looking for.  If it is charnames,
9072          * perhaps it needs to be loaded.  Try doing that before giving up */
9073         if (*key == 'c') {
9074             Perl_load_module(aTHX_
9075                             0,
9076                             newSVpvs("_charnames"),
9077                              /* version parameter; no need to specify it, as if
9078                               * we get too early a version, will fail anyway,
9079                               * not being able to find '_charnames' */
9080                             NULL,
9081                             newSVpvs(":full"),
9082                             newSVpvs(":short"),
9083                             NULL);
9084             assert(sp == PL_stack_sp);
                 /* Re-read the hints hash and repeat the lookup now that the
                  * module has been loaded */
9085             table = GvHV(PL_hintgv);
9086             if (table
9087                 && (PL_hints & HINT_LOCALIZE_HH)
9088                 && (cvp = hv_fetch(table, key, keylen, FALSE))
9089                 && SvOK(*cvp))
9090             {
9091                 goto now_ok;
9092             }
9093         }
9094         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9095             msg = Perl_form(aTHX_
9096                                "Constant(%.*s) unknown",
9097                                 (int)(type ? typelen : len),
9098                                 (type ? type: s));
9099         }
9100         else {
9101             why1 = "$^H{";
9102             why2 = key;
9103             why3 = "} is not defined";
                 /* "report" is also entered by a goto from the end of the
                  * function, with why1..why3 and sv set there, when the
                  * handler's result is undefined */
9104         report:
9105             if (*key == 'c') {
9106                 msg = Perl_form(aTHX_
9107                             /* The +3 is for '\N{'; -4 for that, plus '}' */
9108                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9109                       );
9110             }
9111             else {
9112                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9113                                     (int)(type ? typelen : len),
9114                                     (type ? type: s), why1, why2, why3);
9115             }
9116         }
9117         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
             /* sv was mortalized above, so the caller gets its own
              * reference */
9118         return SvREFCNT_inc_simple_NN(sv);
9119     }
9120   now_ok:
9121     cv = *cvp;
         /* Build the optional arguments as temporaries (SVs_TEMP) */
9122     if (!pv && s)
9123         pv = newSVpvn_flags(s, len, SVs_TEMP);
9124     if (type && pv)
9125         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9126     else
9127         typesv = &PL_sv_undef;
9128
9129     PUSHSTACKi(PERLSI_OVERLOAD);
9130     ENTER ;
9131     SAVETMPS;
9132
9133     PUSHMARK(SP) ;
9134     EXTEND(sp, 3);
         /* Argument order: pv (if any), sv, then the type (only with pv) */
9135     if (pv)
9136         PUSHs(pv);
9137     PUSHs(sv);
9138     if (pv)
9139         PUSHs(typesv);
9140     PUTBACK;
         /* G_EVAL is added only when not already inside an eval */
9141     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9142
9143     SPAGAIN ;
9144
9145     /* Check the eval first */
9146     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9147         STRLEN errlen;
9148         const char * errstr;
9149         sv_catpvs(errsv, "Propagated");
9150         errstr = SvPV_const(errsv, errlen);
9151         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
             /* Discard whatever the failed call left and hand back the
              * original value instead */
9152         (void)POPs;
9153         res = SvREFCNT_inc_simple_NN(sv);
9154     }
9155     else {
9156         res = POPs;
             /* Take a reference before FREETMPS below runs */
9157         SvREFCNT_inc_simple_void_NN(res);
9158     }
9159
9160     PUTBACK ;
9161     FREETMPS ;
9162     LEAVE ;
9163     POPSTACK;
9164
9165     if (!SvOK(res)) {
9166         why1 = "Call to &{$^H{";
9167         why2 = key;
9168         why3 = "}} did not return a defined value";
             /* The report code returns SvREFCNT_inc(sv), so point sv at the
              * (mortalized) result before jumping there */
9169         sv = res;
9170         (void)sv_2mortal(sv);
9171         goto report;
9172     }
9173
9174     return res;
9175 }
9176
9177 PERL_STATIC_INLINE void
9178 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9179                     bool is_utf8, bool check_dollar)
9180 {
9181     PERL_ARGS_ASSERT_PARSE_IDENT;
9182
9183     while (*s < PL_bufend) {
9184         if (*d >= e)
9185             Perl_croak(aTHX_ "%s", ident_too_long);
9186         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9187              /* The UTF-8 case must come first, otherwise things
9188              * like c\N{COMBINING TILDE} would start failing, as the
9189              * isWORDCHAR_A case below would gobble the 'c' up.
9190              */
9191
9192             char *t = *s + UTF8SKIP(*s);
9193             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9194                 t += UTF8SKIP(t);
9195             }
9196             if (*d + (t - *s) > e)
9197                 Perl_croak(aTHX_ "%s", ident_too_long);
9198             Copy(*s, *d, t - *s, char);
9199             *d += t - *s;
9200             *s = t;
9201         }
9202         else if ( isWORDCHAR_A(**s) ) {
9203             do {
9204                 *(*d)++ = *(*s)++;
9205             } while (isWORDCHAR_A(**s) && *d < e);
9206         }
9207         else if (   allow_package
9208                  && **s == '\''
9209                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9210         {
9211             *(*d)++ = ':';
9212             *(*d)++ = ':';
9213             (*s)++;
9214         }
9215         else if (allow_package && **s == ':' && (*s)[1] == ':'
9216            /* Disallow things like Foo::$bar. For the curious, this is
9217             * the code path that triggers the "Bad name after" warning
9218             * when looking for barewords.
9219             */
9220            && !(check_dollar && (*s)[2] == '$')) {
9221             *(*d)++ = *(*s)++;
9222             *(*d)++ = *(*s)++;
9223         }
9224         else
9225             break;
9226     }
9227     return;
9228 }
9229
9230 /* Returns a NUL terminated string, with the length of the string written to
9231    *slp
9232    */
9233 STATIC char *
9234 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9235 {
9236     char *d = dest;
9237     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9238     bool is_utf8 = cBOOL(UTF);
9239
9240     PERL_ARGS_ASSERT_SCAN_WORD;
9241
9242     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE);
9243     *d = '\0';
9244     *slp = d - dest;
9245     return s;
9246 }
9247
/* Is the character at 's' a legal single character identifier name?  'e'
 * is the end of the buffer containing 's' (passed on to
 * isIDFIRST_utf8_safe()), and 'is_utf8' is true iff Unicode semantics are
 * to be used.  The legal ones are any of:
 *  a) all ASCII characters except:
 *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
 *          2) '{'
 *     The final case currently doesn't get this far in the program, so we
 *     don't test for it.  If that were to change, it would be ok to allow it.
 *  b) When not under Unicode rules, any upper Latin1 character
 *     (except the one at Latin1 0xAD, which is explicitly rejected below)
 *  c) Otherwise, when unicode rules are used, all XIDS characters.
 *
 *      Because all ASCII characters have the same representation whether
 *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
 *      '{' without knowing if is UTF-8 or not.
 *
 * Every use of the 's' argument is parenthesized so that an expression such
 * as 'p + 1' may be passed safely. */
#define VALID_LEN_ONE_IDENT(s, e, is_utf8)                                  \
    (isGRAPH_A(*(s)) || ((is_utf8)                                          \
                         ? isIDFIRST_utf8_safe(s, e)                        \
                         : (isGRAPH_L1(*(s))                                \
                            && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9266
9267 STATIC char *
9268 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9269 {
         /* Scans the name of a variable, starting at the sigil that 's' points
          * to, and copies the name (without the sigil) into 'dest' as a
          * NUL-terminated string.
          *
          *   s        points at the sigil; that character is kept in 'funny'
          *            and is used only in the "Ambiguous use of" warnings
          *   dest     output buffer for the name
          *   destlen  size of 'dest'; three bytes are held back, see 'e'
          *   ck_uni   if true, and the name is not written in {...} notation
          *            (and is not a $^X style control variable), check_uni()
          *            is called
          *
          * Returns the input position following whatever was consumed.
          * 'dest' is the empty string on return when no name was recognised
          * here, for instance when a '$' introducing a scalar dereference
          * follows the sigil, or when a '{' was seen but its closing '}' was
          * not found where expected; in the latter case the returned pointer
          * is the '{' itself so that the parser can deal with it.
          *
          * Croaks with ident_too_long if the name does not fit in 'dest'. */

         /* herelines is saved so that it can be restored if we have to back
          * up to the opening brace */
9270     I32 herelines = PL_parser->herelines;
         /* offset of the opening '{' within PL_linestr, or -1 when {...}
          * notation is not being processed */
9271     SSize_t bracket = -1;
9272     char funny = *s++;
9273     char *d = dest;
9274     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9275     bool is_utf8 = cBOOL(UTF);
9276     I32 orig_copline = 0, tmp_copline = 0;
9277
9278     PERL_ARGS_ASSERT_SCAN_IDENT;
9279
9280     if (isSPACE(*s) || !*s)
9281         s = skipspace(s);
9282     if (isDIGIT(*s)) {
9283         while (isDIGIT(*s)) {
9284             if (d >= e)
9285                 Perl_croak(aTHX_ "%s", ident_too_long);
9286             *d++ = *s++;
9287         }
9288     }
9289     else {  /* See if it is a "normal" identifier */
9290         parse_ident(&s, &d, e, 1, is_utf8, FALSE);
9291     }
9292     *d = '\0';
9293     d = dest;
9294     if (*d) {
9295         /* Either a digit variable, or parse_ident() found an identifier
9296            (anything valid as a bareword), so job done and return.  */
9297         if (PL_lex_state != LEX_NORMAL)
9298             PL_lex_state = LEX_INTERPENDMAYBE;
9299         return s;
9300     }
9301
9302     /* Here, it is not a run-of-the-mill identifier name */
9303
9304     if (*s == '$' && s[1]
9305         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9306             || isDIGIT_A((U8)s[1])
9307             || s[1] == '$'
9308             || s[1] == '{'
9309             || strEQs(s+1,"::")) )
9310     {
9311         /* Dereferencing a value in a scalar variable.
9312            The alternatives are different syntaxes for a scalar variable.
9313            Using ' as a leading package separator isn't allowed. :: is.   */
9314         return s;
9315     }
9316     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9317     if (*s == '{') {
9318         bracket = s - SvPVX(PL_linestr);
9319         s++;
9320         orig_copline = CopLINE(PL_curcop);
9321         if (s < PL_bufend && isSPACE(*s)) {
9322             s = skipspace(s);
9323         }
9324     }
         /* Accept a one-character name, provided the whole character lies
            within the buffer.  The parentheses around the ?: expression are
            required: without them the ?: would be applied to the result of
            the <= comparison instead of selecting the amount subtracted
            from PL_bufend, and the bounds test would always succeed.  */
9325     if ((s <= PL_bufend - ((is_utf8)
9326                           ? UTF8SKIP(s)
9327                           : 1))
9328         && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
9329     {
9330         if (is_utf8) {
9331             const STRLEN skip = UTF8SKIP(s);
9332             STRLEN i;
9333             d[skip] = '\0';
9334             for ( i = 0; i < skip; i++ )
9335                 d[i] = *s++;
9336         }
9337         else {
9338             *d = *s++;
9339             d[1] = '\0';
9340         }
9341     }
9342     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9343     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9344         *d = toCTRL(*s);
9345         s++;
9346     }
9347     /* Warn about ambiguous code after unary operators if {...} notation isn't
9348        used.  There's no difference in ambiguity; it's merely a heuristic
9349        about when not to warn.  */
9350     else if (ck_uni && bracket == -1)
9351         check_uni();
9352     if (bracket != -1) {
9353         bool skip;
9354         char *s2;
9355         /* If we were processing {...} notation then...  */
9356         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
9357             || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9358                  && isWORDCHAR(*s))
9359         ) {
9360             /* note we have to check for a normal identifier first,
9361              * as it handles utf8 symbols, and only after that has
9362              * been ruled out can we look at the caret words */
9363             if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
9364                 /* if it starts as a valid identifier, assume that it is one.
9365                    (the later check for } being at the expected point will trap
9366                    cases where this doesn't pan out.)  */
9367                 d += is_utf8 ? UTF8SKIP(d) : 1;
9368                 parse_ident(&s, &d, e, 1, is_utf8, TRUE);
9369                 *d = '\0';
9370             }
9371             else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
9372                 d++;
9373                 while (isWORDCHAR(*s) && d < e) {
9374                     *d++ = *s++;
9375                 }
9376                 if (d >= e)
9377                     Perl_croak(aTHX_ "%s", ident_too_long);
9378                 *d = '\0';
9379             }
                 /* remember the line the name ended on, for use as the line
                    number of any warning issued below */
9380             tmp_copline = CopLINE(PL_curcop);
9381             if (s < PL_bufend && isSPACE(*s)) {
9382                 s = skipspace(s);
9383             }
9384             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9385                 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
9386                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9387                     const char * const brack =
9388                         (const char *)
9389                         ((*s == '[') ? "[...]" : "{...}");
9390                     orig_copline = CopLINE(PL_curcop);
9391                     CopLINE_set(PL_curcop, tmp_copline);
9392    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9393                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9394                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9395                         funny, dest, brack, funny, dest, brack);
9396                     CopLINE_set(PL_curcop, orig_copline);
9397                 }
9398                 bracket++;
9399                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9400                 PL_lex_allbrackets++;
9401                 return s;
9402             }
9403         }
9404
9405         if ( !tmp_copline )
9406             tmp_copline = CopLINE(PL_curcop);
9407         if ((skip = s < PL_bufend && isSPACE(*s)))
9408             /* Avoid incrementing line numbers or resetting PL_linestart,
9409                in case we have to back up.  */
9410             s2 = peekspace(s);
9411         else
9412             s2 = s;
9413
9414         /* Expect to find a closing } after consuming any trailing whitespace.
9415          */
9416         if (*s2 == '}') {
9417             /* Now increment line numbers if applicable.  */
9418             if (skip)
9419                 s = skipspace(s);
9420             s++;
9421             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9422                 PL_lex_state = LEX_INTERPEND;
9423                 PL_expect = XREF;
9424             }
9425             if (PL_lex_state == LEX_NORMAL) {
9426                 if (ckWARN(WARN_AMBIGUOUS)
9427                     && (keyword(dest, d - dest, 0)
9428                         || get_cvn_flags(dest, d - dest, is_utf8
9429                            ? SVf_UTF8
9430                            : 0)))
9431                 {
9432                     SV *tmp = newSVpvn_flags( dest, d - dest,
9433                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9434                     if (funny == '#')
9435                         funny = '@';
9436                     orig_copline = CopLINE(PL_curcop);
9437                     CopLINE_set(PL_curcop, tmp_copline);
9438                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9439                         "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
9440                         funny, SVfARG(tmp), funny, SVfARG(tmp));
9441                     CopLINE_set(PL_curcop, orig_copline);
9442                 }
9443             }
9444         }
9445         else {
9446             /* Didn't find the closing } at the point we expected, so restore
9447                state such that the next thing to process is the opening { and */
9448             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9449             CopLINE_set(PL_curcop, orig_copline);
9450             PL_parser->herelines = herelines;
9451             *dest = '\0';
9452         }
9453     }
9454     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9455         PL_lex_state = LEX_INTERPEND;
9456     return s;
9457 }
9458
9459 static bool
9460 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9461
9462     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9463      * found in the parse starting at 's', based on the subset that are valid
9464      * in this context input to this routine in 'valid_flags'. Advances s.
9465      * Returns TRUE if the input should be treated as a valid flag, so the next
9466      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9467      * upon first call on the current regex.  This routine will set it to any
9468      * charset modifier found.  The caller shouldn't change it.  This way,
9469      * another charset modifier encountered in the parse can be detected as an
9470      * error, as we have decided to allow only one */
         /* 'x_mod_count' is handed on to CASE_STD_PMMOD_FLAGS_PARSE_SET;
          * presumably it counts occurrences of the /x modifier -- confirm
          * against that macro's definition. */
9471
9472     const char c = **s;
9473     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9474
         /* A multi-byte character is never accepted as a flag, and neither is
          * a character that is absent from 'valid_flags'.  If the rejected
          * character is a word character it is reported as an unknown
          * modifier and skipped; otherwise it is left alone and ends the
          * modifier list.
          * NOTE(review): strchr() also matches the terminating NUL of
          * 'valid_flags'; the callers visible here (scan_pat, scan_subst)
          * test *s for NUL before calling, so c should not be NUL. */
9475     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9476         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
9477             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9478                        UTF ? SVf_UTF8 : 0);
9479             (*s) += charlen;
9480             /* Pretend that it worked, so will continue processing before
9481              * dying */
9482             return TRUE;
9483         }
9484         return FALSE;
9485     }
9486
9487     switch (c) {
9488
             /* this macro supplies further 'case' labels for this switch */
9489         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
9490         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9491         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9492         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9493         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9494         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
             /* The charset modifiers below are mutually exclusive; *charset
              * records which one, if any, has already been seen. */
9495         case LOCALE_PAT_MOD:
9496             if (*charset) {
9497                 goto multiple_charsets;
9498             }
9499             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9500             *charset = c;
9501             break;
9502         case UNICODE_PAT_MOD:
9503             if (*charset) {
9504                 goto multiple_charsets;
9505             }
9506             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9507             *charset = c;
9508             break;
9509         case ASCII_RESTRICT_PAT_MOD:
9510             if (! *charset) {
9511                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9512             }
9513             else {
9514
9515                 /* Error if previous modifier wasn't an 'a', but if it was, see
9516                  * if, and accept, a second occurrence (only) */
9517                 if (*charset != 'a'
9518                     || get_regex_charset(*pmfl)
9519                         != REGEX_ASCII_RESTRICTED_CHARSET)
9520                 {
9521                         goto multiple_charsets;
9522                 }
9523                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9524             }
9525             *charset = c;
9526             break;
9527         case DEPENDS_PAT_MOD:
9528             if (*charset) {
9529                 goto multiple_charsets;
9530             }
9531             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9532             *charset = c;
9533             break;
9534     }
9535
         /* step over the single-byte flag that was just processed */
9536     (*s)++;
9537     return TRUE;
9538
         /* Reached when a charset modifier conflicts with, or repeats, one
          * that was seen earlier for the same regex. */
9539     multiple_charsets:
9540         if (*charset != c) {
9541             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9542         }
9543         else if (c == 'a') {
9544   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9545             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9546         }
9547         else {
9548             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9549         }
9550
9551         /* Pretend that it worked, so will continue processing before dying */
9552         (*s)++;
9553         return TRUE;
9554 }
9555
9556 STATIC char *
9557 S_scan_pat(pTHX_ char *start, I32 type)
9558 {
         /* Scans a pattern-match construct starting at 'start'.  'type' is
          * the op type given to the new PMOP; OP_QR selects QR_PAT_MODS as the
          * set of accepted modifiers, any other type selects M_PAT_MODS.  The
          * delimited pattern is read with scan_str(), the trailing modifiers
          * are parsed with S_pmflag(), and the PMOP is stored in PL_lex_op.
          * Sets pl_yylval.ival to OP_MATCH and returns the position following
          * the modifiers.  Croaks if the pattern is not terminated. */
9559     PMOP *pm;
9560     char *s;
9561     const char * const valid_flags =
9562         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9563     char charset = '\0';    /* character set modifier */
9564     unsigned int x_mod_count = 0;
9565
9566     PERL_ARGS_ASSERT_SCAN_PAT;
9567
9568     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
9569     if (!s)
9570         Perl_croak(aTHX_ "Search pattern not terminated");
9571
9572     pm = (PMOP*)newPMOP(type, 0);
9573     if (PL_multi_open == '?') {
9574         /* This is the only point in the code that sets PMf_ONCE:  */
9575         pm->op_pmflags |= PMf_ONCE;
9576
9577         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9578            allows us to restrict the list needed by reset to just the ??
9579            matches.  */
9580         assert(type != OP_TRANS);
9581         if (PL_curstash) {
                 /* Append pm to the array of PMOP pointers held in the
                    PERL_MAGIC_symtab magic of the current stash, creating
                    the magic first if the stash has none.  mg_len is the
                    size of that array in bytes.  */
9582             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9583             U32 elements;
9584             if (!mg) {
9585                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9586                                  0);
9587             }
9588             elements = mg->mg_len / sizeof(PMOP**);
9589             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9590             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9591             mg->mg_len = elements * sizeof(PMOP**);
9592             PmopSTASH_set(pm,PL_curstash);
9593         }
9594     }
9595
9596     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9597      * anon CV. False positives like qr/[(?{]/ are harmless */
9598
9599     if (type == OP_QR) {
9600         STRLEN len;
9601         char *e, *p = SvPV(PL_lex_stuff, len);
9602         e = p + len;
             /* Look for "(?{" or "(??{" anywhere in the pattern text.
                NOTE(review): p[1]..p[3] can index up to e; this appears to
                rely on the string buffer being NUL-terminated so that the
                && chain stops there -- confirm.  */
9603         for (; p < e; p++) {
9604             if (p[0] == '(' && p[1] == '?'
9605                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9606             {
9607                 pm->op_pmflags |= PMf_HAS_CV;
9608                 break;
9609             }
9610         }
9611         pm->op_pmflags |= PMf_IS_QR;
9612     }
9613
         /* Consume modifiers until the end of the input or until S_pmflag()
            reports that the next character is not one.  */
9614     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
9615                                 &s, &charset, &x_mod_count))
9616     {};
9617     /* issue a warning if /c is specified,but /g is not */
9618     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9619     {
9620         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9621                        "Use of /c modifier is meaningless without /g" );
9622     }
9623
9624     PL_lex_op = (OP*)pm;
9625     pl_yylval.ival = OP_MATCH;
9626     return s;
9627 }
9628
9629 STATIC char *
9630 S_scan_subst(pTHX_ char *start)
9631 {
         /* Scans a substitution (s/PATTERN/REPLACEMENT/FLAGS) starting at
          * 'start'.  Both delimited parts are read with scan_str(), the
          * trailing modifiers are parsed, and a new OP_SUBST PMOP is stored in
          * PL_lex_op.  When /e was given, or the construct spans more than one
          * source line, PL_parser->lex_sub_repl is upgraded to carry the
          * "eval seen" flag and the number of lines spanned.  Sets
          * pl_yylval.ival to OP_SUBST and returns the position following the
          * modifiers.  Croaks if either part is not terminated. */
9632     char *s;
9633     PMOP *pm;
9634     I32 first_start;
9635     line_t first_line;
9636     line_t linediff = 0;
9637     I32 es = 0;             /* number of /e modifiers seen */
9638     char charset = '\0';    /* character set modifier */
9639     unsigned int x_mod_count = 0;
9640     char *t;
9641
9642     PERL_ARGS_ASSERT_SCAN_SUBST;
9643
9644     pl_yylval.ival = OP_NULL;
9645
9646     s = scan_str(start, TRUE, FALSE, FALSE, &t);
9647
9648     if (!s)
9649         Perl_croak(aTHX_ "Substitution pattern not terminated");
9650
         /* 't' was filled in by scan_str(); presumably it is where the scan
            of the second part has to start -- confirm against scan_str().  */
9651     s = t;
9652
9653     first_start = PL_multi_start;
9654     first_line = CopLINE(PL_curcop);
9655     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9656     if (!s) {
9657         SvREFCNT_dec_NN(PL_lex_stuff);
9658         PL_lex_stuff = NULL;
9659         Perl_croak(aTHX_ "Substitution replacement not terminated");
9660     }
9661     PL_multi_start = first_start;       /* so whole substitution is taken together */
9662
9663     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9664
9665
         /* /e is counted here rather than being passed to S_pmflag(); every
            other modifier goes through S_pmflag().  */
9666     while (*s) {
9667         if (*s == EXEC_PAT_MOD) {
9668             s++;
9669             es++;
9670         }
9671         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9672                                   &s, &charset, &x_mod_count))
9673         {
9674             break;
9675         }
9676     }
9677
9678     if ((pm->op_pmflags & PMf_CONTINUE)) {
9679         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9680     }
9681
9682     if (es) {
             /* Wrap the replacement text in code: one "eval " for each /e
                beyond the first, then "do ", then the text inside braces.  */
9683         SV * const repl = newSVpvs("");
9684
9685         PL_multi_end = 0;
9686         pm->op_pmflags |= PMf_EVAL;
9687         while (es-- > 0) {
9688             if (es)
9689                 sv_catpvs(repl, "eval ");
9690             else
9691                 sv_catpvs(repl, "do ");
9692         }
9693         sv_catpvs(repl, "{");
9694         sv_catsv(repl, PL_parser->lex_sub_repl);
9695         sv_catpvs(repl, "}");
9696         SvREFCNT_dec(PL_parser->lex_sub_repl);
9697         PL_parser->lex_sub_repl = repl;
             /* the loop above left es at -1; from here on it is just a flag */
9698         es = 1;
9699     }
9700
9701
         /* Number of lines consumed since the replacement part began.  If
            there were any, put the current line number back to where the
            replacement started.  */
9702     linediff = CopLINE(PL_curcop) - first_line;
9703     if (linediff)
9704         CopLINE_set(PL_curcop, first_line);
9705
9706     if (linediff || es) {
9707         /* the IVX field indicates that the replacement string is a s///e;
9708          * the NVX field indicates how many src code lines the replacement
9709          * spreads over */
9710         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
             /* store the line count computed above, not a constant, so that
                it is not lost */
9711         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
9712         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
9713                                                                     cBOOL(es);
9714     }
9715
9716     PL_lex_op = (OP*)pm;
9717     pl_yylval.ival = OP_SUBST;
9718     return s;
9719 }
9720
9721 STATIC char *
9722 S_scan_trans(pTHX_ char *start)
9723 {
         /* Scans a transliteration starting at 'start': the two delimited
          * lists are read with scan_str(), followed by any of the modifier
          * letters c, d, s and r.  A new OP_TRANS op (OP_TRANSR when r was
          * given) is stored in PL_lex_op, and the same op type is stored in
          * pl_yylval.ival.  Returns the position of the first character that
          * is not one of those modifiers.  Croaks if either list is not
          * terminated. */
9724     char* s;
9725     OP *o;
9726     U8 squash;
9727     U8 del;
9728     U8 complement;
9729     bool nondestruct = 0;
9730     char *t;
9731
9732     PERL_ARGS_ASSERT_SCAN_TRANS;
9733
9734     pl_yylval.ival = OP_NULL;
9735
9736     s = scan_str(start,FALSE,FALSE,FALSE,&t);
9737     if (!s)
9738         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9739
         /* 't' was filled in by scan_str(); presumably it is where the scan
            of the second list has to start -- confirm against scan_str().  */
9740     s = t;
9741
9742     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9743     if (!s) {
9744         SvREFCNT_dec_NN(PL_lex_stuff);
9745         PL_lex_stuff = NULL;
9746         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9747     }
9748
         /* Collect modifiers; a repeated letter simply sets the same value
            again.  Any other character ends the loop.  */
9749     complement = del = squash = 0;
9750     while (1) {
9751         switch (*s) {
9752         case 'c':
9753             complement = OPpTRANS_COMPLEMENT;
9754             break;
9755         case 'd':
9756             del = OPpTRANS_DELETE;
9757             break;
9758         case 's':
9759             squash = OPpTRANS_SQUASH;
9760             break;
9761         case 'r':
9762             nondestruct = 1;
9763             break;
9764         default:
9765             goto no_more;
9766         }
9767         s++;
9768     }
9769   no_more:
9770
9771     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
         /* Clear every OPpTRANS_* bit, then set the ones requested by the
            modifiers plus the UTF-8 status of the two scanned strings.  */
9772     o->op_private &= ~OPpTRANS_ALL;
9773     o->op_private |= del|squash|complement|
9774       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9775       (DO_UTF8(PL_parser->lex_sub_repl) ? OPpTRANS_TO_UTF   : 0);
9776
9777     PL_lex_op = o;
9778     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9779
9780
9781     return s;
9782 }
9783
9784 /* scan_heredoc
9785    Takes a pointer to the first < in <<FOO.
9786    Returns a pointer to the byte following <<FOO.
9787
9788    This function scans a heredoc, which involves different methods
9789    depending on whether we are in a string eval, quoted construct, etc.
9790    This is because PL_linestr could contain a single line of input, or
9791    a whole string being evalled, or the contents of the current quote-
9792    like operator.
9793
9794    The two basic methods are:
9795     - Steal lines from the input stream
9796     - Scan the heredoc in PL_linestr and remove it therefrom
9797
9798    In a file scope or filtered eval, the first method is used; in a
9799    string eval, the second.
9800
9801    In a quote-like operator, we have to choose between the two,
9802    depending on where we can find a newline.  We peek into outer lex-
9803    ing scopes until we find one with a newline in it.  If we reach the
9804    outermost lexing scope and it is a file, we use the stream method.
9805    Otherwise it is treated as an eval.
9806 */
9807
9808 STATIC char *
9809 S_scan_heredoc(pTHX_ char *s)
9810 {
9811     I32 op_type = OP_SCALAR;
9812     I32 len;
9813     SV *tmpstr;
9814     char term;
9815     char *d;
9816     char *e;
9817     char *peek;
9818     char *indent = 0;
9819     I32 indent_len = 0;
9820     bool indented = FALSE;
9821     const bool infile = PL_rsfp || PL_parser->filtered;
9822     const line_t origline = CopLINE(PL_curcop);
9823     LEXSHARED *shared = PL_parser->lex_shared;
9824
9825     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9826
9827     s += 2;
9828     d = PL_tokenbuf + 1;
9829     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9830     *PL_tokenbuf = '\n';
9831     peek = s;
9832     if (*peek == '~') {
9833         indented = TRUE;
9834         peek++; s++;
9835     }
9836     while (SPACE_OR_TAB(*peek))
9837         peek++;
9838     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9839         s = peek;
9840         term = *s++;
9841         s = delimcpy(d, e, s, PL_bufend, term, &len);
9842         if (s == PL_bufend)
9843             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9844         d += len;
9845         s++;
9846     }
9847     else {
9848         if (*s == '\\')
9849             /* <<\FOO is equivalent to <<'FOO' */
9850             s++, term = '\'';
9851         else
9852             term = '"';
9853         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9854             Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
9855         peek = s;
9856         while (
9857                isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF))
9858         {
9859             peek += UTF ? UTF8SKIP(peek) : 1;
9860         }
9861         len = (peek - s >= e - d) ? (e - d) : (peek - s);
9862         Copy(s, d, len, char);
9863         s += len;
9864         d += len;
9865     }
9866     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9867         Perl_croak(aTHX_ "Delimiter for here document is too long");
9868     *d++ = '\n';
9869     *d = '\0';
9870     len = d - PL_tokenbuf;
9871
9872 #ifndef PERL_STRICT_CR
9873     d = strchr(s, '\r');
9874     if (d) {
9875         char * const olds = s;
9876         s = d;
9877         while (s < PL_bufend) {
9878             if (*s == '\r') {
9879                 *d++ = '\n';
9880                 if (*++s == '\n')
9881                     s++;
9882             }
9883             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
9884                 *d++ = *s++;
9885                 s++;
9886             }
9887             else
9888                 *d++ = *s++;
9889         }
9890         *d = '\0';
9891         PL_bufend = d;
9892         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9893         s = olds;
9894     }
9895 #endif
9896
9897     tmpstr = newSV_type(SVt_PVIV);
9898     SvGROW(tmpstr, 80);
9899     if (term == '\'') {
9900         op_type = OP_CONST;
9901         SvIV_set(tmpstr, -1);
9902     }
9903     else if (term == '`') {
9904         op_type = OP_BACKTICK;
9905         SvIV_set(tmpstr, '\\');
9906     }
9907
9908     PL_multi_start = origline + 1 + PL_parser->herelines;
9909     PL_multi_open = PL_multi_close = '<';
9910     /* inside a string eval or quote-like operator */
9911     if (!infile || PL_lex_inwhat) {
9912         SV *linestr;
9913         char *bufend;
9914         char * const olds = s;
9915         PERL_CONTEXT * const cx = CX_CUR();
9916         /* These two fields are not set until an inner lexing scope is
9917            entered.  But we need them set here. */
9918         shared->ls_bufptr  = s;
9919         shared->ls_linestr = PL_linestr;
9920         if (PL_lex_inwhat)
9921           /* Look for a newline.  If the current buffer does not have one,
9922              peek into the line buffer of the parent lexing scope, going
9923              up as many levels as necessary to find one with a newline
9924              after bufptr.
9925            */
9926           while (!(s = (char *)memchr(
9927                     (void *)shared->ls_bufptr, '\n',
9928                     SvEND(shared->ls_linestr)-shared->ls_bufptr
9929                 ))) {
9930             shared = shared->ls_prev;
9931             /* shared is only null if we have gone beyond the outermost
9932                lexing scope.  In a file, we will have broken out of the
9933                loop in the previous iteration.  In an eval, the string buf-
9934                fer ends with "\n;", so the while condition above will have
9935                evaluated to false.  So shared can never be null.  Or so you
9936                might think.  Odd syntax errors like s;@{<<; can gobble up
9937                the implicit semicolon at the end of a flie, causing the
9938                file handle to be closed even when we are not in a string
9939                eval.  So shared may be null in that case.
9940                (Closing '}' here to balance the earlier open brace for
9941                editors that look for matched pairs.) */
9942             if (UNLIKELY(!shared))
9943                 goto interminable;
9944             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9945                most lexing scope.  In a file, shared->ls_linestr at that
9946                level is just one line, so there is no body to steal. */
9947             if (infile && !shared->ls_prev) {
9948                 s = olds;
9949                 goto streaming;
9950             }
9951           }
9952         else {  /* eval or we've already hit EOF */
9953             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9954             if (!s)
9955                 goto interminable;
9956         }
9957         linestr = shared->ls_linestr;
9958         bufend = SvEND(linestr);
9959         d = s;
9960         if (indented) {
9961             char *myolds = s;
9962
9963             while (s < bufend - len + 1) {
9964                 if (*s++ == '\n')
9965                     ++PL_parser->herelines;
9966
9967                 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
9968                     char *backup = s;
9969                     indent_len = 0;
9970
9971                     /* Only valid if it's preceded by whitespace only */
9972                     while (backup != myolds && --backup >= myolds) {
9973                         if (! SPACE_OR_TAB(*backup)) {
9974                             break;
9975                         }
9976
9977                         indent_len++;
9978                     }
9979
9980                     /* No whitespace or all! */
9981                     if (backup == s || *backup == '\n') {
9982                         Newxz(indent, indent_len + 1, char);
9983                         memcpy(indent, backup + 1, indent_len);
9984                         s--; /* before our delimiter */
9985                         PL_parser->herelines--; /* this line doesn't count */
9986                         break;
9987                     }
9988                 }
9989             }
9990         } else {
9991             while (s < bufend - len + 1
9992                    && memNE(s,PL_tokenbuf,len) )
9993             {
9994                 if (*s++ == '\n')
9995                     ++PL_parser->herelines;
9996             }
9997         }
9998
9999         if (s >= bufend - len + 1) {
10000             goto interminable;
10001         }
10002         sv_setpvn(tmpstr,d+1,s-d);
10003         s += len - 1;
10004         /* the preceding stmt passes a newline */
10005         PL_parser->herelines++;
10006
10007         /* s now points to the newline after the heredoc terminator.
10008            d points to the newline before the body of the heredoc.
10009          */
10010
10011         /* We are going to modify linestr in place here, so set
10012            aside copies of the string if necessary for re-evals or
10013            (caller $n)[6]. */
10014         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10015            check shared->re_eval_str. */
10016         if (shared->re_eval_start || shared->re_eval_str) {
10017             /* Set aside the rest of the regexp */
10018             if (!shared->re_eval_str)
10019                 shared->re_eval_str =
10020                        newSVpvn(shared->re_eval_start,
10021                                 bufend - shared->re_eval_start);
10022             shared->re_eval_start -= s-d;
10023         }
10024         if (cxstack_ix >= 0
10025             && CxTYPE(cx) == CXt_EVAL
10026             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10027             && cx->blk_eval.cur_text == linestr)
10028         {
10029             cx->blk_eval.cur_text = newSVsv(linestr);
10030             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10031         }
10032         /* Copy everything from s onwards back to d. */
10033         Move(s,d,bufend-s + 1,char);
10034         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10035         /* Setting PL_bufend only applies when we have not dug deeper
10036            into other scopes, because sublex_done sets PL_bufend to
10037            SvEND(PL_linestr). */
10038         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10039         s = olds;
10040     }
10041     else
10042     {
10043       SV *linestr_save;
10044       char *oldbufptr_save;
10045       char *oldoldbufptr_save;
10046      streaming:
10047       SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
10048       term = PL_tokenbuf[1];
10049       len--;
10050       linestr_save = PL_linestr; /* must restore this afterwards */
10051       d = s;                     /* and this */
10052       oldbufptr_save = PL_oldbufptr;
10053       oldoldbufptr_save = PL_oldoldbufptr;
10054       PL_linestr = newSVpvs("");
10055       PL_bufend = SvPVX(PL_linestr);
10056       while (1) {
10057         PL_bufptr = PL_bufend;
10058         CopLINE_set(PL_curcop,
10059                     origline + 1 + PL_parser->herelines);
10060         if (!lex_next_chunk(LEX_NO_TERM)
10061          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10062             /* Simply freeing linestr_save might seem simpler here, as it
10063                does not matter what PL_linestr points to, since we are
10064                about to croak; but in a quote-like op, linestr_save
10065                will have been prospectively freed already, via
10066                SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10067                restore PL_linestr. */
10068             SvREFCNT_dec_NN(PL_linestr);
10069             PL_linestr = linestr_save;
10070             PL_oldbufptr = oldbufptr_save;
10071             PL_oldoldbufptr = oldoldbufptr_save;
10072             goto interminable;
10073         }
10074         CopLINE_set(PL_curcop, origline);
10075         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10076             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10077             /* ^That should be enough to avoid this needing to grow:  */
10078             sv_catpvs(PL_linestr, "\n\0");
10079             assert(s == SvPVX(PL_linestr));
10080             PL_bufend = SvEND(PL_linestr);
10081         }
10082         s = PL_bufptr;
10083         PL_parser->herelines++;
10084         PL_last_lop = PL_last_uni = NULL;
10085 #ifndef PERL_STRICT_CR
10086         if (PL_bufend - PL_linestart >= 2) {
10087             if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10088                 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10089             {
10090                 PL_bufend[-2] = '\n';
10091                 PL_bufend--;
10092                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10093             }
10094             else if (PL_bufend[-1] == '\r')
10095                 PL_bufend[-1] = '\n';
10096         }
10097         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10098             PL_bufend[-1] = '\n';
10099 #endif
10100         if (indented && (PL_bufend-s) >= len) {
10101             char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10102
10103             if (found) {
10104                 char *backup = found;
10105                 indent_len = 0;
10106
10107                 /* Only valid if it's preceded by whitespace only */
10108                 while (backup != s && --backup >= s) {
10109                     if (! SPACE_OR_TAB(*backup)) {
10110                         break;
10111                     }
10112                     indent_len++;
10113                 }
10114
10115                 /* All whitespace or none! */
10116                 if (backup == found || SPACE_OR_TAB(*backup)) {
10117                     Newxz(indent, indent_len + 1, char);
10118                     memcpy(indent, backup, indent_len);
10119                     SvREFCNT_dec(PL_linestr);
10120                     PL_linestr = linestr_save;
10121                     PL_linestart = SvPVX(linestr_save);
10122                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10123                     PL_oldbufptr = oldbufptr_save;
10124                     PL_oldoldbufptr = oldoldbufptr_save;
10125                     s = d;
10126                     break;
10127                 }
10128             }
10129
10130             /* Didn't find it */
10131             sv_catsv(tmpstr,PL_linestr);
10132         } else {
10133             if (*s == term && PL_bufend-s >= len
10134                 && memEQ(s,PL_tokenbuf + 1,len))
10135             {
10136                 SvREFCNT_dec(PL_linestr);
10137                 PL_linestr = linestr_save;
10138                 PL_linestart = SvPVX(linestr_save);
10139                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10140                 PL_oldbufptr = oldbufptr_save;
10141                 PL_oldoldbufptr = oldoldbufptr_save;
10142                 s = d;
10143                 break;
10144             } else {
10145                 sv_catsv(tmpstr,PL_linestr);
10146             }
10147         }
10148       }
10149     }
10150     PL_multi_end = origline + PL_parser->herelines;
10151     if (indented && indent) {
10152         STRLEN linecount = 1;
10153         STRLEN herelen = SvCUR(tmpstr);
10154         char *ss = SvPVX(tmpstr);
10155         char *se = ss + herelen;
10156         SV *newstr = newSV(herelen+1);
10157         SvPOK_on(newstr);
10158
10159         /* Trim leading whitespace */
10160         while (ss < se) {
10161             /* newline only? Copy and move on */
10162             if (*ss == '\n') {
10163                 sv_catpv(newstr,"\n");
10164                 ss++;
10165                 linecount++;
10166
10167             /* Found our indentation? Strip it */
10168             } else if (se - ss >= indent_len
10169                        && memEQ(ss, indent, indent_len))
10170             {
10171                 STRLEN le = 0;
10172
10173                 ss += indent_len;
10174
10175                 while ((ss + le) < se && *(ss + le) != '\n')
10176                     le++;
10177
10178                 sv_catpvn(newstr, ss, le);
10179
10180                 ss += le;
10181
10182             /* Line doesn't begin with our indentation? Croak */
10183             } else {
10184                 Perl_croak(aTHX_
10185                     "Indentation on line %d of here-doc doesn't match delimiter",
10186                     (int)linecount
10187                 );
10188             }
10189         }
10190         /* avoid sv_setsv() as we don't want to COW here */
10191         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10192         Safefree(indent);
10193         SvREFCNT_dec_NN(newstr);
10194     }
10195     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10196         SvPV_shrink_to_cur(tmpstr);
10197     }
10198     if (!IN_BYTES) {
10199         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10200             SvUTF8_on(tmpstr);
10201     }
10202     PL_lex_stuff = tmpstr;
10203     pl_yylval.ival = op_type;
10204     return s;
10205
10206   interminable:
10207     SvREFCNT_dec(tmpstr);
10208     CopLINE_set(PL_curcop, origline);
10209     missingterm(PL_tokenbuf + 1);
10210 }
10211
10212 /* scan_inputsymbol
10213    takes: position of first '<' in input buffer
10214    returns: position of first char following the matching '>' in
10215             input buffer
10216    side-effects: pl_yylval and lex_op are set.
10217
10218    This code handles:
10219
10220    <>           read from ARGV
10221    <<>>         read from ARGV without magic open
10222    <FH>         read from filehandle
10223    <pkg::FH>    read from package qualified filehandle
10224    <pkg'FH>     read from package qualified filehandle
10225    <$fh>        read from filehandle in $fh
10226    <*.h>        filename glob
10227
10228 */
10229
10230 STATIC char *
10231 S_scan_inputsymbol(pTHX_ char *start)
10232 {
10233     char *s = start;            /* current position in buffer */
10234     char *end;                  /* scan limit: next newline, else PL_bufend */
10235     I32 len;                    /* length of the text copied into PL_tokenbuf */
10236     bool nomagicopen = FALSE;   /* set to TRUE when the token is <<>> */
10237     char *d = PL_tokenbuf;                                      /* start of temp holding space */
10238     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
10239     /* Lex one <...> token: a readline on a filehandle, or else a glob() */
10240     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10241     /* the construct must close on this line: look no further than the next newline */
10242     end = strchr(s, '\n');
10243     if (!end)
10244         end = PL_bufend;
10245     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10246         nomagicopen = TRUE;
10247         *d = '\0';
10248         len = 0;                /* handled like an empty <> below */
10249         s += 3;
10250     }
10251     else
10252         s = delimcpy(d, e, s + 1, end, '>', &len);      /* extract until > */
10253
10254     /* die if we didn't have space for the contents of the <>,
10255        or if it didn't end, or if we see a newline
10256     */
10257
10258     if (len >= (I32)sizeof PL_tokenbuf)
10259         Perl_croak(aTHX_ "Excessively long <> operator");
10260     if (s >= end)
10261         Perl_croak(aTHX_ "Unterminated <> operator");
10262
10263     s++;                        /* step past the closing '>' */
10264
10265     /* check for <$fh>
10266        Remember, only scalar variables are interpreted as filehandles by
10267        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10268        treated as a glob() call.
10269        This code makes use of the fact that except for the $ at the front,
10270        a scalar variable and a filehandle look the same.
10271     */
10272     if (*d == '$' && d[1]) d++;         /* skip the sigil, but not a lone "$" */
10273
10274     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10275     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
10276         d += UTF ? UTF8SKIP(d) : 1;
10277     }
10278
10279     /* If we've tried to read what we allow filehandles to look like, and
10280        there's still text left, then it must be a glob() and not a getline.
10281        Use scan_str to pull out the stuff between the <> and treat it
10282        as nothing more than a string.
10283     */
10284
10285     if (d - PL_tokenbuf != len) {       /* name scan stopped short of the end */
10286         pl_yylval.ival = OP_GLOB;
10287         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10288         if (!s)
10289            Perl_croak(aTHX_ "Glob not terminated");
10290         return s;
10291     }
10292     else {
10293         bool readline_overriden = FALSE;
10294         GV *gv_readline;
10295         /* we're in a filehandle read situation */
10296         d = PL_tokenbuf;
10297
10298         /* turn <> into <ARGV> */
10299         if (!len)
10300             Copy("ARGV",d,5,char);      /* 5 bytes: the name plus its trailing NUL */
10301
10302         /* Check whether readline() is overridden; if so, entersub ops are built below instead of OP_READLINE */
10303         if ((gv_readline = gv_override("readline",8)))
10304             readline_overriden = TRUE;
10305
10306         /* if <$fh>, create the ops to turn the variable into a
10307            filehandle
10308         */
10309         if (*d == '$') {
10310             /* try to find it in the pad for this block, otherwise
10311                add symbol table ops
10312             */
10313             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
10314             if (tmp != NOT_IN_PAD) {
10315                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {    /* "our" variable: build "Stash::name" */
10316                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10317                     HEK * const stashname = HvNAME_HEK(stash);
10318                     SV * const sym = sv_2mortal(newSVhek(stashname));
10319                     sv_catpvs(sym, "::");
10320                     sv_catpv(sym, d+1);
10321                     d = SvPVX(sym);
10322                     goto intro_sym;     /* then fetch it as a symbol-table variable */
10323                 }
10324                 else {                  /* found in the pad and not "our": use a PADSV op */
10325                     OP * const o = newOP(OP_PADSV, 0);
10326                     o->op_targ = tmp;
10327                     PL_lex_op = readline_overriden
10328                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10329                                 op_append_elem(OP_LIST, o,
10330                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10331                         : newUNOP(OP_READLINE, 0, o);
10332                 }
10333             }
10334             else {
10335                 GV *gv;
10336                 ++d;                    /* skip the '$' */
10337               intro_sym:
10338                 gv = gv_fetchpv(d,
10339                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
10340                                 SVt_PV);
10341                 PL_lex_op = readline_overriden
10342                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10343                             op_append_elem(OP_LIST,
10344                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10345                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10346                     : newUNOP(OP_READLINE, 0,
10347                             newUNOP(OP_RV2SV, 0,
10348                                 newGVOP(OP_GV, 0, gv)));
10349             }
10350             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10351             pl_yylval.ival = OP_NULL;
10352         }
10353
10354         /* If it's none of the above, it must be a literal filehandle
10355            (<Foo::BAR> or <FOO>) so build a simple readline OP; <<>> flags it OPf_SPECIAL */
10356         else {
10357             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10358             PL_lex_op = readline_overriden
10359                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10360                         op_append_elem(OP_LIST,
10361                             newGVOP(OP_GV, 0, gv),
10362                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10363                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
10364             pl_yylval.ival = OP_NULL;
10365         }
10366     }
10367
10368     return s;
10369 }
10370
10371
10372 /* scan_str
10373    takes:
10374         start                   position in buffer
10375         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
10376                                 only if they are of the open/close form
10377         keep_delims             preserve the delimiters around the string
10378         re_reparse              compiling a run-time /(?{})/:
10379                                    collapse // to /,  and skip encoding src
10380         delimp                  if non-null, this is set to the position of
10381                                 the closing delimiter, or just after it if
10382                                 the closing and opening delimiters differ
10383                                 (i.e., the opening delimiter of a substitu-
10384                                 tion replacement)
10385    returns: position to continue reading from buffer
10386    side-effects: multi_start, multi_open, multi_close, multi_end, and
10387         lex_stuff or lex_sub_repl are set; updates the read buffer.
10388
10389    This subroutine pulls a string out of the input.  It is called for:
10390         q               single quotes           q(literal text)
10391         '               single quotes           'literal text'
10392         qq              double quotes           qq(interpolate $here please)
10393         "               double quotes           "interpolate $here please"
10394         qx              backticks               qx(/bin/ls -l)
10395         `               backticks               `/bin/ls -l`
10396         qw              quote words             @EXPORT_OK = qw( func() $spam )
10397         m//             regexp match            m/this/
10398         s///            regexp substitute       s/this/that/
10399         tr///           string transliterate    tr/this/that/
10400         y///            string transliterate    y/this/that/
10401         ($*@)           sub prototypes          sub foo ($)
10402         (stuff)         sub attr parameters     sub foo : attr(stuff)
10403         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10404
10405    In most of these cases (all but <>, patterns and transliterate)
10406    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10407    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10408    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10409    calls scan_str().
10410
10411    It skips whitespace before the string starts, and treats the first
10412    character as the delimiter.  If the delimiter is one of ([{< then
10413    the corresponding "close" character )]}> is used as the closing
10414    delimiter.  It allows quoting of delimiters, and if the string has
10415    balanced delimiters ([{<>}]) it allows nesting.
10416
10417    On success, the SV with the resulting string is put into lex_stuff or,
10418    if that is already non-NULL, into lex_sub_repl. The second case occurs only
10419    when parsing the RHS of the special constructs s/// and tr/// (y///).
10420    For convenience, the terminating delimiter character is stuffed into
10421    SvIVX of the SV.
10422 */
10423
10424 STATIC char *
10425 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
10426                  char **delimp
10427     )
10428 {
10429     SV *sv;                     /* scalar value: string */
10430     const char *tmps;           /* temp string, used for delimiter matching */
10431     char *s = start;            /* current position in the buffer */
10432     char term;                  /* terminating character */
10433     char *to;                   /* current position in the sv's data */
10434     I32 brackets = 1;           /* bracket nesting level */
10435     bool has_utf8 = FALSE;      /* is there any utf8 content? */
10436     IV termcode;                /* terminating char. code */
10437     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
10438     STRLEN termlen;             /* length of terminating string */
10439     line_t herelines;           /* PL_parser->herelines on entry; put back on success */
10440     /* Copy one delimited string into a new SV; returns NULL if no more input can be read first */
10441     /* The delimiters that have a mirror-image closing one */
10442     const char * opening_delims = "([{<";
10443     const char * closing_delims = ")]}>";
10444
10445     const char * non_grapheme_msg = "Use of unassigned code point or"
10446                                     " non-standalone grapheme for a delimiter"
10447                                     " will be a fatal error starting in Perl"
10448                                     " 5.30";
10449     /* The only non-UTF character that isn't a stand alone grapheme is
10450      * white-space, hence can't be a delimiter.  So can skip for non-UTF-8 */
10451     bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED);
10452
10453     PERL_ARGS_ASSERT_SCAN_STR;
10454
10455     /* skip space before the delimiter */
10456     if (isSPACE(*s)) {
10457         s = skipspace(s);
10458     }
10459
10460     /* mark where we are, in case we need to report errors */
10461     CLINE;
10462
10463     /* after skipping whitespace, the next character is the terminator */
10464     term = *s;
10465     if (!UTF || UTF8_IS_INVARIANT(term)) {      /* single-byte delimiter */
10466         termcode = termstr[0] = term;
10467         termlen = 1;
10468     }
10469     else {                                      /* non-invariant byte under UTF: decode the delimiter */
10470         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10471         if (check_grapheme) {
10472             if (   UNLIKELY(UNICODE_IS_SUPER(termcode))
10473                 || UNLIKELY(UNICODE_IS_NONCHAR(termcode)))
10474             {
10475                 /* These are considered graphemes, and since the ending
10476                  * delimiter will be the same, we don't have to check the other
10477                  * end */
10478                 check_grapheme = FALSE;
10479             }
10480             else if (UNLIKELY(! _is_grapheme((U8 *) start,
10481                                              (U8 *) s,
10482                                              (U8 *) PL_bufend,
10483                                              termcode)))
10484             {
10485                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "%s", non_grapheme_msg);
10486
10487                 /* Don't have to check the other end, as have already warned at
10488                  * this one */
10489                 check_grapheme = FALSE;
10490             }
10491         }
10492
10493         Copy(s, termstr, termlen, U8);          /* keep the delimiter's bytes for matching the close */
10494     }
10495
10496     /* mark where we are */
10497     PL_multi_start = CopLINE(PL_curcop);
10498     PL_multi_open = termcode;
10499     herelines = PL_parser->herelines;
10500
10501     /* If the delimiter has a mirror-image closing one, get it */
10502     if (term && (tmps = strchr(opening_delims, term))) {
10503         termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
10504     }
10505
10506     PL_multi_close = termcode;
10507     /* keep_bracketed_quoted only has effect when the open and close delimiters differ */
10508     if (PL_multi_open == PL_multi_close) {
10509         keep_bracketed_quoted = FALSE;
10510     }
10511
10512     /* create a new SV to hold the contents, asking SvGROW for an initial
10513        buffer of 80 bytes; the closing delimiter's code is kept in its IV slot */
10514     sv = newSV_type(SVt_PVIV);
10515     SvGROW(sv, 80);
10516     SvIV_set(sv, termcode);
10517     (void)SvPOK_only(sv);               /* validate pointer */
10518
10519     /* move past delimiter and try to read a complete string */
10520     if (keep_delims)
10521         sv_catpvn(sv, s, termlen);
10522     s += termlen;
10523     for (;;) {
10524         /* extend sv if need be */
10525         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10526         /* set 'to' to the next character in the sv's string */
10527         to = SvPVX(sv)+SvCUR(sv);
10528
10529         /* open and close delimiters are the same: no nesting to track */
10530         if (PL_multi_open == PL_multi_close) {
10531             for (; s < PL_bufend; s++,to++) {
10532                 /* embedded newlines increment the current line number */
10533                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10534                     COPLINE_INC_WITH_HERELINES;
10535                 /* handle quoted delimiters */
10536                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10537                     if (!keep_bracketed_quoted
10538                         && (s[1] == term
10539                             || (re_reparse && s[1] == '\\'))
10540                     )
10541                         s++;
10542                     else /* any other quotes are simply copied straight through */
10543                         *to++ = *s++;
10544                 }
10545                 /* terminate when run out of buffer (the for() condition), or
10546                    have found the terminator */
10547                 else if (*s == term) {  /* First byte of terminator matches */
10548                     if (termlen == 1)   /* If is the only byte, are done */
10549                         break;
10550
10551                     /* If the remainder of the terminator matches, also are
10552                      * done, after checking that is a separate grapheme */
10553                     if (   s + termlen <= PL_bufend
10554                         && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
10555                     {
10556                         if (   check_grapheme
10557                             && UNLIKELY(! _is_grapheme((U8 *) start,
10558                                                               (U8 *) s,
10559                                                               (U8 *) PL_bufend,
10560                                                               termcode)))
10561                         {
10562                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10563                                         "%s", non_grapheme_msg);
10564                         }
10565                         break;
10566                     }
10567                 }
10568                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
10569                     has_utf8 = TRUE;
10570                 }
10571                 /* copy the current byte (after a backslash, the byte that followed it) */
10572                 *to = *s;
10573             }
10574         }
10575
10576         /* if the terminator isn't the same as the start character (e.g.,
10577            matched brackets), we have to allow more in the quoting, and
10578            be prepared for nested brackets.
10579         */
10580         else {
10581             /* read until we run out of string, or we find the terminator */
10582             for (; s < PL_bufend; s++,to++) {
10583                 /* embedded newlines increment the line count */
10584                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10585                     COPLINE_INC_WITH_HERELINES;
10586                 /* backslashes can escape the open or closing characters */
10587                 if (*s == '\\' && s+1 < PL_bufend) {
10588                     if (!keep_bracketed_quoted
10589                        && ( ((UV)s[1] == PL_multi_open)
10590                          || ((UV)s[1] == PL_multi_close) ))
10591                     {
10592                         s++;
10593                     }
10594                     else
10595                         *to++ = *s++;
10596                 }
10597                 /* allow nested opens and closes */
10598                 else if ((UV)*s == PL_multi_close && --brackets <= 0)
10599                     break;
10600                 else if ((UV)*s == PL_multi_open)
10601                     brackets++;
10602                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10603                     has_utf8 = TRUE;
10604                 *to = *s;
10605             }
10606         }
10607         /* terminate the copied string and update the sv's end-of-string */
10608         *to = '\0';
10609         SvCUR_set(sv, to - SvPVX_const(sv));
10610
10611         /*
10612          * this next chunk reads more into the buffer if we're not done yet
10613          */
10614
10615         if (s < PL_bufend)
10616             break;              /* stopped before the end of the buffer, i.e. on the terminator: done */
10617         /* at end of buffer: fold a trailing CRLF or LFCR in the copy to "\n", and a lone trailing CR too */
10618 #ifndef PERL_STRICT_CR
10619         if (to - SvPVX_const(sv) >= 2) {
10620             if (   (to[-2] == '\r' && to[-1] == '\n')
10621                 || (to[-2] == '\n' && to[-1] == '\r'))
10622             {
10623                 to[-2] = '\n';
10624                 to--;
10625                 SvCUR_set(sv, to - SvPVX_const(sv));
10626             }
10627             else if (to[-1] == '\r')
10628                 to[-1] = '\n';
10629         }
10630         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10631             to[-1] = '\n';
10632 #endif
10633
10634         /* if we're out of file, or a read fails, bail and reset the current
10635            line marker so we can report where the unterminated string began
10636         */
10637         COPLINE_INC_WITH_HERELINES;
10638         PL_bufptr = PL_bufend;
10639         if (!lex_next_chunk(0)) {
10640             sv_free(sv);
10641             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10642             return NULL;
10643         }
10644         s = start = PL_bufptr;  /* carry on scanning from PL_bufptr */
10645     }
10646
10647     /* at this point, we have successfully read the delimited string */
10648
10649     if (keep_delims)
10650             sv_catpvn(sv, s, termlen);
10651     s += termlen;
10652
10653     if (has_utf8)
10654         SvUTF8_on(sv);
10655     /* note the line the string ended on, then put the cop back on its starting line */
10656     PL_multi_end = CopLINE(PL_curcop);
10657     CopLINE_set(PL_curcop, PL_multi_start);
10658     PL_parser->herelines = herelines;   /* the value saved on entry */
10659
10660     /* if we allocated too much space, give some back */
10661     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10662         SvLEN_set(sv, SvCUR(sv) + 1);
10663         SvPV_renew(sv, SvLEN(sv));
10664     }
10665
10666     /* decide whether this is the first or second quoted string we've read
10667        for this op: the first goes in PL_lex_stuff, a second in lex_sub_repl
10668     */
10669
10670     if (PL_lex_stuff)
10671         PL_parser->lex_sub_repl = sv;
10672     else
10673         PL_lex_stuff = sv;
10674     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10675     return s;
10676 }
10677
10678 /*
10679   scan_num
10680   takes: pointer to position in buffer
10681   returns: pointer to new position in buffer
10682   side-effects: builds ops for the constant in pl_yylval.op
10683
10684   Read a number in any of the formats that Perl accepts:
10685
10686   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10687   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10688   0b[01](_?[01])*                                       binary integers
10689   0[0-7](_?[0-7])*                                      octal integers
10690   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
10691   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
10692
10693   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10694   thing it reads.
10695
10696   If it reads a number without a decimal point or an exponent, it will
10697   try converting the number to an integer and see if it can do so
10698   without loss of precision.
10699 */
10700
10701 char *
10702 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10703 {
10704     const char *s = start;      /* current position in buffer */
10705     char *d;                    /* destination in temp buffer */
10706     char *e;                    /* end of temp buffer */
10707     NV nv;                              /* number read, as a double */
10708     SV *sv = NULL;                      /* place to put the converted number */
10709     bool floatit;                       /* boolean: int or float? */
10710     const char *lastub = NULL;          /* position of last underbar */
10711     static const char* const number_too_long = "Number too long";
10712     bool warned_about_underscore = 0;
10713 #define WARN_ABOUT_UNDERSCORE() \
10714         do { \
10715             if (!warned_about_underscore) { \
10716                 warned_about_underscore = 1; \
10717                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
10718                                "Misplaced _ in number"); \
10719             } \
10720         } while(0)
10721     /* Hexadecimal floating point.
10722      *
10723      * In many places (where we have quads and NV is IEEE 754 double)
10724      * we can fit the mantissa bits of a NV into an unsigned quad.
10725      * (Note that UVs might not be quads even when we have quads.)
10726      * This will not work everywhere, though (either no quads, or
10727      * using long doubles), in which case we have to resort to NV,
10728      * which will probably mean horrible loss of precision due to
10729      * multiple fp operations. */
10730     bool hexfp = FALSE;
10731     int total_bits = 0;
10732     int significant_bits = 0;
10733 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10734 #  define HEXFP_UQUAD
10735     Uquad_t hexfp_uquad = 0;
10736     int hexfp_frac_bits = 0;
10737 #else
10738 #  define HEXFP_NV
10739     NV hexfp_nv = 0.0;
10740 #endif
10741     NV hexfp_mult = 1.0;
10742     UV high_non_zero = 0; /* highest digit */
10743     int non_zero_integer_digits = 0;
10744
10745     PERL_ARGS_ASSERT_SCAN_NUM;
10746
10747     /* We use the first character to decide what type of number this is */
10748
10749     switch (*s) {
10750     default:
10751         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10752
10753     /* if it starts with a 0, it could be an octal number, a decimal in
10754        0.13 disguise, or a hexadecimal number, or a binary number. */
10755     case '0':
10756         {
10757           /* variables:
10758              u          holds the "number so far"
10759              shift      the power of 2 of the base
10760                         (hex == 4, octal == 3, binary == 1)
10761              overflowed was the number more than we can hold?
10762
10763              Shift is used when we add a digit.  It also serves as an "are
10764              we in octal/hex/binary?" indicator to disallow hex characters
10765              when in octal mode.
10766            */
10767             NV n = 0.0;
10768             UV u = 0;
10769             I32 shift;
10770             bool overflowed = FALSE;
10771             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10772             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10773             static const char* const bases[5] =
10774               { "", "binary", "", "octal", "hexadecimal" };
10775             static const char* const Bases[5] =
10776               { "", "Binary", "", "Octal", "Hexadecimal" };
10777             static const char* const maxima[5] =
10778               { "",
10779                 "0b11111111111111111111111111111111",
10780                 "",
10781                 "037777777777",
10782                 "0xffffffff" };
10783             const char *base, *Base, *max;
10784
10785             /* check for hex */
10786             if (isALPHA_FOLD_EQ(s[1], 'x')) {
10787                 shift = 4;
10788                 s += 2;
10789                 just_zero = FALSE;
10790             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10791                 shift = 1;
10792                 s += 2;
10793                 just_zero = FALSE;
10794             }
10795             /* check for a decimal in disguise */
10796             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10797                 goto decimal;
10798             /* so it must be octal */
10799             else {
10800                 shift = 3;
10801                 s++;
10802             }
10803
10804             if (*s == '_') {
10805                 WARN_ABOUT_UNDERSCORE();
10806                lastub = s++;
10807             }
10808
10809             base = bases[shift];
10810             Base = Bases[shift];
10811             max  = maxima[shift];
10812
10813             /* read the rest of the number */
10814             for (;;) {
10815                 /* x is used in the overflow test,
10816                    b is the digit we're adding on. */
10817                 UV x, b;
10818
10819                 switch (*s) {
10820
10821                 /* if we don't mention it, we're done */
10822                 default:
10823                     goto out;
10824
10825                 /* _ are ignored -- but warned about if consecutive */
10826                 case '_':
10827                     if (lastub && s == lastub + 1)
10828                         WARN_ABOUT_UNDERSCORE();
10829                     lastub = s++;
10830                     break;
10831
10832                 /* 8 and 9 are not octal */
10833                 case '8': case '9':
10834                     if (shift == 3)
10835                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10836                     /* FALLTHROUGH */
10837
10838                 /* octal digits */
10839                 case '2': case '3': case '4':
10840                 case '5': case '6': case '7':
10841                     if (shift == 1)
10842                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10843                     /* FALLTHROUGH */
10844
10845                 case '0': case '1':
10846                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10847                     goto digit;
10848
10849                 /* hex digits */
10850                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10851                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10852                     /* make sure they said 0x */
10853                     if (shift != 4)
10854                         goto out;
10855                     b = (*s++ & 7) + 9;
10856
10857                     /* Prepare to put the digit we have onto the end
10858                        of the number so far.  We check for overflows.
10859                     */
10860
10861                   digit:
10862                     just_zero = FALSE;
10863                     if (!overflowed) {
10864                         x = u << shift; /* make room for the digit */
10865
10866                         total_bits += shift;
10867
10868                         if ((x >> shift) != u
10869                             && !(PL_hints & HINT_NEW_BINARY)) {
10870                             overflowed = TRUE;
10871                             n = (NV) u;
10872                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10873                                              "Integer overflow in %s number",
10874                                              base);
10875                         } else
10876                             u = x | b;          /* add the digit to the end */
10877                     }
10878                     if (overflowed) {
10879                         n *= nvshift[shift];
10880                         /* If an NV does not have enough bits in its
10881                          * mantissa to represent a UV, this summing of
10882                          * small low-order numbers is a waste of time
10883                          * (because the NV cannot preserve the
10884                          * low-order bits anyway): we could just
10885                          * remember when we overflowed and, at the
10886                          * end, multiply n by the right
10887                          * amount. */
10888                         n += (NV) b;
10889                     }
10890
10891                     if (high_non_zero == 0 && b > 0)
10892                         high_non_zero = b;
10893
10894                     if (high_non_zero)
10895                         non_zero_integer_digits++;
10896
10897                     /* this could be hexfp, but peek ahead
10898                      * to avoid matching ".." */
10899                     if (UNLIKELY(HEXFP_PEEK(s))) {
10900                         goto out;
10901                     }
10902
10903                     break;
10904                 }
10905             }
10906
10907           /* if we get here, we had success: make a scalar value from
10908              the number.
10909           */
10910           out:
10911
10912             /* final misplaced underbar check */
10913             if (s[-1] == '_')
10914                 WARN_ABOUT_UNDERSCORE();
10915
10916             if (UNLIKELY(HEXFP_PEEK(s))) {
10917                 /* Do sloppy (on the underbars) but quick detection
10918                  * (and value construction) for hexfp, the decimal
10919                  * detection will shortly be more thorough with the
10920                  * underbar checks. */
10921                 const char* h = s;
10922                 significant_bits = non_zero_integer_digits * shift;
10923 #ifdef HEXFP_UQUAD
10924                 hexfp_uquad = u;
10925 #else /* HEXFP_NV */
10926                 hexfp_nv = u;
10927 #endif
10928                 /* Ignore the leading zero bits of
10929                  * the high (first) non-zero digit. */
10930                 if (high_non_zero) {
10931                     if (high_non_zero < 0x8)
10932                         significant_bits--;
10933                     if (high_non_zero < 0x4)
10934                         significant_bits--;
10935                     if (high_non_zero < 0x2)
10936                         significant_bits--;
10937                 }
10938
10939                 if (*h == '.') {
10940 #ifdef HEXFP_NV
10941                     NV nv_mult = 1.0;
10942 #endif
10943                     bool accumulate = TRUE;
10944                     for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
10945                         if (isXDIGIT(*h)) {
10946                             U8 b = XDIGIT_VALUE(*h);
10947                             significant_bits += shift;
10948 #ifdef HEXFP_UQUAD
10949                             if (accumulate) {
10950                                 if (significant_bits < NV_MANT_DIG) {
10951                                     /* We are in the long "run" of xdigits,
10952                                      * accumulate the full four bits. */
10953                                     hexfp_uquad <<= shift;
10954                                     hexfp_uquad |= b;
10955                                     hexfp_frac_bits += shift;
10956                                 } else {
10957                                     /* We are at a hexdigit either at,
10958                                      * or straddling, the edge of the
10959                                      * mantissa.  We will try grabbing
10960                                      * as many bits as possible. */
10961                                     int tail =
10962                                       significant_bits - NV_MANT_DIG;
10963                                     if (tail <= 0)
10964                                        tail += shift;
10965                                     hexfp_uquad <<= tail;
10966                                     hexfp_uquad |= b >> (shift - tail);
10967                                     hexfp_frac_bits += tail;
10968
10969                                     /* Ignore the trailing zero bits
10970                                      * of the last non-zero xdigit.
10971                                      *
10972                                      * The assumption here is that if
10973                                      * one has input of e.g. the xdigit
10974                                      * eight (0x8), there is only one
10975                                      * bit being input, not the full
10976                                      * four bits.  Conversely, if one
10977                                      * specifies a zero xdigit, the
10978                                      * assumption is that one really
10979                                      * wants all those bits to be zero. */
10980                                     if (b) {
10981                                         if ((b & 0x1) == 0x0) {
10982                                             significant_bits--;
10983                                             if ((b & 0x2) == 0x0) {
10984                                                 significant_bits--;
10985                                                 if ((b & 0x4) == 0x0) {
10986                                                     significant_bits--;
10987                                                 }
10988                                             }
10989                                         }
10990                                     }
10991
10992                                     accumulate = FALSE;
10993                                 }
10994                             } else {
10995                                 /* Keep skipping the xdigits, and
10996                                  * accumulating the significant bits,
10997                                  * but do not shift the uquad
10998                                  * (which would catastrophically drop
10999                                  * high-order bits) or accumulate the
11000                                  * xdigits anymore. */
11001                             }
11002 #else /* HEXFP_NV */
11003                             if (accumulate) {
11004                                 nv_mult /= 16.0;
11005                                 if (nv_mult > 0.0)
11006                                     hexfp_nv += b * nv_mult;
11007                                 else
11008                                     accumulate = FALSE;
11009                             }
11010 #endif
11011                         }
11012                         if (significant_bits >= NV_MANT_DIG)
11013                             accumulate = FALSE;
11014                     }
11015                 }
11016
11017                 if ((total_bits > 0 || significant_bits > 0) &&
11018                     isALPHA_FOLD_EQ(*h, 'p')) {
11019                     bool negexp = FALSE;
11020                     h++;
11021                     if (*h == '+')
11022                         h++;
11023                     else if (*h == '-') {
11024                         negexp = TRUE;
11025                         h++;
11026                     }
11027                     if (isDIGIT(*h)) {
11028                         I32 hexfp_exp = 0;
11029                         while (isDIGIT(*h) || *h == '_') {
11030                             if (isDIGIT(*h)) {
11031                                 hexfp_exp *= 10;
11032                                 hexfp_exp += *h - '0';
11033 #ifdef NV_MIN_EXP
11034                                 if (negexp
11035                                     && -hexfp_exp < NV_MIN_EXP - 1) {
11036                                     /* NOTE: this means that the exponent
11037                                      * underflow warning happens for
11038                                      * the IEEE 754 subnormals (denormals),
11039                                      * because DBL_MIN_EXP etc are the lowest
11040                                      * possible binary (or, rather, DBL_RADIX-base)
11041                                      * exponent for normals, not subnormals.
11042                                      *
11043                                      * This may or may not be a good thing. */
11044                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11045                                                    "Hexadecimal float: exponent underflow");
11046                                     break;
11047                                 }
11048 #endif
11049 #ifdef NV_MAX_EXP
11050                                 if (!negexp
11051                                     && hexfp_exp > NV_MAX_EXP - 1) {
11052                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11053                                                    "Hexadecimal float: exponent overflow");
11054                                     break;
11055                                 }
11056 #endif
11057                             }
11058                             h++;
11059                         }
11060                         if (negexp)
11061                             hexfp_exp = -hexfp_exp;
11062 #ifdef HEXFP_UQUAD
11063                         hexfp_exp -= hexfp_frac_bits;
11064 #endif
11065                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
11066                         hexfp = TRUE;
11067                         goto decimal;
11068                     }
11069                 }
11070             }
11071
11072             if (overflowed) {
11073                 if (n > 4294967295.0)
11074                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11075                                    "%s number > %s non-portable",
11076                                    Base, max);
11077                 sv = newSVnv(n);
11078             }
11079             else {
11080 #if UVSIZE > 4
11081                 if (u > 0xffffffff)
11082                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11083                                    "%s number > %s non-portable",
11084                                    Base, max);
11085 #endif
11086                 sv = newSVuv(u);
11087             }
11088             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11089                 sv = new_constant(start, s - start, "integer",
11090                                   sv, NULL, NULL, 0);
11091             else if (PL_hints & HINT_NEW_BINARY)
11092                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11093         }
11094         break;
11095
11096     /*
11097       handle decimal numbers.
11098       we're also sent here when we read a 0 as the first digit
11099     */
11100     case '1': case '2': case '3': case '4': case '5':
11101     case '6': case '7': case '8': case '9': case '.':
11102       decimal:
11103         d = PL_tokenbuf;
11104         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11105         floatit = FALSE;
11106         if (hexfp) {
11107             floatit = TRUE;
11108             *d++ = '0';
11109             *d++ = 'x';
11110             s = start + 2;
11111         }
11112
11113         /* read next group of digits and _ and copy into d */
11114         while (isDIGIT(*s)
11115                || *s == '_'
11116                || UNLIKELY(hexfp && isXDIGIT(*s)))
11117         {
11118             /* skip underscores, checking for misplaced ones
11119                if -w is on
11120             */
11121             if (*s == '_') {
11122                 if (lastub && s == lastub + 1)
11123                     WARN_ABOUT_UNDERSCORE();
11124                 lastub = s++;
11125             }
11126             else {
11127                 /* check for end of fixed-length buffer */
11128                 if (d >= e)
11129                     Perl_croak(aTHX_ "%s", number_too_long);
11130                 /* if we're ok, copy the character */
11131                 *d++ = *s++;
11132             }
11133         }
11134
11135         /* final misplaced underbar check */
11136         if (lastub && s == lastub + 1)
11137             WARN_ABOUT_UNDERSCORE();
11138
11139         /* read a decimal portion if there is one.  avoid
11140            3..5 being interpreted as the number 3. followed
11141            by .5
11142         */
11143         if (*s == '.' && s[1] != '.') {
11144             floatit = TRUE;
11145             *d++ = *s++;
11146
11147             if (*s == '_') {
11148                 WARN_ABOUT_UNDERSCORE();
11149                 lastub = s;
11150             }
11151
11152             /* copy, ignoring underbars, until we run out of digits.
11153             */
11154             for (; isDIGIT(*s)
11155                    || *s == '_'
11156                    || UNLIKELY(hexfp && isXDIGIT(*s));
11157                  s++)
11158             {
11159                 /* fixed length buffer check */
11160                 if (d >= e)
11161                     Perl_croak(aTHX_ "%s", number_too_long);
11162                 if (*s == '_') {
11163                    if (lastub && s == lastub + 1)
11164                         WARN_ABOUT_UNDERSCORE();
11165                    lastub = s;
11166                 }
11167                 else
11168                     *d++ = *s;
11169             }
11170             /* fractional part ending in underbar? */
11171             if (s[-1] == '_')
11172                 WARN_ABOUT_UNDERSCORE();
11173             if (*s == '.' && isDIGIT(s[1])) {
11174                 /* oops, it's really a v-string, but without the "v" */
11175                 s = start;
11176                 goto vstring;
11177             }
11178         }
11179
11180         /* read exponent part, if present */
11181         if ((isALPHA_FOLD_EQ(*s, 'e')
11182               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11183             && strchr("+-0123456789_", s[1]))
11184         {
11185             floatit = TRUE;
11186
11187             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11188                ditto for p (hexfloats) */
11189             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11190                 /* At least some Mach atof()s don't grok 'E' */
11191                 *d++ = 'e';
11192             }
11193             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11194                 *d++ = 'p';
11195             }
11196
11197             s++;
11198
11199
11200             /* stray preinitial _ */
11201             if (*s == '_') {
11202                 WARN_ABOUT_UNDERSCORE();
11203                 lastub = s++;
11204             }
11205
11206             /* allow positive or negative exponent */
11207             if (*s == '+' || *s == '-')
11208                 *d++ = *s++;
11209
11210             /* stray initial _ */
11211             if (*s == '_') {
11212                 WARN_ABOUT_UNDERSCORE();
11213                 lastub = s++;
11214             }
11215
11216             /* read digits of exponent */
11217             while (isDIGIT(*s) || *s == '_') {
11218                 if (isDIGIT(*s)) {
11219                     if (d >= e)
11220                         Perl_croak(aTHX_ "%s", number_too_long);
11221                     *d++ = *s++;
11222                 }
11223                 else {
11224                    if (((lastub && s == lastub + 1)
11225                         || (!isDIGIT(s[1]) && s[1] != '_')))
11226                         WARN_ABOUT_UNDERSCORE();
11227                    lastub = s++;
11228                 }
11229             }
11230         }
11231
11232
11233         /*
11234            We try to do an integer conversion first if no characters
11235            indicating "float" have been found.
11236          */
11237
11238         if (!floatit) {
11239             UV uv;
11240             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11241
11242             if (flags == IS_NUMBER_IN_UV) {
11243               if (uv <= IV_MAX)
11244                 sv = newSViv(uv); /* Prefer IVs over UVs. */
11245               else
11246                 sv = newSVuv(uv);
11247             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11248               if (uv <= (UV) IV_MIN)
11249                 sv = newSViv(-(IV)uv);
11250               else
11251                 floatit = TRUE;
11252             } else
11253               floatit = TRUE;
11254         }
11255         if (floatit) {
11256             STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
11257             /* terminate the string */
11258             *d = '\0';
11259             if (UNLIKELY(hexfp)) {
11260 #  ifdef NV_MANT_DIG
11261                 if (significant_bits > NV_MANT_DIG)
11262                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11263                                    "Hexadecimal float: mantissa overflow");
11264 #  endif
11265 #ifdef HEXFP_UQUAD
11266                 nv = hexfp_uquad * hexfp_mult;
11267 #else /* HEXFP_NV */
11268                 nv = hexfp_nv * hexfp_mult;
11269 #endif
11270             } else {
11271                 nv = Atof(PL_tokenbuf);
11272             }
11273             RESTORE_LC_NUMERIC_UNDERLYING();
11274             sv = newSVnv(nv);
11275         }
11276
11277         if ( floatit
11278              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11279             const char *const key = floatit ? "float" : "integer";
11280             const STRLEN keylen = floatit ? 5 : 7;
11281             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11282                                 key, keylen, sv, NULL, NULL, 0);
11283         }
11284         break;
11285
11286     /* if it starts with a v, it could be a v-string */
11287     case 'v':
11288     vstring:
11289                 sv = newSV(5); /* preallocate storage space */
11290                 ENTER_with_name("scan_vstring");
11291                 SAVEFREESV(sv);
11292                 s = scan_vstring(s, PL_bufend, sv);
11293                 SvREFCNT_inc_simple_void_NN(sv);
11294                 LEAVE_with_name("scan_vstring");
11295         break;
11296     }
11297
11298     /* make the op for the constant and return */
11299
11300     if (sv)
11301         lvalp->opval = newSVOP(OP_CONST, 0, sv);
11302     else
11303         lvalp->opval = NULL;
11304
11305     return (char *)s;
11306 }
11307
/*
 * S_scan_formline
 *
 * Collects consecutive input lines, starting at 's', into a fresh SV and
 * queues the result for the parser.  Collection stops when:
 *   - a line containing '@' or '^' has been appended (needargs),
 *   - a line consisting of '.' followed only by blanks is found (eofmt),
 *   - "~~" is seen on a line after some text has already been collected,
 *   - or no more input is available.
 * Lines whose first character is '#' are not appended.
 *
 * If any text was collected it is queued as an OP_CONST via
 * force_next(THING); when needargs is set, FORMLBRACK (and DO, if the next
 * non-blank character before the newline is '{') is queued as well.
 *
 * Returns the position at which scanning stopped.
 */
11308 STATIC char *
11309 S_scan_formline(pTHX_ char *s)
11310 {
11311     SV * const stuff = newSVpvs("");
11312     bool needargs = FALSE;
11313     bool eofmt = FALSE;
11314
11315     PERL_ARGS_ASSERT_SCAN_FORMLINE;
11316
11317     while (!needargs) {
11318         char *eol;
              /* A '.' followed only by blanks (and, unless PERL_STRICT_CR is
               * defined, carriage returns) up to the newline or the end of
               * the buffer terminates the scan. */
11319         if (*s == '.') {
11320             char *t = s+1;
11321 #ifdef PERL_STRICT_CR
11322             while (SPACE_OR_TAB(*t))
11323                 t++;
11324 #else
11325             while (SPACE_OR_TAB(*t) || *t == '\r')
11326                 t++;
11327 #endif
11328             if (*t == '\n' || t == PL_bufend) {
11329                 eofmt = TRUE;
11330                 break;
11331             }
11332         }
              /* Find the end of the current line.  When a newline is found,
               * the post-increment leaves eol pointing just past it;
               * otherwise the line extends to the end of the buffer. */
11333         eol = (char *) memchr(s,'\n',PL_bufend-s);
11334         if (!eol++)
11335                 eol = PL_bufend;
              /* Lines starting with '#' are skipped without being examined
               * or appended. */
11336         if (*s != '#') {
11337             char *t;
11338             for (t = s; t < eol; t++) {
                      /* "~~" once text has been collected: stop here without
                       * appending this line and without requesting
                       * arguments. */
11339                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11340                     needargs = FALSE;
11341                     goto enough;        /* ~~ must be first line in formline */
11342                 }
11343                 if (*t == '@' || *t == '^')
11344                     needargs = TRUE;
11345             }
11346             if (eol > s) {
11347                 sv_catpvn(stuff, s, eol-s);
11348 #ifndef PERL_STRICT_CR
                      /* Collapse a trailing "\r\n" of the line just appended
                       * into a single "\n" and shorten the SV by one byte. */
11349                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11350                     char *end = SvPVX(stuff) + SvCUR(stuff);
11351                     end[-2] = '\n';
11352                     end[-1] = '\0';
11353                     SvCUR_set(stuff, SvCUR(stuff) - 1);
11354                 }
11355 #endif
11356             }
11357             else
                    /* empty line span (s already at eol): nothing left to read */
11358               break;
11359         }
11360         s = (char*)eol;
              /* When PL_rsfp or PL_parser->filtered is set and the saved
               * form lexer state is LEX_NORMAL, ask lex_next_chunk() for
               * more input.  The cop line is incremented before the call
               * and decremented after it; scanning resumes at PL_bufptr.
               * NOTE(review): presumably the inc/dec pair keeps line
               * accounting correct across the refill -- confirm against
               * lex_next_chunk(). */
11361         if ((PL_rsfp || PL_parser->filtered)
11362          && PL_parser->form_lex_state == LEX_NORMAL) {
11363             bool got_some;
11364             PL_bufptr = PL_bufend;
11365             COPLINE_INC_WITH_HERELINES;
11366             got_some = lex_next_chunk(0);
11367             CopLINE_dec(PL_curcop);
11368             s = PL_bufptr;
11369             if (!got_some)
11370                 break;
11371         }
11372         incline(s);
11373     }
11374   enough:
          /* Restore the saved lexer state when nothing was collected, or
           * when arguments are expected to follow. */
11375     if (!SvCUR(stuff) || needargs)
11376         PL_lex_state = PL_parser->form_lex_state;
11377     if (SvCUR(stuff)) {
11378         PL_expect = XSTATE;
11379         if (needargs) {
                  /* Skip whitespace on the current line (not the newline) to
                   * see whether the arguments begin with '{'. */
11380             const char *s2 = s;
11381             while (isSPACE(*s2) && *s2 != '\n')
11382                 s2++;
11383             if (*s2 == '{') {
11384                 PL_expect = XTERMBLOCK;
11385                 NEXTVAL_NEXTTOKE.ival = 0;
11386                 force_next(DO);
11387             }
                  /* NOTE(review): DO, FORMLBRACK and THING are forced in that
                   * order; presumably forced tokens are delivered
                   * last-in-first-out -- confirm against force_next(). */
11388             NEXTVAL_NEXTTOKE.ival = 0;
11389             force_next(FORMLBRACK);
11390         }
              /* Outside "bytes" mode, flag the text as UTF-8 when UTF is in
               * effect and the collected bytes form valid UTF-8. */
11391         if (!IN_BYTES) {
11392             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11393                 SvUTF8_on(stuff);
11394         }
11395         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
11396         force_next(THING);
11397     }
11398     else {
              /* Nothing collected: release the SV, and if the terminating
               * '.' line was seen, clear PL_lex_formbrack. */
11399         SvREFCNT_dec(stuff);
11400         if (eofmt)
11401             PL_lex_formbrack = 0;
11402     }
11403     return s;
11404 }
11405
/*
 * Perl_start_subparse
 *
 * Sets up a new compiling CV in PL_compcv.
 *
 *   is_format  non-zero creates an SVt_PVFM, zero an SVt_PVCV
 *   flags      OR-ed into CvFLAGS of the new CV
 *
 * PL_subline, PL_subname and the previous PL_compcv are pushed on the save
 * stack before being changed.  The previous PL_compcv becomes CvOUTSIDE of
 * the new CV, with its reference count incremented.
 *
 * Returns the value PL_savestack_ix had on entry, i.e. before any of the
 * saves above.  NOTE(review): presumably the caller uses it to unwind the
 * save stack once the sub has been compiled -- confirm against callers.
 */
11406 I32
11407 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11408 {
11409     const I32 oldsavestack_ix = PL_savestack_ix;
11410     CV* const outsidecv = PL_compcv;
11411
          /* Save the values that are overwritten below. */
11412     SAVEI32(PL_subline);
11413     save_item(PL_subname);
11414     SAVESPTR(PL_compcv);
11415
11416     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11417     CvFLAGS(PL_compcv) |= flags;
11418
11419     PL_subline = CopLINE(PL_curcop);
11420     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11421     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11422     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
          /* Record the enclosing padlist's id in the new padlist, when there
           * is an enclosing CV and it has a padlist. */
11423     if (outsidecv && CvPADLIST(outsidecv))
11424         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
11425
11426     return oldsavestack_ix;
11427 }
11428
/*
 * S_yywarn
 *
 * Reports the message 's' through yyerror_pv() with the EVAL_WARNONLY bit
 * set in PL_in_eval.  Perl_yyerror_pvn() tests that bit, clears it, and
 * issues the message with Perl_ck_warner_d() instead of passing it to
 * qerror().  'flags' is handed to yyerror_pv() unchanged.
 *
 * Always returns 0.
 */
11429 static int
11430 S_yywarn(pTHX_ const char *const s, U32 flags)
11431 {
11432     PERL_ARGS_ASSERT_YYWARN;
11433
11434     PL_in_eval |= EVAL_WARNONLY;
11435     yyerror_pv(s, flags);
11436     return 0;
11437 }
11438
/*
 * Perl_abort_execution
 *
 * Croaks with a "compilation errors" message built from 'msg' (used as a
 * prefix) and 'name'.  The wording depends on PL_minus_c (presumably set by
 * the -c switch -- confirm).  Control does not come back from Perl_croak(),
 * hence the trailing NOT_REACHED.
 */
11439 void
11440 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
11441 {
11442     PERL_ARGS_ASSERT_ABORT_EXECUTION;
11443
11444     /* Guard clause: the common (non PL_minus_c) wording croaks first. */
11445     if (!PL_minus_c) {
11446         Perl_croak(aTHX_
11447                 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
11448     }
11449     Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
11450     NOT_REACHED; /* NOTREACHED */
11451 }
11452
11453 void
11454 Perl_yyquit(pTHX)
11455 {
11456     /* Called, after at least one error has been found, to abort the parse now,
11457      * instead of trying to forge ahead.
           *
           * A NULL message makes Perl_yyerror_pvn() skip message output and
           * abandon processing unconditionally. */
11458
11459     yyerror_pvn(NULL, 0, 0);
11460 }
11461
/*
 * Perl_yyerror
 *
 * Reports the NUL-terminated message 's' by calling yyerror_pvn() with its
 * strlen() as the length and 0 for the flags.  Returns whatever
 * yyerror_pvn() returns.
 */
11462 int
11463 Perl_yyerror(pTHX_ const char *const s)
11464 {
11465     PERL_ARGS_ASSERT_YYERROR;
11466     return yyerror_pvn(s, strlen(s), 0);
11467 }
11468
/*
 * Perl_yyerror_pv
 *
 * Like Perl_yyerror(), but also passes 'flags' through to yyerror_pvn().
 * 's' must be NUL-terminated, since its length is taken with strlen().
 * Returns whatever yyerror_pvn() returns.
 */
11469 int
11470 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11471 {
11472     PERL_ARGS_ASSERT_YYERROR_PV;
11473     return yyerror_pvn(s, strlen(s), flags);
11474 }
11475
/*
 * Perl_yyerror_pvn
 *
 * Builds and reports a parse error message, then decides whether parsing
 * can continue.
 *
 *   s      message text, or NULL to abort without emitting a message
 *   len    length of 's' in bytes
 *   flags  only the SVf_UTF8 bit is used; it is applied to the message SV
 *
 * Returns 0 whenever it returns.  It does not return normally when 's' is
 * NULL or when PL_error_count has reached 10; those paths end in
 * abort_execution() or Perl_croak().
 */
11476 int
11477 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11478 {
11479     const char *context = NULL;
11480     int contlen = -1;
11481     SV *msg;
11482     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11483     int yychar  = PL_parser->yychar;
11484
11485     /* Output error message 's' with length 'len'.  'flags' are SV flags that
11486      * apply.  If the number of errors found is large enough, it abandons
11487      * parsing.  If 's' is NULL, there is no message, and it abandons
11488      * processing unconditionally */
11489
11490     if (s != NULL) {
              /* Work out where the error happened.  Either 'context' and
               * 'contlen' are set to a stretch of source text ending at
               * PL_bufptr, or a description is written to where_sv. */
11491         if (!yychar || (yychar == ';' && !PL_rsfp))
11492             sv_catpvs(where_sv, "at EOF");
              /* Prefer the text starting at PL_oldoldbufptr when that is
               * less than 200 bytes before PL_bufptr and the three buffer
               * pointers are distinct. */
11493         else if (   PL_oldoldbufptr
11494                  && PL_bufptr > PL_oldoldbufptr
11495                  && PL_bufptr - PL_oldoldbufptr < 200
11496                  && PL_oldoldbufptr != PL_oldbufptr
11497                  && PL_oldbufptr != PL_bufptr)
11498         {
11499             /*
11500                     NetWare only:
11501                     The whitespace-skipping loop below is compiled out on
11502                     NetWare because it abends/crashes there when the script
11503                     has an error such as a missing closing quote, e.g.:
11504                         if ($var eq "value)
11505                     Whitespace is checked in the NetWare code anyway.
11506             */
11507 #ifndef NETWARE
11508             while (isSPACE(*PL_oldoldbufptr))
11509                 PL_oldoldbufptr++;
11510 #endif
11511             context = PL_oldoldbufptr;
11512             contlen = PL_bufptr - PL_oldoldbufptr;
11513         }
              /* Otherwise fall back to the text starting at PL_oldbufptr,
               * under the same distance limit. */
11514         else if (  PL_oldbufptr
11515                 && PL_bufptr > PL_oldbufptr
11516                 && PL_bufptr - PL_oldbufptr < 200
11517                 && PL_oldbufptr != PL_bufptr) {
11518             /*
11519                     NetWare only:
11520                     The whitespace-skipping loop below is compiled out on
11521                     NetWare because it abends/crashes there when the script
11522                     has an error such as a missing closing quote, e.g.:
11523                         if ($var eq "value)
11524                     Whitespace is checked in the NetWare code anyway.
11525             */
11526 #ifndef NETWARE
11527             while (isSPACE(*PL_oldbufptr))
11528                 PL_oldbufptr++;
11529 #endif
11530             context = PL_oldbufptr;
11531             contlen = PL_bufptr - PL_oldbufptr;
11532         }
              /* No usable source text: describe the lookahead token. */
11533         else if (yychar > 255)
11534             sv_catpvs(where_sv, "next token ???");
11535         else if (yychar == YYEMPTY) {
11536             if (PL_lex_state == LEX_NORMAL)
11537                 sv_catpvs(where_sv, "at end of line");
11538             else if (PL_lex_inpat)
11539                 sv_catpvs(where_sv, "within pattern");
11540             else
11541                 sv_catpvs(where_sv, "within string");
11542         }
11543         else {
                  /* Single-character token: values below 32 are shown in
                   * ^X form, printable ones as themselves, anything else
                   * as a three-digit octal escape. */
11544             sv_catpvs(where_sv, "next char ");
11545             if (yychar < 32)
11546                 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11547             else if (isPRINT_LC(yychar)) {
11548                 const char string = yychar;
11549                 sv_catpvn(where_sv, &string, 1);
11550             }
11551             else
11552                 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11553         }
              /* Assemble "<s> at FILE line N, " followed by the location.
               * The line number is PL_parser->preambling unless that is
               * NOLINE, in which case the current cop's line is used. */
11554         msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11555         Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
11556             OutCopFILE(PL_curcop),
11557             (IV)(PL_parser->preambling == NOLINE
11558                    ? CopLINE(PL_curcop)
11559                    : PL_parser->preambling));
11560         if (context)
11561             Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
11562                                  UTF8fARG(UTF, contlen, context));
11563         else
11564             Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
              /* Add the runaway-string hint when a multi-line construct
               * ended on the current line or the line before it.  After
               * the (U32) cast a negative difference becomes a large value,
               * so only differences of 0 and 1 pass.  PL_multi_end is then
               * reset to 0. */
11565         if (   PL_multi_start < PL_multi_end
11566             && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
11567         {
11568             Perl_sv_catpvf(aTHX_ msg,
11569             "  (Might be a runaway multi-line %c%c string starting on"
11570             " line %" IVdf ")\n",
11571                     (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11572             PL_multi_end = 0;
11573         }
              /* With EVAL_WARNONLY set in PL_in_eval (S_yywarn() sets it),
               * clear the bit and emit a warning; otherwise hand the
               * message to qerror(). */
11574         if (PL_in_eval & EVAL_WARNONLY) {
11575             PL_in_eval &= ~EVAL_WARNONLY;
11576             Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
11577         }
11578         else {
11579             qerror(msg);
11580         }
11581     }
          /* Give up when asked to (s == NULL) or once PL_error_count has
           * reached 10.  NOTE(review): PL_error_count is not modified in
           * this function; presumably qerror() maintains it -- confirm. */
11582     if (s == NULL || PL_error_count >= 10) {
              /* This 'msg' is a C string that shadows the SV 'msg' declared
               * at function scope.  Inside an eval with a non-empty ERRSV,
               * it holds that text and is used as a prefix. */
11583         const char * msg = "";
11584         const char * const name = OutCopFILE(PL_curcop);
11585
11586         if (PL_in_eval) {
11587             SV * errsv = ERRSV;
11588             if (SvCUR(errsv)) {
11589                 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
11590             }
11591         }
11592
11593         if (s == NULL) {
11594             abort_execution(msg, name);
11595         }
11596         else {
11597             Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
11598         }
11599     }
          /* Parsing continues: clear PL_in_my and PL_in_my_stash. */
11600     PL_in_my = 0;
11601     PL_in_my_stash = NULL;
11602     return 0;
11603 }
11604
11605 STATIC char*
11606 S_swallow_bom(pTHX_ U8 *s)
11607 {
11608     const STRLEN slen = SvCUR(PL_linestr);
11609
11610     PERL_ARGS_ASSERT_SWALLOW_BOM;
11611
11612     switch (s[0]) {
11613     case 0xFF:
11614         if (s[1] == 0xFE) {
11615             /* UTF-16 little-endian? (or UTF-32LE?) */
11616             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11617                 /* diag_listed_as: Unsupported script encoding %s */
11618                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11619 #ifndef PERL_NO_UTF16_FILTER
11620             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11621             s += 2;
11622             if (PL_bufend > (char*)s) {
11623                 s = add_utf16_textfilter(s, TRUE);
11624             }
11625 #else
11626             /* diag_listed_as: Unsupported script encoding %s */
11627             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11628 #endif
11629         }
11630         break;
11631     case 0xFE:
11632         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11633 #ifndef PERL_NO_UTF16_FILTER
11634             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11635             s += 2;
11636             if (PL_bufend > (char *)s) {
11637                 s = add_utf16_textfilter(s, FALSE);
11638             }
11639 #else
11640             /* diag_listed_as: Unsupported script encoding %s */
11641             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11642 #endif
11643         }
11644         break;
11645     case BOM_UTF8_FIRST_BYTE: {
11646         const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11647         if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11648             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11649             s += len + 1;                      /* UTF-8 */
11650         }
11651         break;
11652     }
11653     case 0:
11654         if (slen > 3) {
11655              if (s[1] == 0) {
11656                   if (s[2] == 0xFE && s[3] == 0xFF) {
11657                        /* UTF-32 big-endian */
11658                        /* diag_listed_as: Unsupported script encoding %s */
11659                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11660                   }
11661              }
11662              else if (s[2] == 0 && s[3] != 0) {
11663                   /* Leading bytes
11664                    * 00 xx 00 xx
11665                    * are a good indicator of UTF-16BE. */
11666 #ifndef PERL_NO_UTF16_FILTER
11667                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11668                   s = add_utf16_textfilter(s, FALSE);
11669 #else
11670                   /* diag_listed_as: Unsupported script encoding %s */
11671                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11672 #endif
11673              }
11674         }
11675         break;
11676
11677     default:
11678          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11679                   /* Leading bytes
11680                    * xx 00 xx 00
11681                    * are a good indicator of UTF-16LE. */
11682 #ifndef PERL_NO_UTF16_FILTER
11683               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11684               s = add_utf16_textfilter(s, TRUE);
11685 #else
11686               /* diag_listed_as: Unsupported script encoding %s */
11687               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11688 #endif
11689          }
11690     }
11691     return (char*)s;
11692 }
11693
11694
11695 #ifndef PERL_NO_UTF16_FILTER
11696 static I32
11697 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11698 {
11699     SV *const filter = FILTER_DATA(idx);
11700     /* We re-use this each time round, throwing the contents away before we
11701        return.  */
11702     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11703     SV *const utf8_buffer = filter;
11704     IV status = IoPAGE(filter);
11705     const bool reverse = cBOOL(IoLINES(filter));
11706     I32 retval;
11707
11708     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11709
11710     /* As we're automatically added, at the lowest level, and hence only called
11711        from this file, we can be sure that we're not called in block mode. Hence
11712        don't bother writing code to deal with block mode.  */
11713     if (maxlen) {
11714         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11715     }
11716     if (status < 0) {
11717         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
11718     }
11719     DEBUG_P(PerlIO_printf(Perl_debug_log,
11720                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
11721                           FPTR2DPTR(void *, S_utf16_textfilter),
11722                           reverse ? 'l' : 'b', idx, maxlen, status,
11723                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11724
11725     while (1) {
11726         STRLEN chars;
11727         STRLEN have;
11728         I32 newlen;
11729         U8 *end;
11730         /* First, look in our buffer of existing UTF-8 data:  */
11731         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11732
11733         if (nl) {
11734             ++nl;
11735         } else if (status == 0) {
11736             /* EOF */
11737             IoPAGE(filter) = 0;
11738             nl = SvEND(utf8_buffer);
11739         }
11740         if (nl) {
11741             STRLEN got = nl - SvPVX(utf8_buffer);
11742             /* Did we have anything to append?  */
11743             retval = got != 0;
11744             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11745             /* Everything else in this code works just fine if SVp_POK isn't
11746                set.  This, however, needs it, and we need it to work, else
11747                we loop infinitely because the buffer is never consumed.  */
11748             sv_chop(utf8_buffer, nl);
11749             break;
11750         }
11751
11752         /* OK, not a complete line there, so need to read some more UTF-16.
11753            Read an extra octect if the buffer currently has an odd number. */
11754         while (1) {
11755             if (status <= 0)
11756                 break;
11757             if (SvCUR(utf16_buffer) >= 2) {
11758                 /* Location of the high octet of the last complete code point.
11759                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11760                    *coupled* with all the benefits of partial reads and
11761                    endianness.  */
11762                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11763                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11764
11765                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11766                     break;
11767                 }
11768
11769                 /* We have the first half of a surrogate. Read more.  */
11770                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11771             }
11772
11773             status = FILTER_READ(idx + 1, utf16_buffer,
11774                                  160 + (SvCUR(utf16_buffer) & 1));
11775             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
11776             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11777             if (status < 0) {
11778                 /* Error */
11779                 IoPAGE(filter) = status;
11780                 return status;
11781             }
11782         }
11783
11784         chars = SvCUR(utf16_buffer) >> 1;
11785         have = SvCUR(utf8_buffer);
11786         SvGROW(utf8_buffer, have + chars * 3 + 1);
11787
11788         if (reverse) {
11789             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11790                                          (U8*)SvPVX_const(utf8_buffer) + have,
11791                                          chars * 2, &newlen);
11792         } else {
11793             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11794                                 (U8*)SvPVX_const(utf8_buffer) + have,
11795                                 chars * 2, &newlen);
11796         }
11797         SvCUR_set(utf8_buffer, have + newlen);
11798         *end = '\0';
11799
11800         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11801            it's private to us, and utf16_to_utf8{,reversed} take a
11802            (pointer,length) pair, rather than a NUL-terminated string.  */
11803         if(SvCUR(utf16_buffer) & 1) {
11804             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11805             SvCUR_set(utf16_buffer, 1);
11806         } else {
11807             SvCUR_set(utf16_buffer, 0);
11808         }
11809     }
11810     DEBUG_P(PerlIO_printf(Perl_debug_log,
11811                           "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
11812                           status,
11813                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11814     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11815     return retval;
11816 }
11817
11818 static U8 *
11819 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11820 {
11821     SV *filter = filter_add(S_utf16_textfilter, NULL);
11822
11823     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11824
11825     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11826     SvPVCLEAR(filter);
11827     IoLINES(filter) = reversed;
11828     IoPAGE(filter) = 1; /* Not EOF */
11829
11830     /* Sadly, we have to return a valid pointer, come what may, so we have to
11831        ignore any error return from this.  */
11832     SvCUR_set(PL_linestr, 0);
11833     if (FILTER_READ(0, PL_linestr, 0)) {
11834         SvUTF8_on(PL_linestr);
11835     } else {
11836         SvUTF8_on(PL_linestr);
11837     }
11838     PL_bufend = SvEND(PL_linestr);
11839     return (U8*)SvPVX(PL_linestr);
11840 }
11841 #endif
11842
11843 /*
11844 Returns a pointer to the next character after the parsed
11845 vstring, as well as updating the passed in sv.
11846
11847 Function must be called like
11848
11849         sv = sv_2mortal(newSV(5));
11850         s = scan_vstring(s,e,sv);
11851
11852 where s and e are the start and end of the string.
11853 The sv should already be large enough to store the vstring
11854 passed in, for performance reasons.
11855
11856 This function may croak if fatal warnings are enabled in the
11857 calling scope, hence the sv_2mortal in the example (to prevent
11858 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
11859 sv_2mortal.
11860
11861 */
11862
11863 char *
11864 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11865 {
11866     const char *pos = s;
11867     const char *start = s;
11868
11869     PERL_ARGS_ASSERT_SCAN_VSTRING;
11870
11871     if (*pos == 'v') pos++;  /* get past 'v' */
11872     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11873         pos++;
11874     if ( *pos != '.') {
11875         /* this may not be a v-string if followed by => */
11876         const char *next = pos;
11877         while (next < e && isSPACE(*next))
11878             ++next;
11879         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11880             /* return string not v-string */
11881             sv_setpvn(sv,(char *)s,pos-s);
11882             return (char *)pos;
11883         }
11884     }
11885
11886     if (!isALPHA(*pos)) {
11887         U8 tmpbuf[UTF8_MAXBYTES+1];
11888
11889         if (*s == 'v')
11890             s++;  /* get past 'v' */
11891
11892         SvPVCLEAR(sv);
11893
11894         for (;;) {
11895             /* this is atoi() that tolerates underscores */
11896             U8 *tmpend;
11897             UV rev = 0;
11898             const char *end = pos;
11899             UV mult = 1;
11900             while (--end >= s) {
11901                 if (*end != '_') {
11902                     const UV orev = rev;
11903                     rev += (*end - '0') * mult;
11904                     mult *= 10;
11905                     if (orev > rev)
11906                         /* diag_listed_as: Integer overflow in %s number */
11907                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11908                                          "Integer overflow in decimal number");
11909                 }
11910             }
11911
11912             /* Append native character for the rev point */
11913             tmpend = uvchr_to_utf8(tmpbuf, rev);
11914             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11915             if (!UVCHR_IS_INVARIANT(rev))
11916                  SvUTF8_on(sv);
11917             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11918                  s = ++pos;
11919             else {
11920                  s = pos;
11921                  break;
11922             }
11923             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11924                  pos++;
11925         }
11926         SvPOK_on(sv);
11927         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11928         SvRMAGICAL_on(sv);
11929     }
11930     return (char *)s;
11931 }
11932
11933 int
11934 Perl_keyword_plugin_standard(pTHX_
11935         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11936 {
11937     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11938     PERL_UNUSED_CONTEXT;
11939     PERL_UNUSED_ARG(keyword_ptr);
11940     PERL_UNUSED_ARG(keyword_len);
11941     PERL_UNUSED_ARG(op_ptr);
11942     return KEYWORD_PLUGIN_DECLINE;
11943 }
11944
11945 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11946 static void
11947 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11948 {
11949     SAVEI32(PL_lex_brackets);
11950     if (PL_lex_brackets > 100)
11951         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11952     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11953     SAVEI32(PL_lex_allbrackets);
11954     PL_lex_allbrackets = 0;
11955     SAVEI8(PL_lex_fakeeof);
11956     PL_lex_fakeeof = (U8)fakeeof;
11957     if(yyparse(gramtype) && !PL_parser->error_count)
11958         qerror(Perl_mess(aTHX_ "Parse error"));
11959 }
11960
11961 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11962 static OP *
11963 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11964 {
11965     OP *o;
11966     ENTER;
11967     SAVEVPTR(PL_eval_root);
11968     PL_eval_root = NULL;
11969     parse_recdescent(gramtype, fakeeof);
11970     o = PL_eval_root;
11971     LEAVE;
11972     return o;
11973 }
11974
11975 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11976 static OP *
11977 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11978 {
11979     OP *exprop;
11980     if (flags & ~PARSE_OPTIONAL)
11981         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11982     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11983     if (!exprop && !(flags & PARSE_OPTIONAL)) {
11984         if (!PL_parser->error_count)
11985             qerror(Perl_mess(aTHX_ "Parse error"));
11986         exprop = newOP(OP_NULL, 0);
11987     }
11988     return exprop;
11989 }
11990
11991 /*
11992 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11993
11994 Parse a Perl arithmetic expression.  This may contain operators of precedence
11995 down to the bit shift operators.  The expression must be followed (and thus
11996 terminated) either by a comparison or lower-precedence operator or by
11997 something that would normally terminate an expression such as semicolon.
11998 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11999 otherwise it is mandatory.  It is up to the caller to ensure that the
12000 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12001 the source of the code to be parsed and the lexical context for the
12002 expression.
12003
12004 The op tree representing the expression is returned.  If an optional
12005 expression is absent, a null pointer is returned, otherwise the pointer
12006 will be non-null.
12007
12008 If an error occurs in parsing or compilation, in most cases a valid op
12009 tree is returned anyway.  The error is reflected in the parser state,
12010 normally resulting in a single exception at the top level of parsing
12011 which covers all the compilation errors that occurred.  Some compilation
12012 errors, however, will throw an exception immediately.
12013
12014 =cut
12015 */
12016
12017 OP *
12018 Perl_parse_arithexpr(pTHX_ U32 flags)
12019 {
12020     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12021 }
12022
12023 /*
12024 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12025
12026 Parse a Perl term expression.  This may contain operators of precedence
12027 down to the assignment operators.  The expression must be followed (and thus
12028 terminated) either by a comma or lower-precedence operator or by
12029 something that would normally terminate an expression such as semicolon.
12030 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12031 otherwise it is mandatory.  It is up to the caller to ensure that the
12032 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12033 the source of the code to be parsed and the lexical context for the
12034 expression.
12035
12036 The op tree representing the expression is returned.  If an optional
12037 expression is absent, a null pointer is returned, otherwise the pointer
12038 will be non-null.
12039
12040 If an error occurs in parsing or compilation, in most cases a valid op
12041 tree is returned anyway.  The error is reflected in the parser state,
12042 normally resulting in a single exception at the top level of parsing
12043 which covers all the compilation errors that occurred.  Some compilation
12044 errors, however, will throw an exception immediately.
12045
12046 =cut
12047 */
12048
12049 OP *
12050 Perl_parse_termexpr(pTHX_ U32 flags)
12051 {
12052     return parse_expr(LEX_FAKEEOF_COMMA, flags);
12053 }
12054
12055 /*
12056 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12057
12058 Parse a Perl list expression.  This may contain operators of precedence
12059 down to the comma operator.  The expression must be followed (and thus
12060 terminated) either by a low-precedence logic operator such as C<or> or by
12061 something that would normally terminate an expression such as semicolon.
12062 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12063 otherwise it is mandatory.  It is up to the caller to ensure that the
12064 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12065 the source of the code to be parsed and the lexical context for the
12066 expression.
12067
12068 The op tree representing the expression is returned.  If an optional
12069 expression is absent, a null pointer is returned, otherwise the pointer
12070 will be non-null.
12071
12072 If an error occurs in parsing or compilation, in most cases a valid op
12073 tree is returned anyway.  The error is reflected in the parser state,
12074 normally resulting in a single exception at the top level of parsing
12075 which covers all the compilation errors that occurred.  Some compilation
12076 errors, however, will throw an exception immediately.
12077
12078 =cut
12079 */
12080
12081 OP *
12082 Perl_parse_listexpr(pTHX_ U32 flags)
12083 {
12084     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12085 }
12086
12087 /*
12088 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12089
12090 Parse a single complete Perl expression.  This allows the full
12091 expression grammar, including the lowest-precedence operators such
12092 as C<or>.  The expression must be followed (and thus terminated) by a
12093 token that an expression would normally be terminated by: end-of-file,
12094 closing bracketing punctuation, semicolon, or one of the keywords that
12095 signals a postfix expression-statement modifier.  If C<flags> has the
12096 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
12097 mandatory.  It is up to the caller to ensure that the dynamic parser
12098 state (L</PL_parser> et al) is correctly set to reflect the source of
12099 the code to be parsed and the lexical context for the expression.
12100
12101 The op tree representing the expression is returned.  If an optional
12102 expression is absent, a null pointer is returned, otherwise the pointer
12103 will be non-null.
12104
12105 If an error occurs in parsing or compilation, in most cases a valid op
12106 tree is returned anyway.  The error is reflected in the parser state,
12107 normally resulting in a single exception at the top level of parsing
12108 which covers all the compilation errors that occurred.  Some compilation
12109 errors, however, will throw an exception immediately.
12110
12111 =cut
12112 */
12113
12114 OP *
12115 Perl_parse_fullexpr(pTHX_ U32 flags)
12116 {
12117     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12118 }
12119
12120 /*
12121 =for apidoc Amx|OP *|parse_block|U32 flags
12122
12123 Parse a single complete Perl code block.  This consists of an opening
12124 brace, a sequence of statements, and a closing brace.  The block
12125 constitutes a lexical scope, so C<my> variables and various compile-time
12126 effects can be contained within it.  It is up to the caller to ensure
12127 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12128 reflect the source of the code to be parsed and the lexical context for
12129 the statement.
12130
12131 The op tree representing the code block is returned.  This is always a
12132 real op, never a null pointer.  It will normally be a C<lineseq> list,
12133 including C<nextstate> or equivalent ops.  No ops to construct any kind
12134 of runtime scope are included by virtue of it being a block.
12135
12136 If an error occurs in parsing or compilation, in most cases a valid op
12137 tree (most likely null) is returned anyway.  The error is reflected in
12138 the parser state, normally resulting in a single exception at the top
12139 level of parsing which covers all the compilation errors that occurred.
12140 Some compilation errors, however, will throw an exception immediately.
12141
12142 The C<flags> parameter is reserved for future use, and must always
12143 be zero.
12144
12145 =cut
12146 */
12147
12148 OP *
12149 Perl_parse_block(pTHX_ U32 flags)
12150 {
12151     if (flags)
12152         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12153     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12154 }
12155
12156 /*
12157 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12158
12159 Parse a single unadorned Perl statement.  This may be a normal imperative
12160 statement or a declaration that has compile-time effect.  It does not
12161 include any label or other affixture.  It is up to the caller to ensure
12162 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12163 reflect the source of the code to be parsed and the lexical context for
12164 the statement.
12165
12166 The op tree representing the statement is returned.  This may be a
12167 null pointer if the statement is null, for example if it was actually
12168 a subroutine definition (which has compile-time side effects).  If not
12169 null, it will be ops directly implementing the statement, suitable to
12170 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
12171 equivalent op (except for those embedded in a scope contained entirely
12172 within the statement).
12173
12174 If an error occurs in parsing or compilation, in most cases a valid op
12175 tree (most likely null) is returned anyway.  The error is reflected in
12176 the parser state, normally resulting in a single exception at the top
12177 level of parsing which covers all the compilation errors that occurred.
12178 Some compilation errors, however, will throw an exception immediately.
12179
12180 The C<flags> parameter is reserved for future use, and must always
12181 be zero.
12182
12183 =cut
12184 */
12185
12186 OP *
12187 Perl_parse_barestmt(pTHX_ U32 flags)
12188 {
12189     if (flags)
12190         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12191     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12192 }
12193
12194 /*
12195 =for apidoc Amx|SV *|parse_label|U32 flags
12196
12197 Parse a single label, possibly optional, of the type that may prefix a
12198 Perl statement.  It is up to the caller to ensure that the dynamic parser
12199 state (L</PL_parser> et al) is correctly set to reflect the source of
12200 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
12201 label is optional, otherwise it is mandatory.
12202
12203 The name of the label is returned in the form of a fresh scalar.  If an
12204 optional label is absent, a null pointer is returned.
12205
12206 If an error occurs in parsing, which can only occur if the label is
12207 mandatory, a valid label is returned anyway.  The error is reflected in
12208 the parser state, normally resulting in a single exception at the top
12209 level of parsing which covers all the compilation errors that occurred.
12210
12211 =cut
12212 */
12213
12214 SV *
12215 Perl_parse_label(pTHX_ U32 flags)
12216 {
12217     if (flags & ~PARSE_OPTIONAL)
12218         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12219     if (PL_nexttoke) {
12220         PL_parser->yychar = yylex();
12221         if (PL_parser->yychar == LABEL) {
12222             char * const lpv = pl_yylval.pval;
12223             STRLEN llen = strlen(lpv);
12224             PL_parser->yychar = YYEMPTY;
12225             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12226         } else {
12227             yyunlex();
12228             goto no_label;
12229         }
12230     } else {
12231         char *s, *t;
12232         STRLEN wlen, bufptr_pos;
12233         lex_read_space(0);
12234         t = s = PL_bufptr;
12235         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
12236             goto no_label;
12237         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12238         if (word_takes_any_delimiter(s, wlen))
12239             goto no_label;
12240         bufptr_pos = s - SvPVX(PL_linestr);
12241         PL_bufptr = t;
12242         lex_read_space(LEX_KEEP_PREVIOUS);
12243         t = PL_bufptr;
12244         s = SvPVX(PL_linestr) + bufptr_pos;
12245         if (t[0] == ':' && t[1] != ':') {
12246             PL_oldoldbufptr = PL_oldbufptr;
12247             PL_oldbufptr = s;
12248             PL_bufptr = t+1;
12249             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12250         } else {
12251             PL_bufptr = s;
12252             no_label:
12253             if (flags & PARSE_OPTIONAL) {
12254                 return NULL;
12255             } else {
12256                 qerror(Perl_mess(aTHX_ "Parse error"));
12257                 return newSVpvs("x");
12258             }
12259         }
12260     }
12261 }
12262
12263 /*
12264 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12265
12266 Parse a single complete Perl statement.  This may be a normal imperative
12267 statement or a declaration that has compile-time effect, and may include
12268 optional labels.  It is up to the caller to ensure that the dynamic
12269 parser state (L</PL_parser> et al) is correctly set to reflect the source
12270 of the code to be parsed and the lexical context for the statement.
12271
12272 The op tree representing the statement is returned.  This may be a
12273 null pointer if the statement is null, for example if it was actually
12274 a subroutine definition (which has compile-time side effects).  If not
12275 null, it will be the result of a L</newSTATEOP> call, normally including
12276 a C<nextstate> or equivalent op.
12277
12278 If an error occurs in parsing or compilation, in most cases a valid op
12279 tree (most likely null) is returned anyway.  The error is reflected in
12280 the parser state, normally resulting in a single exception at the top
12281 level of parsing which covers all the compilation errors that occurred.
12282 Some compilation errors, however, will throw an exception immediately.
12283
12284 The C<flags> parameter is reserved for future use, and must always
12285 be zero.
12286
12287 =cut
12288 */
12289
12290 OP *
12291 Perl_parse_fullstmt(pTHX_ U32 flags)
12292 {
12293     if (flags)
12294         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12295     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12296 }
12297
12298 /*
12299 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12300
12301 Parse a sequence of zero or more Perl statements.  These may be normal
12302 imperative statements, including optional labels, or declarations
12303 that have compile-time effect, or any mixture thereof.  The statement
12304 sequence ends when a closing brace or end-of-file is encountered in a
12305 place where a new statement could have validly started.  It is up to
12306 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12307 is correctly set to reflect the source of the code to be parsed and the
12308 lexical context for the statements.
12309
12310 The op tree representing the statement sequence is returned.  This may
12311 be a null pointer if the statements were all null, for example if there
12312 were no statements or if there were only subroutine definitions (which
12313 have compile-time side effects).  If not null, it will be a C<lineseq>
12314 list, normally including C<nextstate> or equivalent ops.
12315
12316 If an error occurs in parsing or compilation, in most cases a valid op
12317 tree is returned anyway.  The error is reflected in the parser state,
12318 normally resulting in a single exception at the top level of parsing
12319 which covers all the compilation errors that occurred.  Some compilation
12320 errors, however, will throw an exception immediately.
12321
12322 The C<flags> parameter is reserved for future use, and must always
12323 be zero.
12324
12325 =cut
12326 */
12327
12328 OP *
12329 Perl_parse_stmtseq(pTHX_ U32 flags)
12330 {
12331     OP *stmtseqop;
12332     I32 c;
12333     if (flags)
12334         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12335     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12336     c = lex_peek_unichar(0);
12337     if (c != -1 && c != /*{*/'}')
12338         qerror(Perl_mess(aTHX_ "Parse error"));
12339     return stmtseqop;
12340 }
12341
12342 /*
12343  * ex: set ts=8 sts=4 sw=4 et:
12344  */