toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "dquote_inline.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* XXX temporary backwards compatibility */
  49 #define PL_lex_brackets         (PL_parser->lex_brackets)
  50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  53 #define PL_lex_casemods         (PL_parser->lex_casemods)
  54 #define PL_lex_casestack        (PL_parser->lex_casestack)
  55 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  56 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  57 #define PL_lex_inpat            (PL_parser->lex_inpat)
  58 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  59 #define PL_lex_op               (PL_parser->lex_op)
  60 #define PL_lex_repl             (PL_parser->lex_repl)
  61 #define PL_lex_starts           (PL_parser->lex_starts)
  62 #define PL_lex_stuff            (PL_parser->lex_stuff)
  63 #define PL_multi_start          (PL_parser->multi_start)
  64 #define PL_multi_open           (PL_parser->multi_open)
  65 #define PL_multi_close          (PL_parser->multi_close)
  66 #define PL_preambled            (PL_parser->preambled)
  67 #define PL_linestr              (PL_parser->linestr)
  68 #define PL_expect               (PL_parser->expect)
  69 #define PL_copline              (PL_parser->copline)
  70 #define PL_bufptr               (PL_parser->bufptr)
  71 #define PL_oldbufptr            (PL_parser->oldbufptr)
  72 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  73 #define PL_linestart            (PL_parser->linestart)
  74 #define PL_bufend               (PL_parser->bufend)
  75 #define PL_last_uni             (PL_parser->last_uni)
  76 #define PL_last_lop             (PL_parser->last_lop)
  77 #define PL_last_lop_op          (PL_parser->last_lop_op)
  78 #define PL_lex_state            (PL_parser->lex_state)
  79 #define PL_rsfp                 (PL_parser->rsfp)
  80 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  81 #define PL_in_my                (PL_parser->in_my)
  82 #define PL_in_my_stash          (PL_parser->in_my_stash)
  83 #define PL_tokenbuf             (PL_parser->tokenbuf)
  84 #define PL_multi_end            (PL_parser->multi_end)
  85 #define PL_error_count          (PL_parser->error_count)
  86
  87 #  define PL_nexttoke           (PL_parser->nexttoke)
  88 #  define PL_nexttype           (PL_parser->nexttype)
  89 #  define PL_nextval            (PL_parser->nextval)
  90
  91
  92 #define SvEVALED(sv) \
  93     (SvTYPE(sv) >= SVt_PVNV \
  94     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
  95
  96 static const char* const ident_too_long = "Identifier too long";
  97
  98 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
  99
 100 #define XENUMMASK  0x3f
 101 #define XFAKEEOF   0x40
 102 #define XFAKEBRACK 0x80
 103
 104 #ifdef USE_UTF8_SCRIPTS
 105 #   define UTF cBOOL(!IN_BYTES)
 106 #else
 107 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 108 #endif
 109
 110 /* The maximum number of characters preceding the unrecognized one to display */
 111 #define UNRECOGNIZED_PRECEDE_COUNT 10
 112
 113 /* In variables named $^X, these are the legal values for X.
 114  * 1999-02-27 mjd-perl-patch@plover.com */
 115 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 116
 117 #define SPACE_OR_TAB(c) isBLANK_A(c)
 118
 119 #define HEXFP_PEEK(s)     \
 120     (((s[0] == '.') && \
 121       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
 122      isALPHA_FOLD_EQ(s[0], 'p'))
 123
 124 /* LEX_* are values for PL_lex_state, the state of the lexer.
 125  * They are arranged oddly so that the guard on the switch statement
 126  * can get by with a single comparison (if the compiler is smart enough).
 127  *
 128  * These values refer to the various states within a sublex parse,
 129  * i.e. within a double quotish string
 130  */
 131
 132 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 133
 134 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 135 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 136 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 137 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 138 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 139
 140                                    /* at end of code, eg "$x" followed by:  */
 141 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 142 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 143
 144 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 145                                         string or after \E, $foo, etc       */
 146 #define LEX_INTERPCONST          2 /* NOT USED */
 147 #define LEX_FORMLINE             1 /* expecting a format line               */
 148
 149
 150 #ifdef DEBUGGING
 151 static const char* const lex_state_names[] = {
 152     "KNOWNEXT",
 153     "FORMLINE",
 154     "INTERPCONST",
 155     "INTERPCONCAT",
 156     "INTERPENDMAYBE",
 157     "INTERPEND",
 158     "INTERPSTART",
 159     "INTERPPUSH",
 160     "INTERPCASEMOD",
 161     "INTERPNORMAL",
 162     "NORMAL"
 163 };
 164 #endif
 165
 166 #include "keywords.h"
 167
 168 /* CLINE is a macro that ensures PL_copline has a sane value */
 169
 170 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 171
 172 /*
 173  * Convenience functions to return different tokens and prime the
 174  * lexer for the next token.  They all take an argument.
 175  *
 176  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 177  * OPERATOR     : generic operator
 178  * AOPERATOR    : assignment operator
 179  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 180  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 181  * PREREF       : *EXPR where EXPR is not a simple identifier
 182  * TERM         : expression term
 183  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 184  * LOOPX        : loop exiting command (goto, last, dump, etc)
 185  * FTST         : file test operator
 186  * FUN0         : zero-argument function
 187  * FUN0OP       : zero-argument function, with its op created in this file
 188  * FUN1         : not used, except for not, which isn't a UNIOP
 189  * BOop         : bitwise or or xor
 190  * BAop         : bitwise and
 191  * BCop         : bitwise complement
 192  * SHop         : shift operator
 193  * PWop         : power operator
 194  * PMop         : pattern-matching operator
 195  * Aop          : addition-level operator
 196  * AopNOASSIGN  : addition-level operator that is never part of .=
 197  * Mop          : multiplication-level operator
 198  * Eop          : equality-testing operator
 199  * Rop          : relational operator <= != gt
 200  *
 201  * Also see LOP and lop() below.
 202  */
 203
 204 #ifdef DEBUGGING /* Serve -DT. */
 205 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 206 #else
 207 #   define REPORT(retval) (retval)
 208 #endif
 209
 210 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 211 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 212 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 213 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 214 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 215 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 216 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 217 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 218 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
 219                          pl_yylval.ival=f, \
 220                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
 221                          REPORT((int)LOOPEX))
 222 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 223 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 224 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 225 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 226 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
 227 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
 228 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
 229                        REPORT('~')
 230 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
 231 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 232 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 233 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 234 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
 235 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 236 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 237 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 238
 239 /* This bit of chicanery makes a unary function followed by
 240  * a parenthesis into a function with one argument, highest precedence.
 241  * The UNIDOR macro is for unary functions that can be followed by the //
 242  * operator (such as C<shift // 0>).
 243  */
 244 #define UNI3(f,x,have_x) { \
 245         pl_yylval.ival = f; \
 246         if (have_x) PL_expect = x; \
 247         PL_bufptr = s; \
 248         PL_last_uni = PL_oldbufptr; \
 249         PL_last_lop_op = (f) < 0 ? -(f) : (f); \
 250         if (*s == '(') \
 251             return REPORT( (int)FUNC1 ); \
 252         s = skipspace(s); \
 253         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 254         }
 255 #define UNI(f)    UNI3(f,XTERM,1)
 256 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 257 #define UNIPROTO(f,optional) { \
 258         if (optional) PL_last_uni = PL_oldbufptr; \
 259         OPERATOR(f); \
 260         }
 261
 262 #define UNIBRACK(f) UNI3(f,0,0)
 263
 264 /* grandfather return to old style */
 265 #define OLDLOP(f) \
 266         do { \
 267             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 268                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 269             pl_yylval.ival = (f); \
 270             PL_expect = XTERM; \
 271             PL_bufptr = s; \
 272             return (int)LSTOP; \
 273         } while(0)
 274
 275 #define COPLINE_INC_WITH_HERELINES                  \
 276     STMT_START {                                     \
 277         CopLINE_inc(PL_curcop);                       \
 278         if (PL_parser->herelines)                      \
 279             CopLINE(PL_curcop) += PL_parser->herelines, \
 280             PL_parser->herelines = 0;                    \
 281     } STMT_END
 282 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 283  * is no sublex_push to follow. */
 284 #define COPLINE_SET_FROM_MULTI_END            \
 285     STMT_START {                               \
 286         CopLINE_set(PL_curcop, PL_multi_end);   \
 287         if (PL_multi_end != PL_multi_start)      \
 288             PL_parser->herelines = 0;             \
 289     } STMT_END
 290
 291
 292 #ifdef DEBUGGING
 293
 294 /* how to interpret the pl_yylval associated with the token */
 295 enum token_type {
 296     TOKENTYPE_NONE,
 297     TOKENTYPE_IVAL,
 298     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 299     TOKENTYPE_PVAL,
 300     TOKENTYPE_OPVAL
 301 };
 302
 303 static struct debug_tokens {
 304     const int token;
 305     enum token_type type;
 306     const char *name;
 307 } const debug_tokens[] =
 308 {
 309     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 310     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 311     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 312     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 313     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 314     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 315     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 316     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 317     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 318     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 319     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 320     { DO,               TOKENTYPE_NONE,         "DO" },
 321     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 322     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 323     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 324     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 325     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 326     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 327     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 328     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 329     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 330     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 331     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 332     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 333     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 334     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 335     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 336     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 337     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 338     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 339     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 340     { IF,               TOKENTYPE_IVAL,         "IF" },
 341     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 342     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 343     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 344     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 345     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 346     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 347     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 348     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 349     { MY,               TOKENTYPE_IVAL,         "MY" },
 350     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 351     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 352     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 353     { OROR,             TOKENTYPE_NONE,         "OROR" },
 354     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 355     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 356     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 357     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 358     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 359     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 360     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 361     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 362     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 363     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 364     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 365     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 366     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 367     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 368     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 369     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 370     { SUB,              TOKENTYPE_NONE,         "SUB" },
 371     { THING,            TOKENTYPE_OPVAL,        "THING" },
 372     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 373     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 374     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 375     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 376     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 377     { USE,              TOKENTYPE_IVAL,         "USE" },
 378     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 379     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 380     { BAREWORD,         TOKENTYPE_OPVAL,        "BAREWORD" },
 381     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 382     { 0,                TOKENTYPE_NONE,         NULL }
 383 };
 384
 385 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 386
 387 STATIC int
 388 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 389 {
 390     PERL_ARGS_ASSERT_TOKEREPORT;
 391
 392     if (DEBUG_T_TEST) {
 393         const char *name = NULL;
 394         enum token_type type = TOKENTYPE_NONE;
 395         const struct debug_tokens *p;
 396         SV* const report = newSVpvs("<== ");
 397
 398         for (p = debug_tokens; p->token; p++) {
 399             if (p->token == (int)rv) {
 400                 name = p->name;
 401                 type = p->type;
 402                 break;
 403             }
 404         }
 405         if (name)
 406             Perl_sv_catpv(aTHX_ report, name);
 407         else if (isGRAPH(rv))
 408         {
 409             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 410             if ((char)rv == 'p')
 411                 sv_catpvs(report, " (pending identifier)");
 412         }
 413         else if (!rv)
 414             sv_catpvs(report, "EOF");
 415         else
 416             Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
 417         switch (type) {
 418         case TOKENTYPE_NONE:
 419             break;
 420         case TOKENTYPE_IVAL:
 421             Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
 422             break;
 423         case TOKENTYPE_OPNUM:
 424             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 425                                     PL_op_name[lvalp->ival]);
 426             break;
 427         case TOKENTYPE_PVAL:
 428             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 429             break;
 430         case TOKENTYPE_OPVAL:
 431             if (lvalp->opval) {
 432                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 433                                     PL_op_name[lvalp->opval->op_type]);
 434                 if (lvalp->opval->op_type == OP_CONST) {
 435                     Perl_sv_catpvf(aTHX_ report, " %s",
 436                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 437                 }
 438
 439             }
 440             else
 441                 sv_catpvs(report, "(opval=null)");
 442             break;
 443         }
 444         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 445     };
 446     return (int)rv;
 447 }
 448
 449
 450 /* print the buffer with suitable escapes */
 451
 452 STATIC void
 453 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 454 {
 455     SV* const tmp = newSVpvs("");
 456
 457     PERL_ARGS_ASSERT_PRINTBUF;
 458
 459     GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
 460     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 461     GCC_DIAG_RESTORE_STMT;
 462     SvREFCNT_dec(tmp);
 463 }
 464
 465 #endif
 466
 467 /*
 468  * S_ao
 469  *
 470  * This subroutine looks for an '=' next to the operator that has just been
 471  * parsed and turns it into an ASSIGNOP if it finds one.
 472  */
 473
 474 STATIC int
 475 S_ao(pTHX_ int toketype)
 476 {
 477     if (*PL_bufptr == '=') {
 478         PL_bufptr++;
 479         if (toketype == ANDAND)
 480             pl_yylval.ival = OP_ANDASSIGN;
 481         else if (toketype == OROR)
 482             pl_yylval.ival = OP_ORASSIGN;
 483         else if (toketype == DORDOR)
 484             pl_yylval.ival = OP_DORASSIGN;
 485         toketype = ASSIGNOP;
 486     }
 487     return REPORT(toketype);
 488 }
 489
 490 /*
 491  * S_no_op
 492  * When Perl expects an operator and finds something else, no_op
 493  * prints the warning.  It always prints "<something> found where
 494  * operator expected.  It prints "Missing semicolon on previous line?"
 495  * if the surprise occurs at the start of the line.  "do you need to
 496  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 497  * where the compiler doesn't know if foo is a method call or a function.
 498  * It prints "Missing operator before end of line" if there's nothing
 499  * after the missing operator, or "... before <...>" if there is something
 500  * after the missing operator.
 501  *
 502  * PL_bufptr is expected to point to the start of the thing that was found,
 503  * and s after the next token or partial token.
 504  */
 505
 506 STATIC void
 507 S_no_op(pTHX_ const char *const what, char *s)
 508 {
 509     char * const oldbp = PL_bufptr;
 510     const bool is_first = (PL_oldbufptr == PL_linestart);
 511
 512     PERL_ARGS_ASSERT_NO_OP;
 513
 514     if (!s)
 515         s = oldbp;
 516     else
 517         PL_bufptr = s;
 518     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 519     if (ckWARN_d(WARN_SYNTAX)) {
 520         if (is_first)
 521             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 522                     "\t(Missing semicolon on previous line?)\n");
 523         else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
 524                                                            PL_bufend,
 525                                                            UTF))
 526         {
 527             const char *t;
 528             for (t = PL_oldoldbufptr;
 529                  (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
 530                  t += UTF ? UTF8SKIP(t) : 1)
 531             {
 532                 NOOP;
 533             }
 534             if (t < PL_bufptr && isSPACE(*t))
 535                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 536                         "\t(Do you need to predeclare %" UTF8f "?)\n",
 537                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 538         }
 539         else {
 540             assert(s >= oldbp);
 541             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 542                     "\t(Missing operator before %" UTF8f "?)\n",
 543                      UTF8fARG(UTF, s - oldbp, oldbp));
 544         }
 545     }
 546     PL_bufptr = oldbp;
 547 }
 548
 549 /*
 550  * S_missingterm
 551  * Complain about missing quote/regexp/heredoc terminator.
 552  * If it's called with NULL then it cauterizes the line buffer.
 553  * If we're in a delimited string and the delimiter is a control
 554  * character, it's reformatted into a two-char sequence like ^C.
 555  * This is fatal.
 556  */
 557
 558 STATIC void
 559 S_missingterm(pTHX_ char *s, STRLEN len)
 560 {
 561     char tmpbuf[UTF8_MAXBYTES + 1];
 562     char q;
 563     bool uni = FALSE;
 564     SV *sv;
 565     if (s) {
 566         char * const nl = (char *) my_memrchr(s, '\n', len);
 567         if (nl) {
 568             *nl = '\0';
 569             len = nl - s;
 570         }
 571         uni = UTF;
 572     }
 573     else if (PL_multi_close < 32) {
 574         *tmpbuf = '^';
 575         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 576         tmpbuf[2] = '\0';
 577         s = tmpbuf;
 578         len = 2;
 579     }
 580     else {
 581         if (LIKELY(PL_multi_close < 256)) {
 582             *tmpbuf = (char)PL_multi_close;
 583             tmpbuf[1] = '\0';
 584             len = 1;
 585         }
 586         else {
 587             char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
 588             *end = '\0';
 589             len = end - tmpbuf;
 590             uni = TRUE;
 591         }
 592         s = tmpbuf;
 593     }
 594     q = memchr(s, '"', len) ? '\'' : '"';
 595     sv = sv_2mortal(newSVpvn(s, len));
 596     if (uni)
 597         SvUTF8_on(sv);
 598     Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
 599                      " anywhere before EOF", q, SVfARG(sv), q);
 600 }
 601
 602 #include "feature.h"
 603
 604 /*
 605  * Check whether the named feature is enabled.
 606  */
 607 bool
 608 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 609 {
 610     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 611
 612     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 613
 614     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 615
 616     if (namelen > MAX_FEATURE_LEN)
 617         return FALSE;
 618     memcpy(&he_name[8], name, namelen);
 619
 620     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 621                                      REFCOUNTED_HE_EXISTS));
 622 }
 623
 624 /*
 625  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 626  * utf16-to-utf8-reversed.
 627  */
 628
 629 #ifdef PERL_CR_FILTER
 630 static void
 631 strip_return(SV *sv)
 632 {
 633     const char *s = SvPVX_const(sv);
 634     const char * const e = s + SvCUR(sv);
 635
 636     PERL_ARGS_ASSERT_STRIP_RETURN;
 637
 638     /* outer loop optimized to do nothing if there are no CR-LFs */
 639     while (s < e) {
 640         if (*s++ == '\r' && *s == '\n') {
 641             /* hit a CR-LF, need to copy the rest */
 642             char *d = s - 1;
 643             *d++ = *s++;
 644             while (s < e) {
 645                 if (*s == '\r' && s[1] == '\n')
 646                     s++;
 647                 *d++ = *s++;
 648             }
 649             SvCUR(sv) -= s - d;
 650             return;
 651         }
 652     }
 653 }
 654
 655 STATIC I32
 656 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 657 {
 658     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 659     if (count > 0 && !maxlen)
 660         strip_return(sv);
 661     return count;
 662 }
 663 #endif
 664
 665 /*
 666 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 667
 668 Creates and initialises a new lexer/parser state object, supplying
 669 a context in which to lex and parse from a new source of Perl code.
 670 A pointer to the new state object is placed in L</PL_parser>.  An entry
 671 is made on the save stack so that upon unwinding, the new state object
 672 will be destroyed and the former value of L</PL_parser> will be restored.
 673 Nothing else need be done to clean up the parsing context.
 674
 675 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
 676 non-null, provides a string (in SV form) containing code to be parsed.
 677 A copy of the string is made, so subsequent modification of C<line>
 678 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
 679 from which code will be read to be parsed.  If both are non-null, the
 680 code in C<line> comes first and must consist of complete lines of input,
 681 and C<rsfp> supplies the remainder of the source.
 682
 683 The C<flags> parameter is reserved for future use.  Currently it is only
 684 used by perl internally, so extensions should always pass zero.
 685
 686 =cut
 687 */
 688
 689 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 690    can share filters with the current parser.
 691    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 692    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 693    destruction. This is used to handle the case of defaulting to reading the
 694    script from the standard input because no filename was given on the command
 695    line (without getting confused by situation where STDIN has been closed, so
 696    the script handle is opened on fd 0)  */
 697
 698 void
 699 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 700 {
 701     const char *s = NULL;
 702     yy_parser *parser, *oparser;
 703
 704     if (flags && flags & ~LEX_START_FLAGS)
 705         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 706
 707     /* create and initialise a parser */
 708
 709     Newxz(parser, 1, yy_parser);
 710     parser->old_parser = oparser = PL_parser;
 711     PL_parser = parser;
 712
 713     parser->stack = NULL;
 714     parser->stack_max1 = NULL;
 715     parser->ps = NULL;
 716
 717     /* on scope exit, free this parser and restore any outer one */
 718     SAVEPARSER(parser);
 719     parser->saved_curcop = PL_curcop;
 720
 721     /* initialise lexer state */
 722
 723     parser->nexttoke = 0;
 724     parser->error_count = oparser ? oparser->error_count : 0;
 725     parser->copline = parser->preambling = NOLINE;
 726     parser->lex_state = LEX_NORMAL;
 727     parser->expect = XSTATE;
 728     parser->rsfp = rsfp;
 729     parser->recheck_utf8_validity = FALSE;
 730     parser->rsfp_filters =
 731       !(flags & LEX_START_SAME_FILTER) || !oparser
 732         ? NULL
 733         : MUTABLE_AV(SvREFCNT_inc(
 734             oparser->rsfp_filters
 735              ? oparser->rsfp_filters
 736              : (oparser->rsfp_filters = newAV())
 737           ));
 738
 739     Newx(parser->lex_brackstack, 120, char);
 740     Newx(parser->lex_casestack, 12, char);
 741     *parser->lex_casestack = '\0';
 742     Newxz(parser->lex_shared, 1, LEXSHARED);
 743
 744     if (line) {
 745         STRLEN len;
 746         const U8* first_bad_char_loc;
 747
 748         s = SvPV_const(line, len);
 749
 750         if (   SvUTF8(line)
 751             && UNLIKELY(! is_utf8_string_loc((U8 *) s,
 752                                              SvCUR(line),
 753                                              &first_bad_char_loc)))
 754         {
 755             _force_out_malformed_utf8_message(first_bad_char_loc,
 756                                               (U8 *) s + SvCUR(line),
 757                                               0,
 758                                               1 /* 1 means die */ );
 759             NOT_REACHED; /* NOTREACHED */
 760         }
 761
 762         parser->linestr = flags & LEX_START_COPIED
 763                             ? SvREFCNT_inc_simple_NN(line)
 764                             : newSVpvn_flags(s, len, SvUTF8(line));
 765         if (!rsfp)
 766             sv_catpvs(parser->linestr, "\n;");
 767     } else {
 768         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 769     }
 770
 771     parser->oldoldbufptr =
 772         parser->oldbufptr =
 773         parser->bufptr =
 774         parser->linestart = SvPVX(parser->linestr);
 775     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 776     parser->last_lop = parser->last_uni = NULL;
 777
 778     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 779                                                         |LEX_DONT_CLOSE_RSFP));
 780     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 781                                                         |LEX_DONT_CLOSE_RSFP));
 782
 783     parser->in_pod = parser->filtered = 0;
 784 }
 785
 786
 787 /* delete a parser object */
 788
 789 void
 790 Perl_parser_free(pTHX_  const yy_parser *parser)
 791 {
 792     PERL_ARGS_ASSERT_PARSER_FREE;
 793
 794     PL_curcop = parser->saved_curcop;
 795     SvREFCNT_dec(parser->linestr);
 796
 797     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 798         PerlIO_clearerr(parser->rsfp);
 799     else if (parser->rsfp && (!parser->old_parser
 800           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 801         PerlIO_close(parser->rsfp);
 802     SvREFCNT_dec(parser->rsfp_filters);
 803     SvREFCNT_dec(parser->lex_stuff);
 804     SvREFCNT_dec(parser->lex_sub_repl);
 805
 806     Safefree(parser->lex_brackstack);
 807     Safefree(parser->lex_casestack);
 808     Safefree(parser->lex_shared);
 809     PL_parser = parser->old_parser;
 810     Safefree(parser);
 811 }
 812
 813 void
 814 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 815 {
 816     I32 nexttoke = parser->nexttoke;
 817     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 818     while (nexttoke--) {
 819         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 820          && parser->nextval[nexttoke].opval
 821          && parser->nextval[nexttoke].opval->op_slabbed
 822          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 823             op_free(parser->nextval[nexttoke].opval);
 824             parser->nextval[nexttoke].opval = NULL;
 825         }
 826     }
 827 }
 828
 829
 830 /*
 831 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 832
 833 Buffer scalar containing the chunk currently under consideration of the
 834 text currently being lexed.  This is always a plain string scalar (for
 835 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 836 normal scalar means; instead refer to the buffer directly by the pointer
 837 variables described below.
 838
 839 The lexer maintains various C<char*> pointers to things in the
 840 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 841 reallocated, all of these pointers must be updated.  Don't attempt to
 842 do this manually, but rather use L</lex_grow_linestr> if you need to
 843 reallocate the buffer.
 844
 845 The content of the text chunk in the buffer is commonly exactly one
 846 complete line of input, up to and including a newline terminator,
 847 but there are situations where it is otherwise.  The octets of the
 848 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 849 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 850 flag on this scalar, which may disagree with it.
 851
 852 For direct examination of the buffer, the variable
 853 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 854 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 855 of these pointers is usually preferable to examination of the scalar
 856 through normal scalar means.
 857
 858 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 859
 860 Direct pointer to the end of the chunk of text currently being lexed, the
 861 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 862 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 863 always located at the end of the buffer, and does not count as part of
 864 the buffer's contents.
 865
 866 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 867
 868 Points to the current position of lexing inside the lexer buffer.
 869 Characters around this point may be freely examined, within
 870 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 871 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 872 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 873
 874 Lexing code (whether in the Perl core or not) moves this pointer past
 875 the characters that it consumes.  It is also expected to perform some
 876 bookkeeping whenever a newline character is consumed.  This movement
 877 can be more conveniently performed by the function L</lex_read_to>,
 878 which handles newlines appropriately.
 879
 880 Interpretation of the buffer's octets can be abstracted out by
 881 using the slightly higher-level functions L</lex_peek_unichar> and
 882 L</lex_read_unichar>.
 883
 884 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 885
 886 Points to the start of the current line inside the lexer buffer.
 887 This is useful for indicating at which column an error occurred, and
 888 not much else.  This must be updated by any lexing code that consumes
 889 a newline; the function L</lex_read_to> handles this detail.
 890
 891 =cut
 892 */
 893
 894 /*
 895 =for apidoc Amx|bool|lex_bufutf8
 896
 897 Indicates whether the octets in the lexer buffer
 898 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 899 of Unicode characters.  If not, they should be interpreted as Latin-1
 900 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 901
 902 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 903 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 904 encoding.
 905
 906 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 907 is significant, but not the whole story regarding the input character
 908 encoding.  Normally, when a file is being read, the scalar contains octets
 909 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 910 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 911 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 912 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 913 is in effect.  This logic may change in the future; use this function
 914 instead of implementing the logic yourself.
 915
 916 =cut
 917 */
 918
 919 bool
 920 Perl_lex_bufutf8(pTHX)
 921 {
 922     return UTF;
 923 }
 924
 925 /*
 926 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 927
 928 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 929 at least C<len> octets (including terminating C<NUL>).  Returns a
 930 pointer to the reallocated buffer.  This is necessary before making
 931 any direct modification of the buffer that would increase its length.
 932 L</lex_stuff_pvn> provides a more convenient way to insert text into
 933 the buffer.
 934
 935 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 936 this function updates all of the lexer's variables that point directly
 937 into the buffer.
 938
 939 =cut
 940 */
 941
 942 char *
 943 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 944 {
 945     SV *linestr;
 946     char *buf;
 947     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 948     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 949     bool current;
 950
 951     linestr = PL_parser->linestr;
 952     buf = SvPVX(linestr);
 953     if (len <= SvLEN(linestr))
 954         return buf;
 955
 956     /* Is the lex_shared linestr SV the same as the current linestr SV?
 957      * Only in this case does re_eval_start need adjusting, since it
 958      * points within lex_shared->ls_linestr's buffer */
 959     current = (   !PL_parser->lex_shared->ls_linestr
 960                || linestr == PL_parser->lex_shared->ls_linestr);
 961
 962     bufend_pos = PL_parser->bufend - buf;
 963     bufptr_pos = PL_parser->bufptr - buf;
 964     oldbufptr_pos = PL_parser->oldbufptr - buf;
 965     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 966     linestart_pos = PL_parser->linestart - buf;
 967     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 968     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 969     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
 970                             PL_parser->lex_shared->re_eval_start - buf : 0;
 971
 972     buf = sv_grow(linestr, len);
 973
 974     PL_parser->bufend = buf + bufend_pos;
 975     PL_parser->bufptr = buf + bufptr_pos;
 976     PL_parser->oldbufptr = buf + oldbufptr_pos;
 977     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 978     PL_parser->linestart = buf + linestart_pos;
 979     if (PL_parser->last_uni)
 980         PL_parser->last_uni = buf + last_uni_pos;
 981     if (PL_parser->last_lop)
 982         PL_parser->last_lop = buf + last_lop_pos;
 983     if (current && PL_parser->lex_shared->re_eval_start)
 984         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 985     return buf;
 986 }
 987
 988 /*
 989 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 990
 991 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 992 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 993 reallocating the buffer if necessary.  This means that lexing code that
 994 runs later will see the characters as if they had appeared in the input.
 995 It is not recommended to do this as part of normal parsing, and most
 996 uses of this facility run the risk of the inserted characters being
 997 interpreted in an unintended manner.
 998
 999 The string to be inserted is represented by C<len> octets starting
1000 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1001 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1002 The characters are recoded for the lexer buffer, according to how the
1003 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1004 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1005 function is more convenient.
1006
1007 =cut
1008 */
1009
1010 void
1011 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1012 {
1013     dVAR;
1014     char *bufptr;
1015     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1016     if (flags & ~(LEX_STUFF_UTF8))
1017         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1018     if (UTF) {
1019         if (flags & LEX_STUFF_UTF8) {
1020             goto plain_copy;
1021         } else {
1022             STRLEN highhalf = 0;    /* Count of variants */
1023             const char *p, *e = pv+len;
1024             for (p = pv; p != e; p++) {
1025                 if (! UTF8_IS_INVARIANT(*p)) {
1026                     highhalf++;
1027                 }
1028             }
1029             if (!highhalf)
1030                 goto plain_copy;
1031             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1032             bufptr = PL_parser->bufptr;
1033             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1034             SvCUR_set(PL_parser->linestr,
1035                 SvCUR(PL_parser->linestr) + len+highhalf);
1036             PL_parser->bufend += len+highhalf;
1037             for (p = pv; p != e; p++) {
1038                 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1039             }
1040         }
1041     } else {
1042         if (flags & LEX_STUFF_UTF8) {
1043             STRLEN highhalf = 0;
1044             const char *p, *e = pv+len;
1045             for (p = pv; p != e; p++) {
1046                 U8 c = (U8)*p;
1047                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1048                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1049                                 "non-Latin-1 character into Latin-1 input");
1050                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1051                     p++;
1052                     highhalf++;
1053                 } else assert(UTF8_IS_INVARIANT(c));
1054             }
1055             if (!highhalf)
1056                 goto plain_copy;
1057             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1058             bufptr = PL_parser->bufptr;
1059             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1060             SvCUR_set(PL_parser->linestr,
1061                 SvCUR(PL_parser->linestr) + len-highhalf);
1062             PL_parser->bufend += len-highhalf;
1063             p = pv;
1064             while (p < e) {
1065                 if (UTF8_IS_INVARIANT(*p)) {
1066                     *bufptr++ = *p;
1067                     p++;
1068                 }
1069                 else {
1070                     assert(p < e -1 );
1071                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1072                     p += 2;
1073                 }
1074             }
1075         } else {
1076           plain_copy:
1077             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1078             bufptr = PL_parser->bufptr;
1079             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1080             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1081             PL_parser->bufend += len;
1082             Copy(pv, bufptr, len, char);
1083         }
1084     }
1085 }
1086
1087 /*
1088 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1089
1090 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1091 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1092 reallocating the buffer if necessary.  This means that lexing code that
1093 runs later will see the characters as if they had appeared in the input.
1094 It is not recommended to do this as part of normal parsing, and most
1095 uses of this facility run the risk of the inserted characters being
1096 interpreted in an unintended manner.
1097
1098 The string to be inserted is represented by octets starting at C<pv>
1099 and continuing to the first nul.  These octets are interpreted as either
1100 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1101 in C<flags>.  The characters are recoded for the lexer buffer, according
1102 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1103 If it is not convenient to nul-terminate a string to be inserted, the
1104 L</lex_stuff_pvn> function is more appropriate.
1105
1106 =cut
1107 */
1108
1109 void
1110 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1111 {
1112     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1113     lex_stuff_pvn(pv, strlen(pv), flags);
1114 }
1115
1116 /*
1117 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1118
1119 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1120 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1121 reallocating the buffer if necessary.  This means that lexing code that
1122 runs later will see the characters as if they had appeared in the input.
1123 It is not recommended to do this as part of normal parsing, and most
1124 uses of this facility run the risk of the inserted characters being
1125 interpreted in an unintended manner.
1126
1127 The string to be inserted is the string value of C<sv>.  The characters
1128 are recoded for the lexer buffer, according to how the buffer is currently
1129 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1130 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1131 need to construct a scalar.
1132
1133 =cut
1134 */
1135
1136 void
1137 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1138 {
1139     char *pv;
1140     STRLEN len;
1141     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1142     if (flags)
1143         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1144     pv = SvPV(sv, len);
1145     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1146 }
1147
1148 /*
1149 =for apidoc Amx|void|lex_unstuff|char *ptr
1150
1151 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1152 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1153 This hides the discarded text from any lexing code that runs later,
1154 as if the text had never appeared.
1155
1156 This is not the normal way to consume lexed text.  For that, use
1157 L</lex_read_to>.
1158
1159 =cut
1160 */
1161
1162 void
1163 Perl_lex_unstuff(pTHX_ char *ptr)
1164 {
1165     char *buf, *bufend;
1166     STRLEN unstuff_len;
1167     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1168     buf = PL_parser->bufptr;
1169     if (ptr < buf)
1170         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1171     if (ptr == buf)
1172         return;
1173     bufend = PL_parser->bufend;
1174     if (ptr > bufend)
1175         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1176     unstuff_len = ptr - buf;
1177     Move(ptr, buf, bufend+1-ptr, char);
1178     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1179     PL_parser->bufend = bufend - unstuff_len;
1180 }
1181
1182 /*
1183 =for apidoc Amx|void|lex_read_to|char *ptr
1184
1185 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1186 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1187 performing the correct bookkeeping whenever a newline character is passed.
1188 This is the normal way to consume lexed text.
1189
1190 Interpretation of the buffer's octets can be abstracted out by
1191 using the slightly higher-level functions L</lex_peek_unichar> and
1192 L</lex_read_unichar>.
1193
1194 =cut
1195 */
1196
1197 void
1198 Perl_lex_read_to(pTHX_ char *ptr)
1199 {
1200     char *s;
1201     PERL_ARGS_ASSERT_LEX_READ_TO;
1202     s = PL_parser->bufptr;
1203     if (ptr < s || ptr > PL_parser->bufend)
1204         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1205     for (; s != ptr; s++)
1206         if (*s == '\n') {
1207             COPLINE_INC_WITH_HERELINES;
1208             PL_parser->linestart = s+1;
1209         }
1210     PL_parser->bufptr = ptr;
1211 }
1212
1213 /*
1214 =for apidoc Amx|void|lex_discard_to|char *ptr
1215
1216 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1217 up to C<ptr>.  The remaining content of the buffer will be moved, and
1218 all pointers into the buffer updated appropriately.  C<ptr> must not
1219 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1220 it is not permitted to discard text that has yet to be lexed.
1221
1222 Normally it is not necessarily to do this directly, because it suffices to
1223 use the implicit discarding behaviour of L</lex_next_chunk> and things
1224 based on it.  However, if a token stretches across multiple lines,
1225 and the lexing code has kept multiple lines of text in the buffer for
1226 that purpose, then after completion of the token it would be wise to
1227 explicitly discard the now-unneeded earlier lines, to avoid future
1228 multi-line tokens growing the buffer without bound.
1229
1230 =cut
1231 */
1232
1233 void
1234 Perl_lex_discard_to(pTHX_ char *ptr)
1235 {
1236     char *buf;
1237     STRLEN discard_len;
1238     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1239     buf = SvPVX(PL_parser->linestr);
1240     if (ptr < buf)
1241         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1242     if (ptr == buf)
1243         return;
1244     if (ptr > PL_parser->bufptr)
1245         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1246     discard_len = ptr - buf;
1247     if (PL_parser->oldbufptr < ptr)
1248         PL_parser->oldbufptr = ptr;
1249     if (PL_parser->oldoldbufptr < ptr)
1250         PL_parser->oldoldbufptr = ptr;
1251     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1252         PL_parser->last_uni = NULL;
1253     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1254         PL_parser->last_lop = NULL;
1255     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1256     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1257     PL_parser->bufend -= discard_len;
1258     PL_parser->bufptr -= discard_len;
1259     PL_parser->oldbufptr -= discard_len;
1260     PL_parser->oldoldbufptr -= discard_len;
1261     if (PL_parser->last_uni)
1262         PL_parser->last_uni -= discard_len;
1263     if (PL_parser->last_lop)
1264         PL_parser->last_lop -= discard_len;
1265 }
1266
1267 void
1268 Perl_notify_parser_that_changed_to_utf8(pTHX)
1269 {
1270     /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1271      * off to on.  At compile time, this has the effect of entering a 'use
1272      * utf8' section.  This means that any input was not previously checked for
1273      * UTF-8 (because it was off), but now we do need to check it, or our
1274      * assumptions about the input being sane could be wrong, and we could
1275      * segfault.  This routine just sets a flag so that the next time we look
1276      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1277      * proper phase, there may not be a parser object, but if there is, setting
1278      * the flag is harmless */
1279
1280     if (PL_parser) {
1281         PL_parser->recheck_utf8_validity = TRUE;
1282     }
1283 }
1284
1285 /*
1286 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1287
1288 Reads in the next chunk of text to be lexed, appending it to
1289 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1290 looked to the end of the current chunk and wants to know more.  It is
1291 usual, but not necessary, for lexing to have consumed the entirety of
1292 the current chunk at this time.
1293
1294 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1295 chunk (i.e., the current chunk has been entirely consumed), normally the
1296 current chunk will be discarded at the same time that the new chunk is
1297 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1298 will not be discarded.  If the current chunk has not been entirely
1299 consumed, then it will not be discarded regardless of the flag.
1300
1301 Returns true if some new text was added to the buffer, or false if the
1302 buffer has reached the end of the input text.
1303
1304 =cut
1305 */
1306
1307 #define LEX_FAKE_EOF 0x80000000
1308 #define LEX_NO_TERM  0x40000000 /* here-doc */
1309
1310 bool
1311 Perl_lex_next_chunk(pTHX_ U32 flags)
1312 {
1313     SV *linestr;
1314     char *buf;
1315     STRLEN old_bufend_pos, new_bufend_pos;
1316     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1317     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1318     bool got_some_for_debugger = 0;
1319     bool got_some;
1320
1321     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1322         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1323     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1324         return FALSE;
1325     linestr = PL_parser->linestr;
1326     buf = SvPVX(linestr);
1327     if (!(flags & LEX_KEEP_PREVIOUS)
1328           && PL_parser->bufptr == PL_parser->bufend)
1329     {
1330         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1331         linestart_pos = 0;
1332         if (PL_parser->last_uni != PL_parser->bufend)
1333             PL_parser->last_uni = NULL;
1334         if (PL_parser->last_lop != PL_parser->bufend)
1335             PL_parser->last_lop = NULL;
1336         last_uni_pos = last_lop_pos = 0;
1337         *buf = 0;
1338         SvCUR(linestr) = 0;
1339     } else {
1340         old_bufend_pos = PL_parser->bufend - buf;
1341         bufptr_pos = PL_parser->bufptr - buf;
1342         oldbufptr_pos = PL_parser->oldbufptr - buf;
1343         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1344         linestart_pos = PL_parser->linestart - buf;
1345         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1346         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1347     }
1348     if (flags & LEX_FAKE_EOF) {
1349         goto eof;
1350     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1351         got_some = 0;
1352     } else if (filter_gets(linestr, old_bufend_pos)) {
1353         got_some = 1;
1354         got_some_for_debugger = 1;
1355     } else if (flags & LEX_NO_TERM) {
1356         got_some = 0;
1357     } else {
1358         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1359             SvPVCLEAR(linestr);
1360         eof:
1361         /* End of real input.  Close filehandle (unless it was STDIN),
1362          * then add implicit termination.
1363          */
1364         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1365             PerlIO_clearerr(PL_parser->rsfp);
1366         else if (PL_parser->rsfp)
1367             (void)PerlIO_close(PL_parser->rsfp);
1368         PL_parser->rsfp = NULL;
1369         PL_parser->in_pod = PL_parser->filtered = 0;
1370         if (!PL_in_eval && PL_minus_p) {
1371             sv_catpvs(linestr,
1372                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1373             PL_minus_n = PL_minus_p = 0;
1374         } else if (!PL_in_eval && PL_minus_n) {
1375             sv_catpvs(linestr, /*{*/";}");
1376             PL_minus_n = 0;
1377         } else
1378             sv_catpvs(linestr, ";");
1379         got_some = 1;
1380     }
1381     buf = SvPVX(linestr);
1382     new_bufend_pos = SvCUR(linestr);
1383     PL_parser->bufend = buf + new_bufend_pos;
1384     PL_parser->bufptr = buf + bufptr_pos;
1385
1386     if (UTF) {
1387         const U8* first_bad_char_loc;
1388         if (UNLIKELY(! is_utf8_string_loc(
1389                             (U8 *) PL_parser->bufptr,
1390                                    PL_parser->bufend - PL_parser->bufptr,
1391                                    &first_bad_char_loc)))
1392         {
1393             _force_out_malformed_utf8_message(first_bad_char_loc,
1394                                               (U8 *) PL_parser->bufend,
1395                                               0,
1396                                               1 /* 1 means die */ );
1397             NOT_REACHED; /* NOTREACHED */
1398         }
1399     }
1400
1401     PL_parser->oldbufptr = buf + oldbufptr_pos;
1402     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1403     PL_parser->linestart = buf + linestart_pos;
1404     if (PL_parser->last_uni)
1405         PL_parser->last_uni = buf + last_uni_pos;
1406     if (PL_parser->last_lop)
1407         PL_parser->last_lop = buf + last_lop_pos;
1408     if (PL_parser->preambling != NOLINE) {
1409         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1410         PL_parser->preambling = NOLINE;
1411     }
1412     if (   got_some_for_debugger
1413         && PERLDB_LINE_OR_SAVESRC
1414         && PL_curstash != PL_debstash)
1415     {
1416         /* debugger active and we're not compiling the debugger code,
1417          * so store the line into the debugger's array of lines
1418          */
1419         update_debugger_info(NULL, buf+old_bufend_pos,
1420             new_bufend_pos-old_bufend_pos);
1421     }
1422     return got_some;
1423 }
1424
1425 /*
1426 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1427
1428 Looks ahead one (Unicode) character in the text currently being lexed.
1429 Returns the codepoint (unsigned integer value) of the next character,
1430 or -1 if lexing has reached the end of the input text.  To consume the
1431 peeked character, use L</lex_read_unichar>.
1432
1433 If the next character is in (or extends into) the next chunk of input
1434 text, the next chunk will be read in.  Normally the current chunk will be
1435 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1436 bit set, then the current chunk will not be discarded.
1437
1438 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1439 is encountered, an exception is generated.
1440
1441 =cut
1442 */
1443
1444 I32
1445 Perl_lex_peek_unichar(pTHX_ U32 flags)
1446 {
1447     dVAR;
1448     char *s, *bufend;
1449     if (flags & ~(LEX_KEEP_PREVIOUS))
1450         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1451     s = PL_parser->bufptr;
1452     bufend = PL_parser->bufend;
1453     if (UTF) {
1454         U8 head;
1455         I32 unichar;
1456         STRLEN len, retlen;
1457         if (s == bufend) {
1458             if (!lex_next_chunk(flags))
1459                 return -1;
1460             s = PL_parser->bufptr;
1461             bufend = PL_parser->bufend;
1462         }
1463         head = (U8)*s;
1464         if (UTF8_IS_INVARIANT(head))
1465             return head;
1466         if (UTF8_IS_START(head)) {
1467             len = UTF8SKIP(&head);
1468             while ((STRLEN)(bufend-s) < len) {
1469                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1470                     break;
1471                 s = PL_parser->bufptr;
1472                 bufend = PL_parser->bufend;
1473             }
1474         }
1475         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1476         if (retlen == (STRLEN)-1) {
1477             _force_out_malformed_utf8_message((U8 *) s,
1478                                               (U8 *) bufend,
1479                                               0,
1480                                               1 /* 1 means die */ );
1481             NOT_REACHED; /* NOTREACHED */
1482         }
1483         return unichar;
1484     } else {
1485         if (s == bufend) {
1486             if (!lex_next_chunk(flags))
1487                 return -1;
1488             s = PL_parser->bufptr;
1489         }
1490         return (U8)*s;
1491     }
1492 }
1493
1494 /*
1495 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1496
1497 Reads the next (Unicode) character in the text currently being lexed.
1498 Returns the codepoint (unsigned integer value) of the character read,
1499 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1500 if lexing has reached the end of the input text.  To non-destructively
1501 examine the next character, use L</lex_peek_unichar> instead.
1502
1503 If the next character is in (or extends into) the next chunk of input
1504 text, the next chunk will be read in.  Normally the current chunk will be
1505 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1506 bit set, then the current chunk will not be discarded.
1507
1508 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1509 is encountered, an exception is generated.
1510
1511 =cut
1512 */
1513
1514 I32
1515 Perl_lex_read_unichar(pTHX_ U32 flags)
1516 {
1517     I32 c;
1518     if (flags & ~(LEX_KEEP_PREVIOUS))
1519         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1520     c = lex_peek_unichar(flags);
1521     if (c != -1) {
1522         if (c == '\n')
1523             COPLINE_INC_WITH_HERELINES;
1524         if (UTF)
1525             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1526         else
1527             ++(PL_parser->bufptr);
1528     }
1529     return c;
1530 }
1531
1532 /*
1533 =for apidoc Amx|void|lex_read_space|U32 flags
1534
1535 Reads optional spaces, in Perl style, in the text currently being
1536 lexed.  The spaces may include ordinary whitespace characters and
1537 Perl-style comments.  C<#line> directives are processed if encountered.
1538 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1539 at a non-space character (or the end of the input text).
1540
1541 If spaces extend into the next chunk of input text, the next chunk will
1542 be read in.  Normally the current chunk will be discarded at the same
1543 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1544 chunk will not be discarded.
1545
1546 =cut
1547 */
1548
1549 #define LEX_NO_INCLINE    0x40000000
1550 #define LEX_NO_NEXT_CHUNK 0x80000000
1551
1552 void
1553 Perl_lex_read_space(pTHX_ U32 flags)
1554 {
1555     char *s, *bufend;
1556     const bool can_incline = !(flags & LEX_NO_INCLINE);
1557     bool need_incline = 0;
1558     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1559         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1560     s = PL_parser->bufptr;
1561     bufend = PL_parser->bufend;
1562     while (1) {
1563         char c = *s;
1564         if (c == '#') {
1565             do {
1566                 c = *++s;
1567             } while (!(c == '\n' || (c == 0 && s == bufend)));
1568         } else if (c == '\n') {
1569             s++;
1570             if (can_incline) {
1571                 PL_parser->linestart = s;
1572                 if (s == bufend)
1573                     need_incline = 1;
1574                 else
1575                     incline(s, bufend);
1576             }
1577         } else if (isSPACE(c)) {
1578             s++;
1579         } else if (c == 0 && s == bufend) {
1580             bool got_more;
1581             line_t l;
1582             if (flags & LEX_NO_NEXT_CHUNK)
1583                 break;
1584             PL_parser->bufptr = s;
1585             l = CopLINE(PL_curcop);
1586             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1587             got_more = lex_next_chunk(flags);
1588             CopLINE_set(PL_curcop, l);
1589             s = PL_parser->bufptr;
1590             bufend = PL_parser->bufend;
1591             if (!got_more)
1592                 break;
1593             if (can_incline && need_incline && PL_parser->rsfp) {
1594                 incline(s, bufend);
1595                 need_incline = 0;
1596             }
1597         } else if (!c) {
1598             s++;
1599         } else {
1600             break;
1601         }
1602     }
1603     PL_parser->bufptr = s;
1604 }
1605
1606 /*
1607
1608 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1609
1610 This function performs syntax checking on a prototype, C<proto>.
1611 If C<warn> is true, any illegal characters or mismatched brackets
1612 will trigger illegalproto warnings, declaring that they were
1613 detected in the prototype for C<name>.
1614
1615 The return value is C<true> if this is a valid prototype, and
1616 C<false> if it is not, regardless of whether C<warn> was C<true> or
1617 C<false>.
1618
1619 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1620
1621 =cut
1622
1623  */
1624
1625 bool
1626 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1627 {
1628     STRLEN len, origlen;
1629     char *p;
1630     bool bad_proto = FALSE;
1631     bool in_brackets = FALSE;
1632     bool after_slash = FALSE;
1633     char greedy_proto = ' ';
1634     bool proto_after_greedy_proto = FALSE;
1635     bool must_be_last = FALSE;
1636     bool underscore = FALSE;
1637     bool bad_proto_after_underscore = FALSE;
1638
1639     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1640
1641     if (!proto)
1642         return TRUE;
1643
1644     p = SvPV(proto, len);
1645     origlen = len;
1646     for (; len--; p++) {
1647         if (!isSPACE(*p)) {
1648             if (must_be_last)
1649                 proto_after_greedy_proto = TRUE;
1650             if (underscore) {
1651                 if (!strchr(";@%", *p))
1652                     bad_proto_after_underscore = TRUE;
1653                 underscore = FALSE;
1654             }
1655             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1656                 bad_proto = TRUE;
1657             }
1658             else {
1659                 if (*p == '[')
1660                     in_brackets = TRUE;
1661                 else if (*p == ']')
1662                     in_brackets = FALSE;
1663                 else if ((*p == '@' || *p == '%')
1664                          && !after_slash
1665                          && !in_brackets )
1666                 {
1667                     must_be_last = TRUE;
1668                     greedy_proto = *p;
1669                 }
1670                 else if (*p == '_')
1671                     underscore = TRUE;
1672             }
1673             if (*p == '\\')
1674                 after_slash = TRUE;
1675             else
1676                 after_slash = FALSE;
1677         }
1678     }
1679
1680     if (warn) {
1681         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1682         p -= origlen;
1683         p = SvUTF8(proto)
1684             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1685                              origlen, UNI_DISPLAY_ISPRINT)
1686             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1687
1688         if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1689             SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1690             sv_catpvs(name2, "::");
1691             sv_catsv(name2, (SV *)name);
1692             name = name2;
1693         }
1694
1695         if (proto_after_greedy_proto)
1696             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1697                         "Prototype after '%c' for %" SVf " : %s",
1698                         greedy_proto, SVfARG(name), p);
1699         if (in_brackets)
1700             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1701                         "Missing ']' in prototype for %" SVf " : %s",
1702                         SVfARG(name), p);
1703         if (bad_proto)
1704             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1705                         "Illegal character in prototype for %" SVf " : %s",
1706                         SVfARG(name), p);
1707         if (bad_proto_after_underscore)
1708             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1709                         "Illegal character after '_' in prototype for %" SVf " : %s",
1710                         SVfARG(name), p);
1711     }
1712
1713     return (! (proto_after_greedy_proto || bad_proto) );
1714 }
1715
1716 /*
1717  * S_incline
1718  * This subroutine has nothing to do with tilting, whether at windmills
1719  * or pinball tables.  Its name is short for "increment line".  It
1720  * increments the current line number in CopLINE(PL_curcop) and checks
1721  * to see whether the line starts with a comment of the form
1722  *    # line 500 "foo.pm"
1723  * If so, it sets the current line number and file to the values in the comment.
1724  */
1725
1726 STATIC void
1727 S_incline(pTHX_ const char *s, const char *end)
1728 {
1729     const char *t;
1730     const char *n;
1731     const char *e;
1732     line_t line_num;
1733     UV uv;
1734
1735     PERL_ARGS_ASSERT_INCLINE;
1736
1737     assert(end >= s);
1738
1739     COPLINE_INC_WITH_HERELINES;
1740     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1741      && s+1 == PL_bufend && *s == ';') {
1742         /* fake newline in string eval */
1743         CopLINE_dec(PL_curcop);
1744         return;
1745     }
1746     if (*s++ != '#')
1747         return;
1748     while (SPACE_OR_TAB(*s))
1749         s++;
1750     if (memBEGINs(s, (STRLEN) (end - s), "line"))
1751         s += sizeof("line") - 1;
1752     else
1753         return;
1754     if (SPACE_OR_TAB(*s))
1755         s++;
1756     else
1757         return;
1758     while (SPACE_OR_TAB(*s))
1759         s++;
1760     if (!isDIGIT(*s))
1761         return;
1762
1763     n = s;
1764     while (isDIGIT(*s))
1765         s++;
1766     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1767         return;
1768     while (SPACE_OR_TAB(*s))
1769         s++;
1770     if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1771         s++;
1772         e = t + 1;
1773     }
1774     else {
1775         t = s;
1776         while (*t && !isSPACE(*t))
1777             t++;
1778         e = t;
1779     }
1780     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1781         e++;
1782     if (*e != '\n' && *e != '\0')
1783         return;         /* false alarm */
1784
1785     if (!grok_atoUV(n, &uv, &e))
1786         return;
1787     line_num = ((line_t)uv) - 1;
1788
1789     if (t - s > 0) {
1790         const STRLEN len = t - s;
1791
1792         if (!PL_rsfp && !PL_parser->filtered) {
1793             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1794              * to *{"::_<newfilename"} */
1795             /* However, the long form of evals is only turned on by the
1796                debugger - usually they're "(eval %lu)" */
1797             GV * const cfgv = CopFILEGV(PL_curcop);
1798             if (cfgv) {
1799                 char smallbuf[128];
1800                 STRLEN tmplen2 = len;
1801                 char *tmpbuf2;
1802                 GV *gv2;
1803
1804                 if (tmplen2 + 2 <= sizeof smallbuf)
1805                     tmpbuf2 = smallbuf;
1806                 else
1807                     Newx(tmpbuf2, tmplen2 + 2, char);
1808
1809                 tmpbuf2[0] = '_';
1810                 tmpbuf2[1] = '<';
1811
1812                 memcpy(tmpbuf2 + 2, s, tmplen2);
1813                 tmplen2 += 2;
1814
1815                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1816                 if (!isGV(gv2)) {
1817                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1818                     /* adjust ${"::_<newfilename"} to store the new file name */
1819                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1820                     /* The line number may differ. If that is the case,
1821                        alias the saved lines that are in the array.
1822                        Otherwise alias the whole array. */
1823                     if (CopLINE(PL_curcop) == line_num) {
1824                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1825                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1826                     }
1827                     else if (GvAV(cfgv)) {
1828                         AV * const av = GvAV(cfgv);
1829                         const I32 start = CopLINE(PL_curcop)+1;
1830                         I32 items = AvFILLp(av) - start;
1831                         if (items > 0) {
1832                             AV * const av2 = GvAVn(gv2);
1833                             SV **svp = AvARRAY(av) + start;
1834                             I32 l = (I32)line_num+1;
1835                             while (items--)
1836                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1837                         }
1838                     }
1839                 }
1840
1841                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1842             }
1843         }
1844         CopFILE_free(PL_curcop);
1845         CopFILE_setn(PL_curcop, s, len);
1846     }
1847     CopLINE_set(PL_curcop, line_num);
1848 }
1849
1850 STATIC void
1851 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1852 {
1853     AV *av = CopFILEAVx(PL_curcop);
1854     if (av) {
1855         SV * sv;
1856         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1857         else {
1858             sv = *av_fetch(av, 0, 1);
1859             SvUPGRADE(sv, SVt_PVMG);
1860         }
1861         if (!SvPOK(sv)) SvPVCLEAR(sv);
1862         if (orig_sv)
1863             sv_catsv(sv, orig_sv);
1864         else
1865             sv_catpvn(sv, buf, len);
1866         if (!SvIOK(sv)) {
1867             (void)SvIOK_on(sv);
1868             SvIV_set(sv, 0);
1869         }
1870         if (PL_parser->preambling == NOLINE)
1871             av_store(av, CopLINE(PL_curcop), sv);
1872     }
1873 }
1874
1875 /*
1876  * skipspace
1877  * Called to gobble the appropriate amount and type of whitespace.
1878  * Skips comments as well.
1879  * Returns the next character after the whitespace that is skipped.
1880  *
1881  * peekspace
1882  * Same thing, but look ahead without incrementing line numbers or
1883  * adjusting PL_linestart.
1884  */
1885
1886 #define skipspace(s) skipspace_flags(s, 0)
1887 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1888
1889 STATIC char *
1890 S_skipspace_flags(pTHX_ char *s, U32 flags)
1891 {
1892     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1893     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1894         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1895             s++;
1896     } else {
1897         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1898         PL_bufptr = s;
1899         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1900                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1901                     LEX_NO_NEXT_CHUNK : 0));
1902         s = PL_bufptr;
1903         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1904         if (PL_linestart > PL_bufptr)
1905             PL_bufptr = PL_linestart;
1906         return s;
1907     }
1908     return s;
1909 }
1910
1911 /*
1912  * S_check_uni
1913  * Check the unary operators to ensure there's no ambiguity in how they're
1914  * used.  An ambiguous piece of code would be:
1915  *     rand + 5
1916  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1917  * the +5 is its argument.
1918  */
1919
1920 STATIC void
1921 S_check_uni(pTHX)
1922 {
1923     const char *s;
1924
1925     if (PL_oldoldbufptr != PL_last_uni)
1926         return;
1927     while (isSPACE(*PL_last_uni))
1928         PL_last_uni++;
1929     s = PL_last_uni;
1930     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1931         s += UTF ? UTF8SKIP(s) : 1;
1932     if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
1933         return;
1934
1935     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1936                      "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1937                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1938 }
1939
1940 /*
1941  * LOP : macro to build a list operator.  Its behaviour has been replaced
1942  * with a subroutine, S_lop() for which LOP is just another name.
1943  */
1944
1945 #define LOP(f,x) return lop(f,x,s)
1946
1947 /*
1948  * S_lop
1949  * Build a list operator (or something that might be one).  The rules:
1950  *  - if we have a next token, then it's a list operator (no parens) for
1951  *    which the next token has already been parsed; e.g.,
1952  *       sort foo @args
1953  *       sort foo (@args)
1954  *  - if the next thing is an opening paren, then it's a function
1955  *  - else it's a list operator
1956  */
1957
1958 STATIC I32
1959 S_lop(pTHX_ I32 f, U8 x, char *s)
1960 {
1961     PERL_ARGS_ASSERT_LOP;
1962
1963     pl_yylval.ival = f;
1964     CLINE;
1965     PL_bufptr = s;
1966     PL_last_lop = PL_oldbufptr;
1967     PL_last_lop_op = (OPCODE)f;
1968     if (PL_nexttoke)
1969         goto lstop;
1970     PL_expect = x;
1971     if (*s == '(')
1972         return REPORT(FUNC);
1973     s = skipspace(s);
1974     if (*s == '(')
1975         return REPORT(FUNC);
1976     else {
1977         lstop:
1978         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1979             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1980         return REPORT(LSTOP);
1981     }
1982 }
1983
1984 /*
1985  * S_force_next
1986  * When the lexer realizes it knows the next token (for instance,
1987  * it is reordering tokens for the parser) then it can call S_force_next
1988  * to know what token to return the next time the lexer is called.  Caller
1989  * will need to set PL_nextval[] and possibly PL_expect to ensure
1990  * the lexer handles the token correctly.
1991  */
1992
1993 STATIC void
1994 S_force_next(pTHX_ I32 type)
1995 {
1996 #ifdef DEBUGGING
1997     if (DEBUG_T_TEST) {
1998         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1999         tokereport(type, &NEXTVAL_NEXTTOKE);
2000     }
2001 #endif
2002     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2003     PL_nexttype[PL_nexttoke] = type;
2004     PL_nexttoke++;
2005 }
2006
2007 /*
2008  * S_postderef
2009  *
2010  * This subroutine handles postfix deref syntax after the arrow has already
2011  * been emitted.  @* $* etc. are emitted as two separate tokens right here.
2012  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2013  * only the first, leaving yylex to find the next.
2014  */
2015
2016 static int
2017 S_postderef(pTHX_ int const funny, char const next)
2018 {
2019     assert(funny == DOLSHARP || strchr("$@%&*", funny));
2020     if (next == '*') {
2021         PL_expect = XOPERATOR;
2022         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2023             assert('@' == funny || '$' == funny || DOLSHARP == funny);
2024             PL_lex_state = LEX_INTERPEND;
2025             if ('@' == funny)
2026                 force_next(POSTJOIN);
2027         }
2028         force_next(next);
2029         PL_bufptr+=2;
2030     }
2031     else {
2032         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2033          && !PL_lex_brackets)
2034             PL_lex_dojoin = 2;
2035         PL_expect = XOPERATOR;
2036         PL_bufptr++;
2037     }
2038     return funny;
2039 }
2040
2041 void
2042 Perl_yyunlex(pTHX)
2043 {
2044     int yyc = PL_parser->yychar;
2045     if (yyc != YYEMPTY) {
2046         if (yyc) {
2047             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2048             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2049                 PL_lex_allbrackets--;
2050                 PL_lex_brackets--;
2051                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2052             } else if (yyc == '('/*)*/) {
2053                 PL_lex_allbrackets--;
2054                 yyc |= (2<<24);
2055             }
2056             force_next(yyc);
2057         }
2058         PL_parser->yychar = YYEMPTY;
2059     }
2060 }
2061
2062 STATIC SV *
2063 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2064 {
2065     SV * const sv = newSVpvn_utf8(start, len,
2066                     ! IN_BYTES
2067                   &&  UTF
2068                   &&  is_utf8_non_invariant_string((const U8*)start, len));
2069     return sv;
2070 }
2071
2072 /*
2073  * S_force_word
2074  * When the lexer knows the next thing is a word (for instance, it has
2075  * just seen -> and it knows that the next char is a word char, then
2076  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2077  * lookahead.
2078  *
2079  * Arguments:
2080  *   char *start : buffer position (must be within PL_linestr)
2081  *   int token   : PL_next* will be this type of bare word
2082  *                 (e.g., METHOD,BAREWORD)
2083  *   int check_keyword : if true, Perl checks to make sure the word isn't
2084  *       a keyword (do this if the word is a label, e.g. goto FOO)
2085  *   int allow_pack : if true, : characters will also be allowed (require,
2086  *       use, etc. do this)
2087  */
2088
2089 STATIC char *
2090 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2091 {
2092     char *s;
2093     STRLEN len;
2094
2095     PERL_ARGS_ASSERT_FORCE_WORD;
2096
2097     start = skipspace(start);
2098     s = start;
2099     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2100         || (allow_pack && *s == ':' && s[1] == ':') )
2101     {
2102         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2103         if (check_keyword) {
2104           char *s2 = PL_tokenbuf;
2105           STRLEN len2 = len;
2106           if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2107             s2 += sizeof("CORE::") - 1;
2108             len2 -= sizeof("CORE::") - 1;
2109           }
2110           if (keyword(s2, len2, 0))
2111             return start;
2112         }
2113         if (token == METHOD) {
2114             s = skipspace(s);
2115             if (*s == '(')
2116                 PL_expect = XTERM;
2117             else {
2118                 PL_expect = XOPERATOR;
2119             }
2120         }
2121         NEXTVAL_NEXTTOKE.opval
2122             = newSVOP(OP_CONST,0,
2123                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2124         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2125         force_next(token);
2126     }
2127     return s;
2128 }
2129
2130 /*
2131  * S_force_ident
2132  * Called when the lexer wants $foo *foo &foo etc, but the program
2133  * text only contains the "foo" portion.  The first argument is a pointer
2134  * to the "foo", and the second argument is the type symbol to prefix.
2135  * Forces the next token to be a "BAREWORD".
2136  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2137  */
2138
2139 STATIC void
2140 S_force_ident(pTHX_ const char *s, int kind)
2141 {
2142     PERL_ARGS_ASSERT_FORCE_IDENT;
2143
2144     if (s[0]) {
2145         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2146         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2147                                                                 UTF ? SVf_UTF8 : 0));
2148         NEXTVAL_NEXTTOKE.opval = o;
2149         force_next(BAREWORD);
2150         if (kind) {
2151             o->op_private = OPpCONST_ENTERED;
2152             /* XXX see note in pp_entereval() for why we forgo typo
2153                warnings if the symbol must be introduced in an eval.
2154                GSAR 96-10-12 */
2155             gv_fetchpvn_flags(s, len,
2156                               (PL_in_eval ? GV_ADDMULTI
2157                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2158                               kind == '$' ? SVt_PV :
2159                               kind == '@' ? SVt_PVAV :
2160                               kind == '%' ? SVt_PVHV :
2161                               SVt_PVGV
2162                               );
2163         }
2164     }
2165 }
2166
2167 static void
2168 S_force_ident_maybe_lex(pTHX_ char pit)
2169 {
2170     NEXTVAL_NEXTTOKE.ival = pit;
2171     force_next('p');
2172 }
2173
2174 NV
2175 Perl_str_to_version(pTHX_ SV *sv)
2176 {
2177     NV retval = 0.0;
2178     NV nshift = 1.0;
2179     STRLEN len;
2180     const char *start = SvPV_const(sv,len);
2181     const char * const end = start + len;
2182     const bool utf = cBOOL(SvUTF8(sv));
2183
2184     PERL_ARGS_ASSERT_STR_TO_VERSION;
2185
2186     while (start < end) {
2187         STRLEN skip;
2188         UV n;
2189         if (utf)
2190             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2191         else {
2192             n = *(U8*)start;
2193             skip = 1;
2194         }
2195         retval += ((NV)n)/nshift;
2196         start += skip;
2197         nshift *= 1000;
2198     }
2199     return retval;
2200 }
2201
2202 /*
2203  * S_force_version
2204  * Forces the next token to be a version number.
2205  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2206  * and if "guessing" is TRUE, then no new token is created (and the caller
2207  * must use an alternative parsing method).
2208  */
2209
2210 STATIC char *
2211 S_force_version(pTHX_ char *s, int guessing)
2212 {
2213     OP *version = NULL;
2214     char *d;
2215
2216     PERL_ARGS_ASSERT_FORCE_VERSION;
2217
2218     s = skipspace(s);
2219
2220     d = s;
2221     if (*d == 'v')
2222         d++;
2223     if (isDIGIT(*d)) {
2224         while (isDIGIT(*d) || *d == '_' || *d == '.')
2225             d++;
2226         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2227             SV *ver;
2228             s = scan_num(s, &pl_yylval);
2229             version = pl_yylval.opval;
2230             ver = cSVOPx(version)->op_sv;
2231             if (SvPOK(ver) && !SvNIOK(ver)) {
2232                 SvUPGRADE(ver, SVt_PVNV);
2233                 SvNV_set(ver, str_to_version(ver));
2234                 SvNOK_on(ver);          /* hint that it is a version */
2235             }
2236         }
2237         else if (guessing) {
2238             return s;
2239         }
2240     }
2241
2242     /* NOTE: The parser sees the package name and the VERSION swapped */
2243     NEXTVAL_NEXTTOKE.opval = version;
2244     force_next(BAREWORD);
2245
2246     return s;
2247 }
2248
2249 /*
2250  * S_force_strict_version
2251  * Forces the next token to be a version number using strict syntax rules.
2252  */
2253
2254 STATIC char *
2255 S_force_strict_version(pTHX_ char *s)
2256 {
2257     OP *version = NULL;
2258     const char *errstr = NULL;
2259
2260     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2261
2262     while (isSPACE(*s)) /* leading whitespace */
2263         s++;
2264
2265     if (is_STRICT_VERSION(s,&errstr)) {
2266         SV *ver = newSV(0);
2267         s = (char *)scan_version(s, ver, 0);
2268         version = newSVOP(OP_CONST, 0, ver);
2269     }
2270     else if ((*s != ';' && *s != '{' && *s != '}' )
2271              && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2272     {
2273         PL_bufptr = s;
2274         if (errstr)
2275             yyerror(errstr); /* version required */
2276         return s;
2277     }
2278
2279     /* NOTE: The parser sees the package name and the VERSION swapped */
2280     NEXTVAL_NEXTTOKE.opval = version;
2281     force_next(BAREWORD);
2282
2283     return s;
2284 }
2285
2286 /*
2287  * S_tokeq
2288  * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2289  * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2290  * unchanged, and a new SV containing the modified input is returned.
2291  */
2292
2293 STATIC SV *
2294 S_tokeq(pTHX_ SV *sv)
2295 {
2296     char *s;
2297     char *send;
2298     char *d;
2299     SV *pv = sv;
2300
2301     PERL_ARGS_ASSERT_TOKEQ;
2302
2303     assert (SvPOK(sv));
2304     assert (SvLEN(sv));
2305     assert (!SvIsCOW(sv));
2306     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2307         goto finish;
2308     s = SvPVX(sv);
2309     send = SvEND(sv);
2310     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2311     while (s < send && !(*s == '\\' && s[1] == '\\'))
2312         s++;
2313     if (s == send)
2314         goto finish;
2315     d = s;
2316     if ( PL_hints & HINT_NEW_STRING ) {
2317         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2318                             SVs_TEMP | SvUTF8(sv));
2319     }
2320     while (s < send) {
2321         if (*s == '\\') {
2322             if (s + 1 < send && (s[1] == '\\'))
2323                 s++;            /* all that, just for this */
2324         }
2325         *d++ = *s++;
2326     }
2327     *d = '\0';
2328     SvCUR_set(sv, d - SvPVX_const(sv));
2329   finish:
2330     if ( PL_hints & HINT_NEW_STRING )
2331        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2332     return sv;
2333 }
2334
2335 /*
2336  * Now come three functions related to double-quote context,
2337  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2338  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2339  * interact with PL_lex_state, and create fake ( ... ) argument lists
2340  * to handle functions and concatenation.
2341  * For example,
2342  *   "foo\lbar"
2343  * is tokenised as
2344  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2345  */
2346
2347 /*
2348  * S_sublex_start
2349  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2350  *
2351  * Pattern matching will set PL_lex_op to the pattern-matching op to
2352  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2353  *
2354  * OP_CONST is easy--just make the new op and return.
2355  *
2356  * Everything else becomes a FUNC.
2357  *
2358  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2359  * had an OP_CONST.  This just sets us up for a
2360  * call to S_sublex_push().
2361  */
2362
2363 STATIC I32
2364 S_sublex_start(pTHX)
2365 {
2366     const I32 op_type = pl_yylval.ival;
2367
2368     if (op_type == OP_NULL) {
2369         pl_yylval.opval = PL_lex_op;
2370         PL_lex_op = NULL;
2371         return THING;
2372     }
2373     if (op_type == OP_CONST) {
2374         SV *sv = PL_lex_stuff;
2375         PL_lex_stuff = NULL;
2376         sv = tokeq(sv);
2377
2378         if (SvTYPE(sv) == SVt_PVIV) {
2379             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2380             STRLEN len;
2381             const char * const p = SvPV_const(sv, len);
2382             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2383             SvREFCNT_dec(sv);
2384             sv = nsv;
2385         }
2386         pl_yylval.opval = newSVOP(op_type, 0, sv);
2387         return THING;
2388     }
2389
2390     PL_parser->lex_super_state = PL_lex_state;
2391     PL_parser->lex_sub_inwhat = (U16)op_type;
2392     PL_parser->lex_sub_op = PL_lex_op;
2393     PL_lex_state = LEX_INTERPPUSH;
2394
2395     PL_expect = XTERM;
2396     if (PL_lex_op) {
2397         pl_yylval.opval = PL_lex_op;
2398         PL_lex_op = NULL;
2399         return PMFUNC;
2400     }
2401     else
2402         return FUNC;
2403 }
2404
2405 /*
2406  * S_sublex_push
2407  * Create a new scope to save the lexing state.  The scope will be
2408  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2409  * to the uc, lc, etc. found before.
2410  * Sets PL_lex_state to LEX_INTERPCONCAT.
2411  */
2412
2413 STATIC I32
2414 S_sublex_push(pTHX)
2415 {
2416     LEXSHARED *shared;
2417     const bool is_heredoc = PL_multi_close == '<';
2418     ENTER;
2419
2420     PL_lex_state = PL_parser->lex_super_state;
2421     SAVEI8(PL_lex_dojoin);
2422     SAVEI32(PL_lex_brackets);
2423     SAVEI32(PL_lex_allbrackets);
2424     SAVEI32(PL_lex_formbrack);
2425     SAVEI8(PL_lex_fakeeof);
2426     SAVEI32(PL_lex_casemods);
2427     SAVEI32(PL_lex_starts);
2428     SAVEI8(PL_lex_state);
2429     SAVESPTR(PL_lex_repl);
2430     SAVEVPTR(PL_lex_inpat);
2431     SAVEI16(PL_lex_inwhat);
2432     if (is_heredoc)
2433     {
2434         SAVECOPLINE(PL_curcop);
2435         SAVEI32(PL_multi_end);
2436         SAVEI32(PL_parser->herelines);
2437         PL_parser->herelines = 0;
2438     }
2439     SAVEIV(PL_multi_close);
2440     SAVEPPTR(PL_bufptr);
2441     SAVEPPTR(PL_bufend);
2442     SAVEPPTR(PL_oldbufptr);
2443     SAVEPPTR(PL_oldoldbufptr);
2444     SAVEPPTR(PL_last_lop);
2445     SAVEPPTR(PL_last_uni);
2446     SAVEPPTR(PL_linestart);
2447     SAVESPTR(PL_linestr);
2448     SAVEGENERICPV(PL_lex_brackstack);
2449     SAVEGENERICPV(PL_lex_casestack);
2450     SAVEGENERICPV(PL_parser->lex_shared);
2451     SAVEBOOL(PL_parser->lex_re_reparsing);
2452     SAVEI32(PL_copline);
2453
2454     /* The here-doc parser needs to be able to peek into outer lexing
2455        scopes to find the body of the here-doc.  So we put PL_linestr and
2456        PL_bufptr into lex_shared, to ‘share’ those values.
2457      */
2458     PL_parser->lex_shared->ls_linestr = PL_linestr;
2459     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2460
2461     PL_linestr = PL_lex_stuff;
2462     PL_lex_repl = PL_parser->lex_sub_repl;
2463     PL_lex_stuff = NULL;
2464     PL_parser->lex_sub_repl = NULL;
2465
2466     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2467        set for an inner quote-like operator and then an error causes scope-
2468        popping.  We must not have a PL_lex_stuff value left dangling, as
2469        that breaks assumptions elsewhere.  See bug #123617.  */
2470     SAVEGENERICSV(PL_lex_stuff);
2471     SAVEGENERICSV(PL_parser->lex_sub_repl);
2472
2473     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2474         = SvPVX(PL_linestr);
2475     PL_bufend += SvCUR(PL_linestr);
2476     PL_last_lop = PL_last_uni = NULL;
2477     SAVEFREESV(PL_linestr);
2478     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2479
2480     PL_lex_dojoin = FALSE;
2481     PL_lex_brackets = PL_lex_formbrack = 0;
2482     PL_lex_allbrackets = 0;
2483     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2484     Newx(PL_lex_brackstack, 120, char);
2485     Newx(PL_lex_casestack, 12, char);
2486     PL_lex_casemods = 0;
2487     *PL_lex_casestack = '\0';
2488     PL_lex_starts = 0;
2489     PL_lex_state = LEX_INTERPCONCAT;
2490     if (is_heredoc)
2491         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2492     PL_copline = NOLINE;
2493
2494     Newxz(shared, 1, LEXSHARED);
2495     shared->ls_prev = PL_parser->lex_shared;
2496     PL_parser->lex_shared = shared;
2497
2498     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2499     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2500     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2501         PL_lex_inpat = PL_parser->lex_sub_op;
2502     else
2503         PL_lex_inpat = NULL;
2504
2505     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2506     PL_in_eval &= ~EVAL_RE_REPARSING;
2507
2508     return '(';
2509 }
2510
2511 /*
2512  * S_sublex_done
2513  * Restores lexer state after a S_sublex_push.
2514  */
2515
2516 STATIC I32
2517 S_sublex_done(pTHX)
2518 {
2519     if (!PL_lex_starts++) {
2520         SV * const sv = newSVpvs("");
2521         if (SvUTF8(PL_linestr))
2522             SvUTF8_on(sv);
2523         PL_expect = XOPERATOR;
2524         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2525         return THING;
2526     }
2527
2528     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2529         PL_lex_state = LEX_INTERPCASEMOD;
2530         return yylex();
2531     }
2532
2533     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2534     assert(PL_lex_inwhat != OP_TRANSR);
2535     if (PL_lex_repl) {
2536         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2537         PL_linestr = PL_lex_repl;
2538         PL_lex_inpat = 0;
2539         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2540         PL_bufend += SvCUR(PL_linestr);
2541         PL_last_lop = PL_last_uni = NULL;
2542         PL_lex_dojoin = FALSE;
2543         PL_lex_brackets = 0;
2544         PL_lex_allbrackets = 0;
2545         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2546         PL_lex_casemods = 0;
2547         *PL_lex_casestack = '\0';
2548         PL_lex_starts = 0;
2549         if (SvEVALED(PL_lex_repl)) {
2550             PL_lex_state = LEX_INTERPNORMAL;
2551             PL_lex_starts++;
2552             /*  we don't clear PL_lex_repl here, so that we can check later
2553                 whether this is an evalled subst; that means we rely on the
2554                 logic to ensure sublex_done() is called again only via the
2555                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2556         }
2557         else {
2558             PL_lex_state = LEX_INTERPCONCAT;
2559             PL_lex_repl = NULL;
2560         }
2561         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2562             CopLINE(PL_curcop) +=
2563                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2564                  + PL_parser->herelines;
2565             PL_parser->herelines = 0;
2566         }
2567         return '/';
2568     }
2569     else {
2570         const line_t l = CopLINE(PL_curcop);
2571         LEAVE;
2572         if (PL_multi_close == '<')
2573             PL_parser->herelines += l - PL_multi_end;
2574         PL_bufend = SvPVX(PL_linestr);
2575         PL_bufend += SvCUR(PL_linestr);
2576         PL_expect = XOPERATOR;
2577         return ')';
2578     }
2579 }
2580
2581 STATIC SV*
2582 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2583 {
2584     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2585      * interior, hence to the "}".  Finds what the name resolves to, returning
2586      * an SV* containing it; NULL if no valid one found */
2587
2588     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2589
2590     HV * table;
2591     SV **cvp;
2592     SV *cv;
2593     SV *rv;
2594     HV *stash;
2595     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2596
2597     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2598
2599     if (!SvCUR(res)) {
2600         SvREFCNT_dec_NN(res);
2601         /* diag_listed_as: Unknown charname '%s' */
2602         yyerror("Unknown charname ''");
2603         return NULL;
2604     }
2605
2606     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2607                         /* include the <}> */
2608                         e - backslash_ptr + 1);
2609     if (! SvPOK(res)) {
2610         SvREFCNT_dec_NN(res);
2611         return NULL;
2612     }
2613
2614     /* See if the charnames handler is the Perl core's, and if so, we can skip
2615      * the validation needed for a user-supplied one, as Perl's does its own
2616      * validation. */
2617     table = GvHV(PL_hintgv);             /* ^H */
2618     cvp = hv_fetchs(table, "charnames", FALSE);
2619     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2620         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2621     {
2622         const char * const name = HvNAME(stash);
2623          if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2624            return res;
2625        }
2626     }
2627
2628     /* Here, it isn't Perl's charname handler.  We can't rely on a
2629      * user-supplied handler to validate the input name.  For non-ut8 input,
2630      * look to see that the first character is legal.  Then loop through the
2631      * rest checking that each is a continuation */
2632
2633     /* This code makes the reasonable assumption that the only Latin1-range
2634      * characters that begin a character name alias are alphabetic, otherwise
2635      * would have to create a isCHARNAME_BEGIN macro */
2636
2637     if (! UTF) {
2638         if (! isALPHAU(*s)) {
2639             goto bad_charname;
2640         }
2641         s++;
2642         while (s < e) {
2643             if (! isCHARNAME_CONT(*s)) {
2644                 goto bad_charname;
2645             }
2646             if (*s == ' ' && *(s-1) == ' ') {
2647                 goto multi_spaces;
2648             }
2649             s++;
2650         }
2651     }
2652     else {
2653         /* Similarly for utf8.  For invariants can check directly; for other
2654          * Latin1, can calculate their code point and check; otherwise  use a
2655          * swash */
2656         if (UTF8_IS_INVARIANT(*s)) {
2657             if (! isALPHAU(*s)) {
2658                 goto bad_charname;
2659             }
2660             s++;
2661         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2662             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2663                 goto bad_charname;
2664             }
2665             s += 2;
2666         }
2667         else {
2668             if (! PL_utf8_charname_begin) {
2669                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2670                 PL_utf8_charname_begin = _core_swash_init("utf8",
2671                                                         "_Perl_Charname_Begin",
2672                                                         &PL_sv_undef,
2673                                                         1, 0, NULL, &flags);
2674             }
2675             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2676                 goto bad_charname;
2677             }
2678             s += UTF8SKIP(s);
2679         }
2680
2681         while (s < e) {
2682             if (UTF8_IS_INVARIANT(*s)) {
2683                 if (! isCHARNAME_CONT(*s)) {
2684                     goto bad_charname;
2685                 }
2686                 if (*s == ' ' && *(s-1) == ' ') {
2687                     goto multi_spaces;
2688                 }
2689                 s++;
2690             }
2691             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2692                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2693                 {
2694                     goto bad_charname;
2695                 }
2696                 s += 2;
2697             }
2698             else {
2699                 if (! PL_utf8_charname_continue) {
2700                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2701                     PL_utf8_charname_continue = _core_swash_init("utf8",
2702                                                 "_Perl_Charname_Continue",
2703                                                 &PL_sv_undef,
2704                                                 1, 0, NULL, &flags);
2705                 }
2706                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2707                     goto bad_charname;
2708                 }
2709                 s += UTF8SKIP(s);
2710             }
2711         }
2712     }
2713     if (*(s-1) == ' ') {
2714         /* diag_listed_as: charnames alias definitions may not contain
2715                            trailing white-space; marked by <-- HERE in %s
2716          */
2717         yyerror_pv(
2718             Perl_form(aTHX_
2719             "charnames alias definitions may not contain trailing "
2720             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2721             (int)(s - backslash_ptr + 1), backslash_ptr,
2722             (int)(e - s + 1), s + 1
2723             ),
2724         UTF ? SVf_UTF8 : 0);
2725         return NULL;
2726     }
2727
2728     if (SvUTF8(res)) { /* Don't accept malformed input */
2729         const U8* first_bad_char_loc;
2730         STRLEN len;
2731         const char* const str = SvPV_const(res, len);
2732         if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2733                                           &first_bad_char_loc)))
2734         {
2735             _force_out_malformed_utf8_message(first_bad_char_loc,
2736                                               (U8 *) PL_parser->bufend,
2737                                               0,
2738                                               0 /* 0 means don't die */ );
2739             /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2740                                immediately after '%s' */
2741             yyerror_pv(
2742               Perl_form(aTHX_
2743                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2744                  (int) (e - backslash_ptr + 1), backslash_ptr,
2745                  (int) ((char *) first_bad_char_loc - str), str
2746               ),
2747               SVf_UTF8);
2748             return NULL;
2749         }
2750     }
2751
2752     return res;
2753
2754   bad_charname: {
2755
2756         /* The final %.*s makes sure that should the trailing NUL be missing
2757          * that this print won't run off the end of the string */
2758         /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2759                            in \N{%s} */
2760         yyerror_pv(
2761           Perl_form(aTHX_
2762             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2763             (int)(s - backslash_ptr + 1), backslash_ptr,
2764             (int)(e - s + 1), s + 1
2765           ),
2766           UTF ? SVf_UTF8 : 0);
2767         return NULL;
2768     }
2769
2770   multi_spaces:
2771         /* diag_listed_as: charnames alias definitions may not contain a
2772                            sequence of multiple spaces; marked by <-- HERE
2773                            in %s */
2774         yyerror_pv(
2775           Perl_form(aTHX_
2776             "charnames alias definitions may not contain a sequence of "
2777             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2778             (int)(s - backslash_ptr + 1), backslash_ptr,
2779             (int)(e - s + 1), s + 1
2780           ),
2781           UTF ? SVf_UTF8 : 0);
2782         return NULL;
2783 }
2784
2785 /*
2786   scan_const
2787
2788   Extracts the next constant part of a pattern, double-quoted string,
2789   or transliteration.  This is terrifying code.
2790
2791   For example, in parsing the double-quoted string "ab\x63$d", it would
2792   stop at the '$' and return an OP_CONST containing 'abc'.
2793
2794   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2795   processing a pattern (PL_lex_inpat is true), a transliteration
2796   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2797
2798   Returns a pointer to the character scanned up to. If this is
2799   advanced from the start pointer supplied (i.e. if anything was
2800   successfully parsed), will leave an OP_CONST for the substring scanned
2801   in pl_yylval. Caller must intuit reason for not parsing further
2802   by looking at the next characters herself.
2803
2804   In patterns:
2805     expand:
2806       \N{FOO}  => \N{U+hex_for_character_FOO}
2807       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2808
2809     pass through:
2810         all other \-char, including \N and \N{ apart from \N{ABC}
2811
2812     stops on:
2813         @ and $ where it appears to be a var, but not for $ as tail anchor
2814         \l \L \u \U \Q \E
2815         (?{  or  (??{
2816
2817   In transliterations:
2818     characters are VERY literal, except for - not at the start or end
2819     of the string, which indicates a range.  However some backslash sequences
2820     are recognized: \r, \n, and the like
2821                     \007 \o{}, \x{}, \N{}
2822     If all elements in the transliteration are below 256,
2823     scan_const expands the range to the full set of intermediate
2824     characters. If the range is in utf8, the hyphen is replaced with
2825     a certain range mark which will be handled by pmtrans() in op.c.
2826
2827   In double-quoted strings:
2828     backslashes:
2829       all those recognized in transliterations
2830       deprecated backrefs: \1 (in substitution replacements)
2831       case and quoting: \U \Q \E
2832     stops on @ and $
2833
2834   scan_const does *not* construct ops to handle interpolated strings.
2835   It stops processing as soon as it finds an embedded $ or @ variable
2836   and leaves it to the caller to work out what's going on.
2837
2838   embedded arrays (whether in pattern or not) could be:
2839       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2840
2841   $ in double-quoted strings must be the symbol of an embedded scalar.
2842
2843   $ in pattern could be $foo or could be tail anchor.  Assumption:
2844   it's a tail anchor if $ is the last thing in the string, or if it's
2845   followed by one of "()| \r\n\t"
2846
2847   \1 (backreferences) are turned into $1 in substitutions
2848
2849   The structure of the code is
2850       while (there's a character to process) {
2851           handle transliteration ranges
2852           skip regexp comments /(?#comment)/ and codes /(?{code})/
2853           skip #-initiated comments in //x patterns
2854           check for embedded arrays
2855           check for embedded scalars
2856           if (backslash) {
2857               deprecate \1 in substitution replacements
2858               handle string-changing backslashes \l \U \Q \E, etc.
2859               switch (what was escaped) {
2860                   handle \- in a transliteration (becomes a literal -)
2861                   if a pattern and not \N{, go treat as regular character
2862                   handle \132 (octal characters)
2863                   handle \x15 and \x{1234} (hex characters)
2864                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2865                   handle \cV (control characters)
2866                   handle printf-style backslashes (\f, \r, \n, etc)
2867               } (end switch)
2868               continue
2869           } (end if backslash)
2870           handle regular character
2871     } (end while character to read)
2872
2873 */
2874
2875 STATIC char *
2876 S_scan_const(pTHX_ char *start)
2877 {
2878     char *send = PL_bufend;             /* end of the constant */
2879     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2880                                            on sizing. */
2881     char *s = start;                    /* start of the constant */
2882     char *d = SvPVX(sv);                /* destination for copies */
2883     bool dorange = FALSE;               /* are we in a translit range? */
2884     bool didrange = FALSE;              /* did we just finish a range? */
2885     bool in_charclass = FALSE;          /* within /[...]/ */
2886     bool has_utf8 = FALSE;              /* Output constant is UTF8 */
2887     bool  this_utf8 = cBOOL(UTF);       /* Is the source string assumed to be
2888                                            UTF8?  But, this can show as true
2889                                            when the source isn't utf8, as for
2890                                            example when it is entirely composed
2891                                            of hex constants */
2892     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
2893                                            number of characters found so far
2894                                            that will expand (into 2 bytes)
2895                                            should we have to convert to
2896                                            UTF-8) */
2897     SV *res;                            /* result from charnames */
2898     STRLEN offset_to_max = 0;   /* The offset in the output to where the range
2899                                    high-end character is temporarily placed */
2900
2901     /* Does something require special handling in tr/// ?  This avoids extra
2902      * work in a less likely case.  As such, khw didn't feel it was worth
2903      * adding any branches to the more mainline code to handle this, which
2904      * means that this doesn't get set in some circumstances when things like
2905      * \x{100} get expanded out.  As a result there needs to be extra testing
2906      * done in the tr code */
2907     bool has_above_latin1 = FALSE;
2908
2909     /* Note on sizing:  The scanned constant is placed into sv, which is
2910      * initialized by newSV() assuming one byte of output for every byte of
2911      * input.  This routine expects newSV() to allocate an extra byte for a
2912      * trailing NUL, which this routine will append if it gets to the end of
2913      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2914      * CAPITAL LETTER A}), or more output than input if the constant ends up
2915      * recoded to utf8, but each time a construct is found that might increase
2916      * the needed size, SvGROW() is called.  Its size parameter each time is
2917      * based on the best guess estimate at the time, namely the length used so
2918      * far, plus the length the current construct will occupy, plus room for
2919      * the trailing NUL, plus one byte for every input byte still unscanned */
2920
2921     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2922                        before set */
2923 #ifdef EBCDIC
2924     int backslash_N = 0;            /* ? was the character from \N{} */
2925     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
2926                                        platform-specific like \x65 */
2927 #endif
2928
2929     PERL_ARGS_ASSERT_SCAN_CONST;
2930
2931     assert(PL_lex_inwhat != OP_TRANSR);
2932     if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
2933         /* If we are doing a trans and we know we want UTF8 set expectation */
2934         has_utf8   = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2935         this_utf8  = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2936     }
2937
2938     /* Protect sv from errors and fatal warnings. */
2939     ENTER_with_name("scan_const");
2940     SAVEFREESV(sv);
2941
2942     while (s < send
2943            || dorange   /* Handle tr/// range at right edge of input */
2944     ) {
2945
2946         /* get transliterations out of the way (they're most literal) */
2947         if (PL_lex_inwhat == OP_TRANS) {
2948
2949             /* But there isn't any special handling necessary unless there is a
2950              * range, so for most cases we just drop down and handle the value
2951              * as any other.  There are two exceptions.
2952              *
2953              * 1.  A hyphen indicates that we are actually going to have a
2954              *     range.  In this case, skip the '-', set a flag, then drop
2955              *     down to handle what should be the end range value.
2956              * 2.  After we've handled that value, the next time through, that
2957              *     flag is set and we fix up the range.
2958              *
2959              * Ranges entirely within Latin1 are expanded out entirely, in
2960              * order to make the transliteration a simple table look-up.
2961              * Ranges that extend above Latin1 have to be done differently, so
2962              * there is no advantage to expanding them here, so they are
2963              * stored here as Min, ILLEGAL_UTF8_BYTE, Max.  The illegal byte
2964              * signifies a hyphen without any possible ambiguity.  On EBCDIC
2965              * machines, if the range is expressed as Unicode, the Latin1
2966              * portion is expanded out even if the range extends above
2967              * Latin1.  This is because each code point in it has to be
2968              * processed here individually to get its native translation */
2969
2970             if (! dorange) {
2971
2972                 /* Here, we don't think we're in a range.  If the new character
2973                  * is not a hyphen; or if it is a hyphen, but it's too close to
2974                  * either edge to indicate a range, or if we haven't output any
2975                  * characters yet then it's a regular character. */
2976                 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv)) {
2977
2978                     /* A regular character.  Process like any other, but first
2979                      * clear any flags */
2980                     didrange = FALSE;
2981                     dorange = FALSE;
2982 #ifdef EBCDIC
2983                     non_portable_endpoint = 0;
2984                     backslash_N = 0;
2985 #endif
2986                     /* The tests here for being above Latin1 and similar ones
2987                      * in the following 'else' suffice to find all such
2988                      * occurences in the constant, except those added by a
2989                      * backslash escape sequence, like \x{100}.  Mostly, those
2990                      * set 'has_above_latin1' as appropriate */
2991                     if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
2992                         has_above_latin1 = TRUE;
2993                     }
2994
2995                     /* Drops down to generic code to process current byte */
2996                 }
2997                 else {  /* Is a '-' in the context where it means a range */
2998                     if (didrange) { /* Something like y/A-C-Z// */
2999                         Perl_croak(aTHX_ "Ambiguous range in transliteration"
3000                                          " operator");
3001                     }
3002
3003                     dorange = TRUE;
3004
3005                     s++;    /* Skip past the hyphen */
3006
3007                     /* d now points to where the end-range character will be
3008                      * placed.  Save it so won't have to go finding it later,
3009                      * and drop down to get that character.  (Actually we
3010                      * instead save the offset, to handle the case where a
3011                      * realloc in the meantime could change the actual
3012                      * pointer).  We'll finish processing the range the next
3013                      * time through the loop */
3014                     offset_to_max = d - SvPVX_const(sv);
3015
3016                     if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3017                         has_above_latin1 = TRUE;
3018                     }
3019
3020                     /* Drops down to generic code to process current byte */
3021                 }
3022             }  /* End of not a range */
3023             else {
3024                 /* Here we have parsed a range.  Now must handle it.  At this
3025                  * point:
3026                  * 'sv' is a SV* that contains the output string we are
3027                  *      constructing.  The final two characters in that string
3028                  *      are the range start and range end, in order.
3029                  * 'd'  points to just beyond the range end in the 'sv' string,
3030                  *      where we would next place something
3031                  * 'offset_to_max' is the offset in 'sv' at which the character
3032                  *      (the range's maximum end point) before 'd'  begins.
3033                  */
3034                 char * max_ptr = SvPVX(sv) + offset_to_max;
3035                 char * min_ptr;
3036                 IV range_min;
3037                 IV range_max;   /* last character in range */
3038                 STRLEN grow;
3039                 Size_t offset_to_min = 0;
3040                 Size_t extras = 0;
3041 #ifdef EBCDIC
3042                 bool convert_unicode;
3043                 IV real_range_max = 0;
3044 #endif
3045                 /* Get the code point values of the range ends. */
3046                 if (has_utf8) {
3047                     /* We know the utf8 is valid, because we just constructed
3048                      * it ourselves in previous loop iterations */
3049                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3050                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3051                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3052
3053                     /* This compensates for not all code setting
3054                      * 'has_above_latin1', so that we don't skip stuff that
3055                      * should be executed */
3056                     if (range_max > 255) {
3057                         has_above_latin1 = TRUE;
3058                     }
3059                 }
3060                 else {
3061                     min_ptr = max_ptr - 1;
3062                     range_min = * (U8*) min_ptr;
3063                     range_max = * (U8*) max_ptr;
3064                 }
3065
3066                 /* If the range is just a single code point, like tr/a-a/.../,
3067                  * that code point is already in the output, twice.  We can
3068                  * just back up over the second instance and avoid all the rest
3069                  * of the work.  But if it is a variant character, it's been
3070                  * counted twice, so decrement.  (This unlikely scenario is
3071                  * special cased, like the one for a range of 2 code points
3072                  * below, only because the main-line code below needs a range
3073                  * of 3 or more to work without special casing.  Might as well
3074                  * get it out of the way now.) */
3075                 if (UNLIKELY(range_max == range_min)) {
3076                     d = max_ptr;
3077                     if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3078                         utf8_variant_count--;
3079                     }
3080                     goto range_done;
3081                 }
3082
3083 #ifdef EBCDIC
3084                 /* On EBCDIC platforms, we may have to deal with portable
3085                  * ranges.  These happen if at least one range endpoint is a
3086                  * Unicode value (\N{...}), or if the range is a subset of
3087                  * [A-Z] or [a-z], and both ends are literal characters,
3088                  * like 'A', and not like \x{C1} */
3089                 convert_unicode =
3090                                cBOOL(backslash_N)   /* \N{} forces Unicode,
3091                                                        hence portable range */
3092                     || (     ! non_portable_endpoint
3093                         && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3094                            || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3095                 if (convert_unicode) {
3096
3097                     /* Special handling is needed for these portable ranges.
3098                      * They are defined to be in Unicode terms, which includes
3099                      * all the Unicode code points between the end points.
3100                      * Convert to Unicode to get the Unicode range.  Later we
3101                      * will convert each code point in the range back to
3102                      * native.  */
3103                     range_min = NATIVE_TO_UNI(range_min);
3104                     range_max = NATIVE_TO_UNI(range_max);
3105                 }
3106 #endif
3107
3108                 if (range_min > range_max) {
3109 #ifdef EBCDIC
3110                     if (convert_unicode) {
3111                         /* Need to convert back to native for meaningful
3112                          * messages for this platform */
3113                         range_min = UNI_TO_NATIVE(range_min);
3114                         range_max = UNI_TO_NATIVE(range_max);
3115                     }
3116 #endif
3117                     /* Use the characters themselves for the error message if
3118                      * ASCII printables; otherwise some visible representation
3119                      * of them */
3120                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3121                         Perl_croak(aTHX_
3122                          "Invalid range \"%c-%c\" in transliteration operator",
3123                          (char)range_min, (char)range_max);
3124                     }
3125 #ifdef EBCDIC
3126                     else if (convert_unicode) {
3127         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3128                         Perl_croak(aTHX_
3129                            "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3130                            UVXf "}\" in transliteration operator",
3131                            range_min, range_max);
3132                     }
3133 #endif
3134                     else {
3135         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3136                         Perl_croak(aTHX_
3137                            "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3138                            " in transliteration operator",
3139                            range_min, range_max);
3140                     }
3141                 }
3142
3143                 /* If the range is exactly two code points long, they are
3144                  * already both in the output */
3145                 if (UNLIKELY(range_min + 1 == range_max)) {
3146                     goto range_done;
3147                 }
3148
3149                 /* Here the range contains at least 3 code points */
3150
3151                 if (has_utf8) {
3152
3153                     /* If everything in the transliteration is below 256, we
3154                      * can avoid special handling later.  A translation table
3155                      * for each of those bytes is created by op.c.  So we
3156                      * expand out all ranges to their constituent code points.
3157                      * But if we've encountered something above 255, the
3158                      * expanding won't help, so skip doing that.  But if it's
3159                      * EBCDIC, we may have to look at each character below 256
3160                      * if we have to convert to/from Unicode values */
3161                     if (   has_above_latin1
3162 #ifdef EBCDIC
3163                         && (range_min > 255 || ! convert_unicode)
3164 #endif
3165                     ) {
3166                         /* Move the high character one byte to the right; then
3167                          * insert between it and the range begin, an illegal
3168                          * byte which serves to indicate this is a range (using
3169                          * a '-' would be ambiguous). */
3170                         char *e = d++;
3171                         while (e-- > max_ptr) {
3172                             *(e + 1) = *e;
3173                         }
3174                         *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
3175                         goto range_done;
3176                     }
3177
3178                     /* Here, we're going to expand out the range.  For EBCDIC
3179                      * the range can extend above 255 (not so in ASCII), so
3180                      * for EBCDIC, split it into the parts above and below
3181                      * 255/256 */
3182 #ifdef EBCDIC
3183                     if (range_max > 255) {
3184                         real_range_max = range_max;
3185                         range_max = 255;
3186                     }
3187 #endif
3188                 }
3189
3190                 /* Here we need to expand out the string to contain each
3191                  * character in the range.  Grow the output to handle this.
3192                  * For non-UTF8, we need a byte for each code point in the
3193                  * range, minus the three that we've already allocated for: the
3194                  * hyphen, the min, and the max.  For UTF-8, we need this
3195                  * plus an extra byte for each code point that occupies two
3196                  * bytes (is variant) when in UTF-8 (except we've already
3197                  * allocated for the end points, including if they are
3198                  * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3199                  * platforms, it's easy to calculate a precise number.  To
3200                  * start, we count the variants in the range, which we need
3201                  * elsewhere in this function anyway.  (For the case where it
3202                  * isn't easy to calculate, 'extras' has been initialized to 0,
3203                  * and the calculation is done in a loop further down.) */
3204 #ifdef EBCDIC
3205                 if (convert_unicode)
3206 #endif
3207                 {
3208                     /* This is executed unconditionally on ASCII, and for
3209                      * Unicode ranges on EBCDIC.  Under these conditions, all
3210                      * code points above a certain value are variant; and none
3211                      * under that value are.  We just need to find out how much
3212                      * of the range is above that value.  We don't count the
3213                      * end points here, as they will already have been counted
3214                      * as they were parsed. */
3215                     if (range_min >= UTF_CONTINUATION_MARK) {
3216
3217                         /* The whole range is made up of variants */
3218                         extras = (range_max - 1) - (range_min + 1) + 1;
3219                     }
3220                     else if (range_max >= UTF_CONTINUATION_MARK) {
3221
3222                         /* Only the higher portion of the range is variants */
3223                         extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3224                     }
3225
3226                     utf8_variant_count += extras;
3227                 }
3228
3229                 /* The base growth is the number of code points in the range,
3230                  * not including the endpoints, which have already been sized
3231                  * for (and output).  We don't subtract for the hyphen, as it
3232                  * has been parsed but not output, and the SvGROW below is
3233                  * based only on what's been output plus what's left to parse.
3234                  * */
3235                 grow = (range_max - 1) - (range_min + 1) + 1;
3236
3237                 if (has_utf8) {
3238 #ifdef EBCDIC
3239                     /* In some cases in EBCDIC, we haven't yet calculated a
3240                      * precise amount needed for the UTF-8 variants.  Just
3241                      * assume the worst case, that everything will expand by a
3242                      * byte */
3243                     if (! convert_unicode) {
3244                         grow *= 2;
3245                     }
3246                     else
3247 #endif
3248                     {
3249                         /* Otherwise we know exactly how many variants there
3250                          * are in the range. */
3251                         grow += extras;
3252                     }
3253                 }
3254
3255                 /* Grow, but position the output to overwrite the range min end
3256                  * point, because in some cases we overwrite that */
3257                 SvCUR_set(sv, d - SvPVX_const(sv));
3258                 offset_to_min = min_ptr - SvPVX_const(sv);
3259
3260                 /* See Note on sizing above. */
3261                 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3262                                              + (send - s)
3263                                              + grow
3264                                              + 1 /* Trailing NUL */ );
3265
3266                 /* Now, we can expand out the range. */
3267 #ifdef EBCDIC
3268                 if (convert_unicode) {
3269                     SSize_t i;
3270
3271                     /* Recall that the min and max are now in Unicode terms, so
3272                      * we have to convert each character to its native
3273                      * equivalent */
3274                     if (has_utf8) {
3275                         for (i = range_min; i <= range_max; i++) {
3276                             append_utf8_from_native_byte(
3277                                                     LATIN1_TO_NATIVE((U8) i),
3278                                                     (U8 **) &d);
3279                         }
3280                     }
3281                     else {
3282                         for (i = range_min; i <= range_max; i++) {
3283                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3284                         }
3285                     }
3286                 }
3287                 else
3288 #endif
3289                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3290                 {
3291                     /* Here, no conversions are necessary, which means that the
3292                      * first character in the range is already in 'd' and
3293                      * valid, so we can skip overwriting it */
3294                     if (has_utf8) {
3295                         SSize_t i;
3296                         d += UTF8SKIP(d);
3297                         for (i = range_min + 1; i <= range_max; i++) {
3298                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3299                         }
3300                     }
3301                     else {
3302                         SSize_t i;
3303                         d++;
3304                         assert(range_min + 1 <= range_max);
3305                         for (i = range_min + 1; i < range_max; i++) {
3306 #ifdef EBCDIC
3307                             /* In this case on EBCDIC, we haven't calculated
3308                              * the variants.  Do it here, as we go along */
3309                             if (! UVCHR_IS_INVARIANT(i)) {
3310                                 utf8_variant_count++;
3311                             }
3312 #endif
3313                             *d++ = (char)i;
3314                         }
3315
3316                         /* The range_max is done outside the loop so as to
3317                          * avoid having to special case not incrementing
3318                          * 'utf8_variant_count' on EBCDIC (it's already been
3319                          * counted when originally parsed) */
3320                         *d++ = (char) range_max;
3321                     }
3322                 }
3323
3324 #ifdef EBCDIC
3325                 /* If the original range extended above 255, add in that
3326                  * portion. */
3327                 if (real_range_max) {
3328                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3329                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3330                     if (real_range_max > 0x100) {
3331                         if (real_range_max > 0x101) {
3332                             *d++ = (char) ILLEGAL_UTF8_BYTE;
3333                         }
3334                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3335                     }
3336                 }
3337 #endif
3338
3339               range_done:
3340                 /* mark the range as done, and continue */
3341                 didrange = TRUE;
3342                 dorange = FALSE;
3343 #ifdef EBCDIC
3344                 non_portable_endpoint = 0;
3345                 backslash_N = 0;
3346 #endif
3347                 continue;
3348             } /* End of is a range */
3349         } /* End of transliteration.  Joins main code after these else's */
3350         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3351             char *s1 = s-1;
3352             int esc = 0;
3353             while (s1 >= start && *s1-- == '\\')
3354                 esc = !esc;
3355             if (!esc)
3356                 in_charclass = TRUE;
3357         }
3358         else if (*s == ']' && PL_lex_inpat && in_charclass) {
3359             char *s1 = s-1;
3360             int esc = 0;
3361             while (s1 >= start && *s1-- == '\\')
3362                 esc = !esc;
3363             if (!esc)
3364                 in_charclass = FALSE;
3365         }
3366             /* skip for regexp comments /(?#comment)/, except for the last
3367              * char, which will be done separately.  Stop on (?{..}) and
3368              * friends */
3369         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3370             if (s[2] == '#') {
3371                 while (s+1 < send && *s != ')')
3372                     *d++ = *s++;
3373             }
3374             else if (!PL_lex_casemods
3375                      && (    s[2] == '{' /* This should match regcomp.c */
3376                          || (s[2] == '?' && s[3] == '{')))
3377             {
3378                 break;
3379             }
3380         }
3381             /* likewise skip #-initiated comments in //x patterns */
3382         else if (*s == '#'
3383                  && PL_lex_inpat
3384                  && !in_charclass
3385                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3386         {
3387             while (s < send && *s != '\n')
3388                 *d++ = *s++;
3389         }
3390             /* no further processing of single-quoted regex */
3391         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3392             goto default_action;
3393
3394             /* check for embedded arrays
3395              * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3396              */
3397         else if (*s == '@' && s[1]) {
3398             if (UTF
3399                ? isIDFIRST_utf8_safe(s+1, send)
3400                : isWORDCHAR_A(s[1]))
3401             {
3402                 break;
3403             }
3404             if (strchr(":'{$", s[1]))
3405                 break;
3406             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3407                 break; /* in regexp, neither @+ nor @- are interpolated */
3408         }
3409             /* check for embedded scalars.  only stop if we're sure it's a
3410              * variable.  */
3411         else if (*s == '$') {
3412             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3413                 break;
3414             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3415                 if (s[1] == '\\') {
3416                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3417                                    "Possible unintended interpolation of $\\ in regex");
3418                 }
3419                 break;          /* in regexp, $ might be tail anchor */
3420             }
3421         }
3422
3423         /* End of else if chain - OP_TRANS rejoin rest */
3424
3425         if (UNLIKELY(s >= send)) {
3426             assert(s == send);
3427             break;
3428         }
3429
3430         /* backslashes */
3431         if (*s == '\\' && s+1 < send) {
3432             char* e;    /* Can be used for ending '}', etc. */
3433
3434             s++;
3435
3436             /* warn on \1 - \9 in substitution replacements, but note that \11
3437              * is an octal; and \19 is \1 followed by '9' */
3438             if (PL_lex_inwhat == OP_SUBST
3439                 && !PL_lex_inpat
3440                 && isDIGIT(*s)
3441                 && *s != '0'
3442                 && !isDIGIT(s[1]))
3443             {
3444                 /* diag_listed_as: \%d better written as $%d */
3445                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3446                 *--s = '$';
3447                 break;
3448             }
3449
3450             /* string-change backslash escapes */
3451             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3452                 --s;
3453                 break;
3454             }
3455             /* In a pattern, process \N, but skip any other backslash escapes.
3456              * This is because we don't want to translate an escape sequence
3457              * into a meta symbol and have the regex compiler use the meta
3458              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3459              * in spite of this, we do have to process \N here while the proper
3460              * charnames handler is in scope.  See bugs #56444 and #62056.
3461              *
3462              * There is a complication because \N in a pattern may also stand
3463              * for 'match a non-nl', and not mean a charname, in which case its
3464              * processing should be deferred to the regex compiler.  To be a
3465              * charname it must be followed immediately by a '{', and not look
3466              * like \N followed by a curly quantifier, i.e., not something like
3467              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3468              * quantifier */
3469             else if (PL_lex_inpat
3470                     && (*s != 'N'
3471                         || s[1] != '{'
3472                         || regcurly(s + 1)))
3473             {
3474                 *d++ = '\\';
3475                 goto default_action;
3476             }
3477
3478             switch (*s) {
3479             default:
3480                 {
3481                     if ((isALPHANUMERIC(*s)))
3482                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3483                                        "Unrecognized escape \\%c passed through",
3484                                        *s);
3485                     /* default action is to copy the quoted character */
3486                     goto default_action;
3487                 }
3488
3489             /* eg. \132 indicates the octal constant 0132 */
3490             case '0': case '1': case '2': case '3':
3491             case '4': case '5': case '6': case '7':
3492                 {
3493                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3494                     STRLEN len = 3;
3495                     uv = grok_oct(s, &len, &flags, NULL);
3496                     s += len;
3497                     if (len < 3 && s < send && isDIGIT(*s)
3498                         && ckWARN(WARN_MISC))
3499                     {
3500                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3501                                     "%s", form_short_octal_warning(s, len));
3502                     }
3503                 }
3504                 goto NUM_ESCAPE_INSERT;
3505
3506             /* eg. \o{24} indicates the octal constant \024 */
3507             case 'o':
3508                 {
3509                     const char* error;
3510
3511                     bool valid = grok_bslash_o(&s, PL_bufend,
3512                                                &uv, &error,
3513                                                TRUE, /* Output warning */
3514                                                FALSE, /* Not strict */
3515                                                TRUE, /* Output warnings for
3516                                                          non-portables */
3517                                                UTF);
3518                     if (! valid) {
3519                         yyerror(error);
3520                         uv = 0; /* drop through to ensure range ends are set */
3521                     }
3522                     goto NUM_ESCAPE_INSERT;
3523                 }
3524
3525             /* eg. \x24 indicates the hex constant 0x24 */
3526             case 'x':
3527                 {
3528                     const char* error;
3529
3530                     bool valid = grok_bslash_x(&s, PL_bufend,
3531                                                &uv, &error,
3532                                                TRUE, /* Output warning */
3533                                                FALSE, /* Not strict */
3534                                                TRUE,  /* Output warnings for
3535                                                          non-portables */
3536                                                UTF);
3537                     if (! valid) {
3538                         yyerror(error);
3539                         uv = 0; /* drop through to ensure range ends are set */
3540                     }
3541                 }
3542
3543               NUM_ESCAPE_INSERT:
3544                 /* Insert oct or hex escaped character. */
3545
3546                 /* Here uv is the ordinal of the next character being added */
3547                 if (UVCHR_IS_INVARIANT(uv)) {
3548                     *d++ = (char) uv;
3549                 }
3550                 else {
3551                     if (!has_utf8 && uv > 255) {
3552
3553                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3554                          * If we've only seen invariants so far, all we have to
3555                          * do is turn on the flag */
3556                         if (utf8_variant_count == 0) {
3557                             SvUTF8_on(sv);
3558                         }
3559                         else {
3560                             SvCUR_set(sv, d - SvPVX_const(sv));
3561                             SvPOK_on(sv);
3562                             *d = '\0';
3563
3564                             sv_utf8_upgrade_flags_grow(
3565                                            sv,
3566                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3567
3568                                            /* Since we're having to grow here,
3569                                             * make sure we have enough room for
3570                                             * this escape and a NUL, so the
3571                                             * code immediately below won't have
3572                                             * to actually grow again */
3573                                           UVCHR_SKIP(uv)
3574                                         + (STRLEN)(send - s) + 1);
3575                             d = SvPVX(sv) + SvCUR(sv);
3576                         }
3577
3578                         has_above_latin1 = TRUE;
3579                         has_utf8 = TRUE;
3580                     }
3581
3582                     if (! has_utf8) {
3583                         *d++ = (char)uv;
3584                         utf8_variant_count++;
3585                     }
3586                     else {
3587                        /* Usually, there will already be enough room in 'sv'
3588                         * since such escapes are likely longer than any UTF-8
3589                         * sequence they can end up as.  This isn't the case on
3590                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3591                         * UTF-8 for it contains 14.  And, we have to allow for
3592                         * a trailing NUL.  It probably can't happen on ASCII
3593                         * platforms, but be safe.  See Note on sizing above. */
3594                         const STRLEN needed = d - SvPVX(sv)
3595                                             + UVCHR_SKIP(uv)
3596                                             + (send - s)
3597                                             + 1;
3598                         if (UNLIKELY(needed > SvLEN(sv))) {
3599                             SvCUR_set(sv, d - SvPVX_const(sv));
3600                             d = SvCUR(sv) + SvGROW(sv, needed);
3601                         }
3602
3603                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3604                         if (PL_lex_inwhat == OP_TRANS
3605                             && PL_parser->lex_sub_op)
3606                         {
3607                             PL_parser->lex_sub_op->op_private |=
3608                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3609                                              : OPpTRANS_TO_UTF);
3610                         }
3611                     }
3612                 }
3613 #ifdef EBCDIC
3614                 non_portable_endpoint++;
3615 #endif
3616                 continue;
3617
3618             case 'N':
3619                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3620                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3621                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3622                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3623                  * convenience all three forms are referred to as "named
3624                  * characters" below.
3625                  *
3626                  * For patterns, \N also can mean to match a non-newline.  Code
3627                  * before this 'switch' statement should already have handled
3628                  * this situation, and hence this code only has to deal with
3629                  * the named character cases.
3630                  *
3631                  * For non-patterns, the named characters are converted to
3632                  * their string equivalents.  In patterns, named characters are
3633                  * not converted to their ultimate forms for the same reasons
3634                  * that other escapes aren't (mainly that the ultimate
3635                  * character could be considered a meta-symbol by the regex
3636                  * compiler).  Instead, they are converted to the \N{U+...}
3637                  * form to get the value from the charnames that is in effect
3638                  * right now, while preserving the fact that it was a named
3639                  * character, so that the regex compiler knows this.
3640                  *
3641                  * The structure of this section of code (besides checking for
3642                  * errors and upgrading to utf8) is:
3643                  *    If the named character is of the form \N{U+...}, pass it
3644                  *      through if a pattern; otherwise convert the code point
3645                  *      to utf8
3646                  *    Otherwise must be some \N{NAME}: convert to
3647                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3648                  *
3649                  * Transliteration is an exception.  The conversion to utf8 is
3650                  * only done if the code point requires it to be representable.
3651                  *
3652                  * Here, 's' points to the 'N'; the test below is guaranteed to
3653                  * succeed if we are being called on a pattern, as we already
3654                  * know from a test above that the next character is a '{'.  A
3655                  * non-pattern \N must mean 'named character', which requires
3656                  * braces */
3657                 s++;
3658                 if (*s != '{') {
3659                     yyerror("Missing braces on \\N{}");
3660                     *d++ = '\0';
3661                     continue;
3662                 }
3663                 s++;
3664
3665                 /* If there is no matching '}', it is an error. */
3666                 if (! (e = (char *) memchr(s, '}', send - s))) {
3667                     if (! PL_lex_inpat) {
3668                         yyerror("Missing right brace on \\N{}");
3669                     } else {
3670                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3671                     }
3672                     yyquit(); /* Have exhausted the input. */
3673                 }
3674
3675                 /* Here it looks like a named character */
3676
3677                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3678                     s += 2;         /* Skip to next char after the 'U+' */
3679                     if (PL_lex_inpat) {
3680
3681                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3682                         /* Check the syntax.  */
3683                         const char *orig_s;
3684                         orig_s = s - 5;
3685                         if (!isXDIGIT(*s)) {
3686                           bad_NU:
3687                             yyerror(
3688                                 "Invalid hexadecimal number in \\N{U+...}"
3689                             );
3690                             s = e + 1;
3691                             *d++ = '\0';
3692                             continue;
3693                         }
3694                         while (++s < e) {
3695                             if (isXDIGIT(*s))
3696                                 continue;
3697                             else if ((*s == '.' || *s == '_')
3698                                   && isXDIGIT(s[1]))
3699                                 continue;
3700                             goto bad_NU;
3701                         }
3702
3703                         /* Pass everything through unchanged.
3704                          * +1 is for the '}' */
3705                         Copy(orig_s, d, e - orig_s + 1, char);
3706                         d += e - orig_s + 1;
3707                     }
3708                     else {  /* Not a pattern: convert the hex to string */
3709                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3710                                 | PERL_SCAN_SILENT_ILLDIGIT
3711                                 | PERL_SCAN_DISALLOW_PREFIX;
3712                         STRLEN len = e - s;
3713                         uv = grok_hex(s, &len, &flags, NULL);
3714                         if (len == 0 || (len != (STRLEN)(e - s)))
3715                             goto bad_NU;
3716
3717                          /* For non-tr///, if the destination is not in utf8,
3718                           * unconditionally recode it to be so.  This is
3719                           * because \N{} implies Unicode semantics, and scalars
3720                           * have to be in utf8 to guarantee those semantics.
3721                           * tr/// doesn't care about Unicode rules, so no need
3722                           * there to upgrade to UTF-8 for small enough code
3723                           * points */
3724                         if (! has_utf8 && (   uv > 0xFF
3725                                            || PL_lex_inwhat != OP_TRANS))
3726                         {
3727                             /* See Note on sizing above.  */
3728                             const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3729
3730                             SvCUR_set(sv, d - SvPVX_const(sv));
3731                             SvPOK_on(sv);
3732                             *d = '\0';
3733
3734                             if (utf8_variant_count == 0) {
3735                                 SvUTF8_on(sv);
3736                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3737                             }
3738                             else {
3739                                 sv_utf8_upgrade_flags_grow(
3740                                                sv,
3741                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3742                                                extra);
3743                                 d = SvPVX(sv) + SvCUR(sv);
3744                             }
3745
3746                             has_utf8 = TRUE;
3747                             has_above_latin1 = TRUE;
3748                         }
3749
3750                         /* Add the (Unicode) code point to the output. */
3751                         if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3752                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3753                         }
3754                         else {
3755                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3756                         }
3757                     }
3758                 }
3759                 else /* Here is \N{NAME} but not \N{U+...}. */
3760                      if ((res = get_and_check_backslash_N_name(s, e)))
3761                 {
3762                     STRLEN len;
3763                     const char *str = SvPV_const(res, len);
3764                     if (PL_lex_inpat) {
3765
3766                         if (! len) { /* The name resolved to an empty string */
3767                             Copy("\\N{}", d, 4, char);
3768                             d += 4;
3769                         }
3770                         else {
3771                             /* In order to not lose information for the regex
3772                             * compiler, pass the result in the specially made
3773                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3774                             * the code points in hex of each character
3775                             * returned by charnames */
3776
3777                             const char *str_end = str + len;
3778                             const STRLEN off = d - SvPVX_const(sv);
3779
3780                             if (! SvUTF8(res)) {
3781                                 /* For the non-UTF-8 case, we can determine the
3782                                  * exact length needed without having to parse
3783                                  * through the string.  Each character takes up
3784                                  * 2 hex digits plus either a trailing dot or
3785                                  * the "}" */
3786                                 const char initial_text[] = "\\N{U+";
3787                                 const STRLEN initial_len = sizeof(initial_text)
3788                                                            - 1;
3789                                 d = off + SvGROW(sv, off
3790                                                     + 3 * len
3791
3792                                                     /* +1 for trailing NUL */
3793                                                     + initial_len + 1
3794
3795                                                     + (STRLEN)(send - e));
3796                                 Copy(initial_text, d, initial_len, char);
3797                                 d += initial_len;
3798                                 while (str < str_end) {
3799                                     char hex_string[4];
3800                                     int len =
3801                                         my_snprintf(hex_string,
3802                                                   sizeof(hex_string),
3803                                                   "%02X.",
3804
3805                                                   /* The regex compiler is
3806                                                    * expecting Unicode, not
3807                                                    * native */
3808                                                   NATIVE_TO_LATIN1(*str));
3809                                     PERL_MY_SNPRINTF_POST_GUARD(len,
3810                                                            sizeof(hex_string));
3811                                     Copy(hex_string, d, 3, char);
3812                                     d += 3;
3813                                     str++;
3814                                 }
3815                                 d--;    /* Below, we will overwrite the final
3816                                            dot with a right brace */
3817                             }
3818                             else {
3819                                 STRLEN char_length; /* cur char's byte length */
3820
3821                                 /* and the number of bytes after this is
3822                                  * translated into hex digits */
3823                                 STRLEN output_length;
3824
3825                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3826                                  * for max('U+', '.'); and 1 for NUL */
3827                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3828
3829                                 /* Get the first character of the result. */
3830                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3831                                                         len,
3832                                                         &char_length,
3833                                                         UTF8_ALLOW_ANYUV);
3834                                 /* Convert first code point to Unicode hex,
3835                                  * including the boiler plate before it. */
3836                                 output_length =
3837                                     my_snprintf(hex_string, sizeof(hex_string),
3838                                              "\\N{U+%X",
3839                                              (unsigned int) NATIVE_TO_UNI(uv));
3840
3841                                 /* Make sure there is enough space to hold it */
3842                                 d = off + SvGROW(sv, off
3843                                                     + output_length
3844                                                     + (STRLEN)(send - e)
3845                                                     + 2);       /* '}' + NUL */
3846                                 /* And output it */
3847                                 Copy(hex_string, d, output_length, char);
3848                                 d += output_length;
3849
3850                                 /* For each subsequent character, append dot and
3851                                 * its Unicode code point in hex */
3852                                 while ((str += char_length) < str_end) {
3853                                     const STRLEN off = d - SvPVX_const(sv);
3854                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3855                                                             str_end - str,
3856                                                             &char_length,
3857                                                             UTF8_ALLOW_ANYUV);
3858                                     output_length =
3859                                         my_snprintf(hex_string,
3860                                              sizeof(hex_string),
3861                                              ".%X",
3862                                              (unsigned int) NATIVE_TO_UNI(uv));
3863
3864                                     d = off + SvGROW(sv, off
3865                                                         + output_length
3866                                                         + (STRLEN)(send - e)
3867                                                         + 2);   /* '}' +  NUL */
3868                                     Copy(hex_string, d, output_length, char);
3869                                     d += output_length;
3870                                 }
3871                             }
3872
3873                             *d++ = '}'; /* Done.  Add the trailing brace */
3874                         }
3875                     }
3876                     else { /* Here, not in a pattern.  Convert the name to a
3877                             * string. */
3878
3879                         if (PL_lex_inwhat == OP_TRANS) {
3880                             str = SvPV_const(res, len);
3881                             if (len > ((SvUTF8(res))
3882                                        ? UTF8SKIP(str)
3883                                        : 1U))
3884                             {
3885                                 yyerror(Perl_form(aTHX_
3886                                     "%.*s must not be a named sequence"
3887                                     " in transliteration operator",
3888                                         /*  +1 to include the "}" */
3889                                     (int) (e + 1 - start), start));
3890                                 *d++ = '\0';
3891                                 goto end_backslash_N;
3892                             }
3893
3894                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
3895                                 has_above_latin1 = TRUE;
3896                             }
3897
3898                         }
3899                         else if (! SvUTF8(res)) {
3900                             /* Make sure \N{} return is UTF-8.  This is because
3901                              * \N{} implies Unicode semantics, and scalars have
3902                              * to be in utf8 to guarantee those semantics; but
3903                              * not needed in tr/// */
3904                             sv_utf8_upgrade_flags(res, 0);
3905                             str = SvPV_const(res, len);
3906                         }
3907
3908                          /* Upgrade destination to be utf8 if this new
3909                           * component is */
3910                         if (! has_utf8 && SvUTF8(res)) {
3911                             /* See Note on sizing above.  */
3912                             const STRLEN extra = len + (send - s) + 1;
3913
3914                             SvCUR_set(sv, d - SvPVX_const(sv));
3915                             SvPOK_on(sv);
3916                             *d = '\0';
3917
3918                             if (utf8_variant_count == 0) {
3919                                 SvUTF8_on(sv);
3920                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3921                             }
3922                             else {
3923                                 sv_utf8_upgrade_flags_grow(sv,
3924                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3925                                                 extra);
3926                                 d = SvPVX(sv) + SvCUR(sv);
3927                             }
3928                             has_utf8 = TRUE;
3929                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3930
3931                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3932                              * set correctly here). */
3933                             const STRLEN extra = len + (send - e) + 1;
3934                             const STRLEN off = d - SvPVX_const(sv);
3935                             d = off + SvGROW(sv, off + extra);
3936                         }
3937                         Copy(str, d, len, char);
3938                         d += len;
3939                     }
3940
3941                     SvREFCNT_dec(res);
3942
3943                 } /* End \N{NAME} */
3944
3945               end_backslash_N:
3946 #ifdef EBCDIC
3947                 backslash_N++; /* \N{} is defined to be Unicode */
3948 #endif
3949                 s = e + 1;  /* Point to just after the '}' */
3950                 continue;
3951
3952             /* \c is a control character */
3953             case 'c':
3954                 s++;
3955                 if (s < send) {
3956                     *d++ = grok_bslash_c(*s, 1);
3957                 }
3958                 else {
3959                     yyerror("Missing control char name in \\c");
3960                     yyquit();   /* Are at end of input, no sense continuing */
3961                 }
3962 #ifdef EBCDIC
3963                 non_portable_endpoint++;
3964 #endif
3965                 break;
3966
3967             /* printf-style backslashes, formfeeds, newlines, etc */
3968             case 'b':
3969                 *d++ = '\b';
3970                 break;
3971             case 'n':
3972                 *d++ = '\n';
3973                 break;
3974             case 'r':
3975                 *d++ = '\r';
3976                 break;
3977             case 'f':
3978                 *d++ = '\f';
3979                 break;
3980             case 't':
3981                 *d++ = '\t';
3982                 break;
3983             case 'e':
3984                 *d++ = ESC_NATIVE;
3985                 break;
3986             case 'a':
3987                 *d++ = '\a';
3988                 break;
3989             } /* end switch */
3990
3991             s++;
3992             continue;
3993         } /* end if (backslash) */
3994
3995     default_action:
3996         /* Just copy the input to the output, though we may have to convert
3997          * to/from UTF-8.
3998          *
3999          * If the input has the same representation in UTF-8 as not, it will be
4000          * a single byte, and we don't care about UTF8ness; just copy the byte */
4001         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4002             *d++ = *s++;
4003         }
4004         else if (! this_utf8 && ! has_utf8) {
4005             /* If neither source nor output is UTF-8, is also a single byte,
4006              * just copy it; but this byte counts should we later have to
4007              * convert to UTF-8 */
4008             *d++ = *s++;
4009             utf8_variant_count++;
4010         }
4011         else if (this_utf8 && has_utf8) {   /* Both UTF-8, can just copy */
4012             const STRLEN len = UTF8SKIP(s);
4013
4014             /* We expect the source to have already been checked for
4015              * malformedness */
4016             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4017
4018             Copy(s, d, len, U8);
4019             d += len;
4020             s += len;
4021         }
4022         else { /* UTF8ness matters and doesn't match, need to convert */
4023             STRLEN len = 1;
4024             const UV nextuv   = (this_utf8)
4025                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
4026                                 : (UV) ((U8) *s);
4027             STRLEN need = UVCHR_SKIP(nextuv);
4028
4029             if (!has_utf8) {
4030                 SvCUR_set(sv, d - SvPVX_const(sv));
4031                 SvPOK_on(sv);
4032                 *d = '\0';
4033
4034                 /* See Note on sizing above. */
4035                 need += (STRLEN)(send - s) + 1;
4036
4037                 if (utf8_variant_count == 0) {
4038                     SvUTF8_on(sv);
4039                     d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4040                 }
4041                 else {
4042                     sv_utf8_upgrade_flags_grow(sv,
4043                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4044                                                need);
4045                     d = SvPVX(sv) + SvCUR(sv);
4046                 }
4047                 has_utf8 = TRUE;
4048             } else if (need > len) {
4049                 /* encoded value larger than old, may need extra space (NOTE:
4050                  * SvCUR() is not set correctly here).   See Note on sizing
4051                  * above.  */
4052                 const STRLEN extra = need + (send - s) + 1;
4053                 const STRLEN off = d - SvPVX_const(sv);
4054                 d = off + SvGROW(sv, off + extra);
4055             }
4056             s += len;
4057
4058             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
4059         }
4060     } /* while loop to process each character */
4061
4062     /* terminate the string and set up the sv */
4063     *d = '\0';
4064     SvCUR_set(sv, d - SvPVX_const(sv));
4065     if (SvCUR(sv) >= SvLEN(sv))
4066         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
4067                    " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
4068
4069     SvPOK_on(sv);
4070     if (has_utf8) {
4071         SvUTF8_on(sv);
4072         if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
4073             PL_parser->lex_sub_op->op_private |=
4074                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
4075         }
4076     }
4077
4078     /* shrink the sv if we allocated more than we used */
4079     if (SvCUR(sv) + 5 < SvLEN(sv)) {
4080         SvPV_shrink_to_cur(sv);
4081     }
4082
4083     /* return the substring (via pl_yylval) only if we parsed anything */
4084     if (s > start) {
4085         char *s2 = start;
4086         for (; s2 < s; s2++) {
4087             if (*s2 == '\n')
4088                 COPLINE_INC_WITH_HERELINES;
4089         }
4090         SvREFCNT_inc_simple_void_NN(sv);
4091         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4092             && ! PL_parser->lex_re_reparsing)
4093         {
4094             const char *const key = PL_lex_inpat ? "qr" : "q";
4095             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4096             const char *type;
4097             STRLEN typelen;
4098
4099             if (PL_lex_inwhat == OP_TRANS) {
4100                 type = "tr";
4101                 typelen = 2;
4102             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4103                 type = "s";
4104                 typelen = 1;
4105             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4106                 type = "q";
4107                 typelen = 1;
4108             } else  {
4109                 type = "qq";
4110                 typelen = 2;
4111             }
4112
4113             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4114                                 type, typelen);
4115         }
4116         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4117     }
4118     LEAVE_with_name("scan_const");
4119     return s;
4120 }
4121
4122 /* S_intuit_more
4123  * Returns TRUE if there's more to the expression (e.g., a subscript),
4124  * FALSE otherwise.
4125  *
4126  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4127  *
4128  * ->[ and ->{ return TRUE
4129  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4130  * { and [ outside a pattern are always subscripts, so return TRUE
4131  * if we're outside a pattern and it's not { or [, then return FALSE
4132  * if we're in a pattern and the first char is a {
4133  *   {4,5} (any digits around the comma) returns FALSE
4134  * if we're in a pattern and the first char is a [
4135  *   [] returns FALSE
4136  *   [SOMETHING] has a funky algorithm to decide whether it's a
4137  *      character class or not.  It has to deal with things like
4138  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4139  * anything else returns TRUE
4140  */
4141
4142 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4143
4144 STATIC int
4145 S_intuit_more(pTHX_ char *s, char *e)
4146 {
4147     PERL_ARGS_ASSERT_INTUIT_MORE;
4148
4149     if (PL_lex_brackets)
4150         return TRUE;
4151     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4152         return TRUE;
4153     if (*s == '-' && s[1] == '>'
4154      && FEATURE_POSTDEREF_QQ_IS_ENABLED
4155      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4156         ||(s[2] == '@' && strchr("*[{",s[3])) ))
4157         return TRUE;
4158     if (*s != '{' && *s != '[')
4159         return FALSE;
4160     if (!PL_lex_inpat)
4161         return TRUE;
4162
4163     /* In a pattern, so maybe we have {n,m}. */
4164     if (*s == '{') {
4165         if (regcurly(s)) {
4166             return FALSE;
4167         }
4168         return TRUE;
4169     }
4170
4171     /* On the other hand, maybe we have a character class */
4172
4173     s++;
4174     if (*s == ']' || *s == '^')
4175         return FALSE;
4176     else {
4177         /* this is terrifying, and it works */
4178         int weight;
4179         char seen[256];
4180         const char * const send = (char *) memchr(s, ']', e - s);
4181         unsigned char un_char, last_un_char;
4182         char tmpbuf[sizeof PL_tokenbuf * 4];
4183
4184         if (!send)              /* has to be an expression */
4185             return TRUE;
4186         weight = 2;             /* let's weigh the evidence */
4187
4188         if (*s == '$')
4189             weight -= 3;
4190         else if (isDIGIT(*s)) {
4191             if (s[1] != ']') {
4192                 if (isDIGIT(s[1]) && s[2] == ']')
4193                     weight -= 10;
4194             }
4195             else
4196                 weight -= 100;
4197         }
4198         Zero(seen,256,char);
4199         un_char = 255;
4200         for (; s < send; s++) {
4201             last_un_char = un_char;
4202             un_char = (unsigned char)*s;
4203             switch (*s) {
4204             case '@':
4205             case '&':
4206             case '$':
4207                 weight -= seen[un_char] * 10;
4208                 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4209                     int len;
4210                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4211                     len = (int)strlen(tmpbuf);
4212                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4213                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4214                         weight -= 100;
4215                     else
4216                         weight -= 10;
4217                 }
4218                 else if (*s == '$'
4219                          && s[1]
4220                          && strchr("[#!%*<>()-=",s[1]))
4221                 {
4222                     if (/*{*/ strchr("])} =",s[2]))
4223                         weight -= 10;
4224                     else
4225                         weight -= 1;
4226                 }
4227                 break;
4228             case '\\':
4229                 un_char = 254;
4230                 if (s[1]) {
4231                     if (strchr("wds]",s[1]))
4232                         weight += 100;
4233                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4234                         weight += 1;
4235                     else if (strchr("rnftbxcav",s[1]))
4236                         weight += 40;
4237                     else if (isDIGIT(s[1])) {
4238                         weight += 40;
4239                         while (s[1] && isDIGIT(s[1]))
4240                             s++;
4241                     }
4242                 }
4243                 else
4244                     weight += 100;
4245                 break;
4246             case '-':
4247                 if (s[1] == '\\')
4248                     weight += 50;
4249                 if (strchr("aA01! ",last_un_char))
4250                     weight += 30;
4251                 if (strchr("zZ79~",s[1]))
4252                     weight += 30;
4253                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4254                     weight -= 5;        /* cope with negative subscript */
4255                 break;
4256             default:
4257                 if (!isWORDCHAR(last_un_char)
4258                     && !(last_un_char == '$' || last_un_char == '@'
4259                          || last_un_char == '&')
4260                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4261                     char *d = s;
4262                     while (isALPHA(*s))
4263                         s++;
4264                     if (keyword(d, s - d, 0))
4265                         weight -= 150;
4266                 }
4267                 if (un_char == last_un_char + 1)
4268                     weight += 5;
4269                 weight -= seen[un_char];
4270                 break;
4271             }
4272             seen[un_char]++;
4273         }
4274         if (weight >= 0)        /* probably a character class */
4275             return FALSE;
4276     }
4277
4278     return TRUE;
4279 }
4280
4281 /*
4282  * S_intuit_method
4283  *
4284  * Does all the checking to disambiguate
4285  *   foo bar
4286  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4287  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4288  *
4289  * First argument is the stuff after the first token, e.g. "bar".
4290  *
4291  * Not a method if foo is a filehandle.
4292  * Not a method if foo is a subroutine prototyped to take a filehandle.
4293  * Not a method if it's really "Foo $bar"
4294  * Method if it's "foo $bar"
4295  * Not a method if it's really "print foo $bar"
4296  * Method if it's really "foo package::" (interpreted as package->foo)
4297  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4298  * Not a method if bar is a filehandle or package, but is quoted with
4299  *   =>
4300  */
4301
4302 STATIC int
4303 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4304 {
4305     char *s = start + (*start == '$');
4306     char tmpbuf[sizeof PL_tokenbuf];
4307     STRLEN len;
4308     GV* indirgv;
4309         /* Mustn't actually add anything to a symbol table.
4310            But also don't want to "initialise" any placeholder
4311            constants that might already be there into full
4312            blown PVGVs with attached PVCV.  */
4313     GV * const gv =
4314         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4315
4316     PERL_ARGS_ASSERT_INTUIT_METHOD;
4317
4318     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4319             return 0;
4320     if (cv && SvPOK(cv)) {
4321         const char *proto = CvPROTO(cv);
4322         if (proto) {
4323             while (*proto && (isSPACE(*proto) || *proto == ';'))
4324                 proto++;
4325             if (*proto == '*')
4326                 return 0;
4327         }
4328     }
4329
4330     if (*start == '$') {
4331         SSize_t start_off = start - SvPVX(PL_linestr);
4332         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4333             || isUPPER(*PL_tokenbuf))
4334             return 0;
4335         /* this could be $# */
4336         if (isSPACE(*s))
4337             s = skipspace(s);
4338         PL_bufptr = SvPVX(PL_linestr) + start_off;
4339         PL_expect = XREF;
4340         return *s == '(' ? FUNCMETH : METHOD;
4341     }
4342
4343     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4344     /* start is the beginning of the possible filehandle/object,
4345      * and s is the end of it
4346      * tmpbuf is a copy of it (but with single quotes as double colons)
4347      */
4348
4349     if (!keyword(tmpbuf, len, 0)) {
4350         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4351             len -= 2;
4352             tmpbuf[len] = '\0';
4353             goto bare_package;
4354         }
4355         indirgv = gv_fetchpvn_flags(tmpbuf, len,
4356                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4357                                     SVt_PVCV);
4358         if (indirgv && SvTYPE(indirgv) != SVt_NULL
4359          && (!isGV(indirgv) || GvCVu(indirgv)))
4360             return 0;
4361         /* filehandle or package name makes it a method */
4362         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4363             s = skipspace(s);
4364             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4365                 return 0;       /* no assumptions -- "=>" quotes bareword */
4366       bare_package:
4367             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4368                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4369             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4370             PL_expect = XTERM;
4371             force_next(BAREWORD);
4372             PL_bufptr = s;
4373             return *s == '(' ? FUNCMETH : METHOD;
4374         }
4375     }
4376     return 0;
4377 }
4378
4379 /* Encoded script support. filter_add() effectively inserts a
4380  * 'pre-processing' function into the current source input stream.
4381  * Note that the filter function only applies to the current source file
4382  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4383  *
4384  * The datasv parameter (which may be NULL) can be used to pass
4385  * private data to this instance of the filter. The filter function
4386  * can recover the SV using the FILTER_DATA macro and use it to
4387  * store private buffers and state information.
4388  *
4389  * The supplied datasv parameter is upgraded to a PVIO type
4390  * and the IoDIRP/IoANY field is used to store the function pointer,
4391  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4392  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4393  * private use must be set using malloc'd pointers.
4394  */
4395
4396 SV *
4397 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4398 {
4399     if (!funcp)
4400         return NULL;
4401
4402     if (!PL_parser)
4403         return NULL;
4404
4405     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4406         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4407
4408     if (!PL_rsfp_filters)
4409         PL_rsfp_filters = newAV();
4410     if (!datasv)
4411         datasv = newSV(0);
4412     SvUPGRADE(datasv, SVt_PVIO);
4413     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4414     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4415     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4416                           FPTR2DPTR(void *, IoANY(datasv)),
4417                           SvPV_nolen(datasv)));
4418     av_unshift(PL_rsfp_filters, 1);
4419     av_store(PL_rsfp_filters, 0, datasv) ;
4420     if (
4421         !PL_parser->filtered
4422      && PL_parser->lex_flags & LEX_EVALBYTES
4423      && PL_bufptr < PL_bufend
4424     ) {
4425         const char *s = PL_bufptr;
4426         while (s < PL_bufend) {
4427             if (*s == '\n') {
4428                 SV *linestr = PL_parser->linestr;
4429                 char *buf = SvPVX(linestr);
4430                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4431                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4432                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4433                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4434                 STRLEN const last_uni_pos =
4435                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4436                 STRLEN const last_lop_pos =
4437                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4438                 av_push(PL_rsfp_filters, linestr);
4439                 PL_parser->linestr =
4440                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4441                 buf = SvPVX(PL_parser->linestr);
4442                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4443                 PL_parser->bufptr = buf + bufptr_pos;
4444                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4445                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4446                 PL_parser->linestart = buf + linestart_pos;
4447                 if (PL_parser->last_uni)
4448                     PL_parser->last_uni = buf + last_uni_pos;
4449                 if (PL_parser->last_lop)
4450                     PL_parser->last_lop = buf + last_lop_pos;
4451                 SvLEN_set(linestr, SvCUR(linestr));
4452                 SvCUR_set(linestr, s - SvPVX(linestr));
4453                 PL_parser->filtered = 1;
4454                 break;
4455             }
4456             s++;
4457         }
4458     }
4459     return(datasv);
4460 }
4461
4462
4463 /* Delete most recently added instance of this filter function. */
4464 void
4465 Perl_filter_del(pTHX_ filter_t funcp)
4466 {
4467     SV *datasv;
4468
4469     PERL_ARGS_ASSERT_FILTER_DEL;
4470
4471 #ifdef DEBUGGING
4472     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4473                           FPTR2DPTR(void*, funcp)));
4474 #endif
4475     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4476         return;
4477     /* if filter is on top of stack (usual case) just pop it off */
4478     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4479     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4480         sv_free(av_pop(PL_rsfp_filters));
4481
4482         return;
4483     }
4484     /* we need to search for the correct entry and clear it     */
4485     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4486 }
4487
4488
4489 /* Invoke the idxth filter function for the current rsfp.        */
4490 /* maxlen 0 = read one text line */
4491 I32
4492 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4493 {
4494     filter_t funcp;
4495     I32 ret;
4496     SV *datasv = NULL;
4497     /* This API is bad. It should have been using unsigned int for maxlen.
4498        Not sure if we want to change the API, but if not we should sanity
4499        check the value here.  */
4500     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4501
4502     PERL_ARGS_ASSERT_FILTER_READ;
4503
4504     if (!PL_parser || !PL_rsfp_filters)
4505         return -1;
4506     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4507         /* Provide a default input filter to make life easy.    */
4508         /* Note that we append to the line. This is handy.      */
4509         DEBUG_P(PerlIO_printf(Perl_debug_log,
4510                               "filter_read %d: from rsfp\n", idx));
4511         if (correct_length) {
4512             /* Want a block */
4513             int len ;
4514             const int old_len = SvCUR(buf_sv);
4515
4516             /* ensure buf_sv is large enough */
4517             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4518             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4519                                    correct_length)) <= 0) {
4520                 if (PerlIO_error(PL_rsfp))
4521                     return -1;          /* error */
4522                 else
4523                     return 0 ;          /* end of file */
4524             }
4525             SvCUR_set(buf_sv, old_len + len) ;
4526             SvPVX(buf_sv)[old_len + len] = '\0';
4527         } else {
4528             /* Want a line */
4529             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4530                 if (PerlIO_error(PL_rsfp))
4531                     return -1;          /* error */
4532                 else
4533                     return 0 ;          /* end of file */
4534             }
4535         }
4536         return SvCUR(buf_sv);
4537     }
4538     /* Skip this filter slot if filter has been deleted */
4539     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4540         DEBUG_P(PerlIO_printf(Perl_debug_log,
4541                               "filter_read %d: skipped (filter deleted)\n",
4542                               idx));
4543         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4544     }
4545     if (SvTYPE(datasv) != SVt_PVIO) {
4546         if (correct_length) {
4547             /* Want a block */
4548             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4549             if (!remainder) return 0; /* eof */
4550             if (correct_length > remainder) correct_length = remainder;
4551             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4552             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4553         } else {
4554             /* Want a line */
4555             const char *s = SvEND(datasv);
4556             const char *send = SvPVX(datasv) + SvLEN(datasv);
4557             while (s < send) {
4558                 if (*s == '\n') {
4559                     s++;
4560                     break;
4561                 }
4562                 s++;
4563             }
4564             if (s == send) return 0; /* eof */
4565             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4566             SvCUR_set(datasv, s-SvPVX(datasv));
4567         }
4568         return SvCUR(buf_sv);
4569     }
4570     /* Get function pointer hidden within datasv        */
4571     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4572     DEBUG_P(PerlIO_printf(Perl_debug_log,
4573                           "filter_read %d: via function %p (%s)\n",
4574                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4575     /* Call function. The function is expected to       */
4576     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4577     /* Return: <0:error, =0:eof, >0:not eof             */
4578     ENTER;
4579     save_scalar(PL_errgv);
4580     ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4581     LEAVE;
4582     return ret;
4583 }
4584
4585 STATIC char *
4586 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4587 {
4588     PERL_ARGS_ASSERT_FILTER_GETS;
4589
4590 #ifdef PERL_CR_FILTER
4591     if (!PL_rsfp_filters) {
4592         filter_add(S_cr_textfilter,NULL);
4593     }
4594 #endif
4595     if (PL_rsfp_filters) {
4596         if (!append)
4597             SvCUR_set(sv, 0);   /* start with empty line        */
4598         if (FILTER_READ(0, sv, 0) > 0)
4599             return ( SvPVX(sv) ) ;
4600         else
4601             return NULL ;
4602     }
4603     else
4604         return (sv_gets(sv, PL_rsfp, append));
4605 }
4606
4607 STATIC HV *
4608 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4609 {
4610     GV *gv;
4611
4612     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4613
4614     if (memEQs(pkgname, len, "__PACKAGE__"))
4615         return PL_curstash;
4616
4617     if (len > 2
4618         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4619         && (gv = gv_fetchpvn_flags(pkgname,
4620                                    len,
4621                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4622     {
4623         return GvHV(gv);                        /* Foo:: */
4624     }
4625
4626     /* use constant CLASS => 'MyClass' */
4627     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4628     if (gv && GvCV(gv)) {
4629         SV * const sv = cv_const_sv(GvCV(gv));
4630         if (sv)
4631             return gv_stashsv(sv, 0);
4632     }
4633
4634     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4635 }
4636
4637
4638 STATIC char *
4639 S_tokenize_use(pTHX_ int is_use, char *s) {
4640     PERL_ARGS_ASSERT_TOKENIZE_USE;
4641
4642     if (PL_expect != XSTATE)
4643         /* diag_listed_as: "use" not allowed in expression */
4644         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4645                     is_use ? "use" : "no"));
4646     PL_expect = XTERM;
4647     s = skipspace(s);
4648     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4649         s = force_version(s, TRUE);
4650         if (*s == ';' || *s == '}'
4651                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4652             NEXTVAL_NEXTTOKE.opval = NULL;
4653             force_next(BAREWORD);
4654         }
4655         else if (*s == 'v') {
4656             s = force_word(s,BAREWORD,FALSE,TRUE);
4657             s = force_version(s, FALSE);
4658         }
4659     }
4660     else {
4661         s = force_word(s,BAREWORD,FALSE,TRUE);
4662         s = force_version(s, FALSE);
4663     }
4664     pl_yylval.ival = is_use;
4665     return s;
4666 }
4667 #ifdef DEBUGGING
4668     static const char* const exp_name[] =
4669         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4670           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4671           "SIGVAR", "TERMORDORDOR"
4672         };
4673 #endif
4674
4675 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4676 STATIC bool
4677 S_word_takes_any_delimiter(char *p, STRLEN len)
4678 {
4679     return (len == 1 && strchr("msyq", p[0]))
4680             || (len == 2
4681                 && ((p[0] == 't' && p[1] == 'r')
4682                     || (p[0] == 'q' && strchr("qwxr", p[1]))));
4683 }
4684
4685 static void
4686 S_check_scalar_slice(pTHX_ char *s)
4687 {
4688     s++;
4689     while (SPACE_OR_TAB(*s)) s++;
4690     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4691                                                              PL_bufend,
4692                                                              UTF))
4693     {
4694         return;
4695     }
4696     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4697            || (*s && strchr(" \t$#+-'\"", *s)))
4698     {
4699         s += UTF ? UTF8SKIP(s) : 1;
4700     }
4701     if (*s == '}' || *s == ']')
4702         pl_yylval.ival = OPpSLICEWARNING;
4703 }
4704
4705 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4706 static void
4707 S_lex_token_boundary(pTHX)
4708 {
4709     PL_oldoldbufptr = PL_oldbufptr;
4710     PL_oldbufptr = PL_bufptr;
4711 }
4712
4713 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4714 static char *
4715 S_vcs_conflict_marker(pTHX_ char *s)
4716 {
4717     lex_token_boundary();
4718     PL_bufptr = s;
4719     yyerror("Version control conflict marker");
4720     while (s < PL_bufend && *s != '\n')
4721         s++;
4722     return s;
4723 }
4724
4725 /*
4726   yylex
4727
4728   Works out what to call the token just pulled out of the input
4729   stream.  The yacc parser takes care of taking the ops we return and
4730   stitching them into a tree.
4731
4732   Returns:
4733     The type of the next token
4734
4735   Structure:
4736       Check if we have already built the token; if so, use it.
4737       Switch based on the current state:
4738           - if we have a case modifier in a string, deal with that
4739           - handle other cases of interpolation inside a string
4740           - scan the next line if we are inside a format
4741       In the normal state, switch on the next character:
4742           - default:
4743             if alphabetic, go to key lookup
4744             unrecognized character - croak
4745           - 0/4/26: handle end-of-line or EOF
4746           - cases for whitespace
4747           - \n and #: handle comments and line numbers
4748           - various operators, brackets and sigils
4749           - numbers
4750           - quotes
4751           - 'v': vstrings (or go to key lookup)
4752           - 'x' repetition operator (or go to key lookup)
4753           - other ASCII alphanumerics (key lookup begins here):
4754               word before => ?
4755               keyword plugin
4756               scan built-in keyword (but do nothing with it yet)
4757               check for statement label
4758               check for lexical subs
4759                   goto just_a_word if there is one
4760               see whether built-in keyword is overridden
4761               switch on keyword number:
4762                   - default: just_a_word:
4763                       not a built-in keyword; handle bareword lookup
4764                       disambiguate between method and sub call
4765                       fall back to bareword
4766                   - cases for built-in keywords
4767 */
4768
4769
4770 int
4771 Perl_yylex(pTHX)
4772 {
4773     dVAR;
4774     char *s = PL_bufptr;
4775     char *d;
4776     STRLEN len;
4777     bool bof = FALSE;
4778     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4779     U8 formbrack = 0;
4780     U32 fake_eof = 0;
4781
4782     /* orig_keyword, gvp, and gv are initialized here because
4783      * jump to the label just_a_word_zero can bypass their
4784      * initialization later. */
4785     I32 orig_keyword = 0;
4786     GV *gv = NULL;
4787     GV **gvp = NULL;
4788
4789     if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
4790         const U8* first_bad_char_loc;
4791         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
4792                                                         PL_bufend - PL_bufptr,
4793                                                         &first_bad_char_loc)))
4794         {
4795             _force_out_malformed_utf8_message(first_bad_char_loc,
4796                                               (U8 *) PL_bufend,
4797                                               0,
4798                                               1 /* 1 means die */ );
4799             NOT_REACHED; /* NOTREACHED */
4800         }
4801         PL_parser->recheck_utf8_validity = FALSE;
4802     }
4803     DEBUG_T( {
4804         SV* tmp = newSVpvs("");
4805         PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
4806             (IV)CopLINE(PL_curcop),
4807             lex_state_names[PL_lex_state],
4808             exp_name[PL_expect],
4809             pv_display(tmp, s, strlen(s), 0, 60));
4810         SvREFCNT_dec(tmp);
4811     } );
4812
4813     /* when we've already built the next token, just pull it out of the queue */
4814     if (PL_nexttoke) {
4815         PL_nexttoke--;
4816         pl_yylval = PL_nextval[PL_nexttoke];
4817         {
4818             I32 next_type;
4819             next_type = PL_nexttype[PL_nexttoke];
4820             if (next_type & (7<<24)) {
4821                 if (next_type & (1<<24)) {
4822                     if (PL_lex_brackets > 100)
4823                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4824                     PL_lex_brackstack[PL_lex_brackets++] =
4825                         (char) ((next_type >> 16) & 0xff);
4826                 }
4827                 if (next_type & (2<<24))
4828                     PL_lex_allbrackets++;
4829                 if (next_type & (4<<24))
4830                     PL_lex_allbrackets--;
4831                 next_type &= 0xffff;
4832             }
4833             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4834         }
4835     }
4836
4837     switch (PL_lex_state) {
4838     case LEX_NORMAL:
4839     case LEX_INTERPNORMAL:
4840         break;
4841
4842     /* interpolated case modifiers like \L \U, including \Q and \E.
4843        when we get here, PL_bufptr is at the \
4844     */
4845     case LEX_INTERPCASEMOD:
4846 #ifdef DEBUGGING
4847         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4848             Perl_croak(aTHX_
4849                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4850                        PL_bufptr, PL_bufend, *PL_bufptr);
4851 #endif
4852         /* handle \E or end of string */
4853         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4854             /* if at a \E */
4855             if (PL_lex_casemods) {
4856                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4857                 PL_lex_casestack[PL_lex_casemods] = '\0';
4858
4859                 if (PL_bufptr != PL_bufend
4860                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4861                         || oldmod == 'F')) {
4862                     PL_bufptr += 2;
4863                     PL_lex_state = LEX_INTERPCONCAT;
4864                 }
4865                 PL_lex_allbrackets--;
4866                 return REPORT(')');
4867             }
4868             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4869                /* Got an unpaired \E */
4870                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4871                         "Useless use of \\E");
4872             }
4873             if (PL_bufptr != PL_bufend)
4874                 PL_bufptr += 2;
4875             PL_lex_state = LEX_INTERPCONCAT;
4876             return yylex();
4877         }
4878         else {
4879             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4880               "### Saw case modifier\n"); });
4881             s = PL_bufptr + 1;
4882             if (s[1] == '\\' && s[2] == 'E') {
4883                 PL_bufptr = s + 3;
4884                 PL_lex_state = LEX_INTERPCONCAT;
4885                 return yylex();
4886             }
4887             else {
4888                 I32 tmp;
4889                 if (   memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
4890                     || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
4891                 {
4892                     tmp = *s, *s = s[2], s[2] = (char)tmp;      /* misordered... */
4893                 }
4894                 if ((*s == 'L' || *s == 'U' || *s == 'F')
4895                     && (strpbrk(PL_lex_casestack, "LUF")))
4896                 {
4897                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4898                     PL_lex_allbrackets--;
4899                     return REPORT(')');
4900                 }
4901                 if (PL_lex_casemods > 10)
4902                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4903                 PL_lex_casestack[PL_lex_casemods++] = *s;
4904                 PL_lex_casestack[PL_lex_casemods] = '\0';
4905                 PL_lex_state = LEX_INTERPCONCAT;
4906                 NEXTVAL_NEXTTOKE.ival = 0;
4907                 force_next((2<<24)|'(');
4908                 if (*s == 'l')
4909                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4910                 else if (*s == 'u')
4911                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4912                 else if (*s == 'L')
4913                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4914                 else if (*s == 'U')
4915                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4916                 else if (*s == 'Q')
4917                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4918                 else if (*s == 'F')
4919                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4920                 else
4921                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4922                 PL_bufptr = s + 1;
4923             }
4924             force_next(FUNC);
4925             if (PL_lex_starts) {
4926                 s = PL_bufptr;
4927                 PL_lex_starts = 0;
4928                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4929                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4930                     TOKEN(',');
4931                 else
4932                     AopNOASSIGN(OP_CONCAT);
4933             }
4934             else
4935                 return yylex();
4936         }
4937
4938     case LEX_INTERPPUSH:
4939         return REPORT(sublex_push());
4940
4941     case LEX_INTERPSTART:
4942         if (PL_bufptr == PL_bufend)
4943             return REPORT(sublex_done());
4944         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4945               "### Interpolated variable\n"); });
4946         PL_expect = XTERM;
4947         /* for /@a/, we leave the joining for the regex engine to do
4948          * (unless we're within \Q etc) */
4949         PL_lex_dojoin = (*PL_bufptr == '@'
4950                             && (!PL_lex_inpat || PL_lex_casemods));
4951         PL_lex_state = LEX_INTERPNORMAL;
4952         if (PL_lex_dojoin) {
4953             NEXTVAL_NEXTTOKE.ival = 0;
4954             force_next(',');
4955             force_ident("\"", '$');
4956             NEXTVAL_NEXTTOKE.ival = 0;
4957             force_next('$');
4958             NEXTVAL_NEXTTOKE.ival = 0;
4959             force_next((2<<24)|'(');
4960             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4961             force_next(FUNC);
4962         }
4963         /* Convert (?{...}) and friends to 'do {...}' */
4964         if (PL_lex_inpat && *PL_bufptr == '(') {
4965             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4966             PL_bufptr += 2;
4967             if (*PL_bufptr != '{')
4968                 PL_bufptr++;
4969             PL_expect = XTERMBLOCK;
4970             force_next(DO);
4971         }
4972
4973         if (PL_lex_starts++) {
4974             s = PL_bufptr;
4975             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4976             if (!PL_lex_casemods && PL_lex_inpat)
4977                 TOKEN(',');
4978             else
4979                 AopNOASSIGN(OP_CONCAT);
4980         }
4981         return yylex();
4982
4983     case LEX_INTERPENDMAYBE:
4984         if (intuit_more(PL_bufptr, PL_bufend)) {
4985             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4986             break;
4987         }
4988         /* FALLTHROUGH */
4989
4990     case LEX_INTERPEND:
4991         if (PL_lex_dojoin) {
4992             const U8 dojoin_was = PL_lex_dojoin;
4993             PL_lex_dojoin = FALSE;
4994             PL_lex_state = LEX_INTERPCONCAT;
4995             PL_lex_allbrackets--;
4996             return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
4997         }
4998         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4999             && SvEVALED(PL_lex_repl))
5000         {
5001             if (PL_bufptr != PL_bufend)
5002                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5003             PL_lex_repl = NULL;
5004         }
5005         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
5006            re_eval_str.  If the here-doc body’s length equals the previous
5007            value of re_eval_start, re_eval_start will now be null.  So
5008            check re_eval_str as well. */
5009         if (PL_parser->lex_shared->re_eval_start
5010          || PL_parser->lex_shared->re_eval_str) {
5011             SV *sv;
5012             if (*PL_bufptr != ')')
5013                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5014             PL_bufptr++;
5015             /* having compiled a (?{..}) expression, return the original
5016              * text too, as a const */
5017             if (PL_parser->lex_shared->re_eval_str) {
5018                 sv = PL_parser->lex_shared->re_eval_str;
5019                 PL_parser->lex_shared->re_eval_str = NULL;
5020                 SvCUR_set(sv,
5021                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5022                 SvPV_shrink_to_cur(sv);
5023             }
5024             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5025                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5026             NEXTVAL_NEXTTOKE.opval =
5027                     newSVOP(OP_CONST, 0,
5028                                  sv);
5029             force_next(THING);
5030             PL_parser->lex_shared->re_eval_start = NULL;
5031             PL_expect = XTERM;
5032             return REPORT(',');
5033         }
5034
5035         /* FALLTHROUGH */
5036     case LEX_INTERPCONCAT:
5037 #ifdef DEBUGGING
5038         if (PL_lex_brackets)
5039             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5040                        (long) PL_lex_brackets);
5041 #endif
5042         if (PL_bufptr == PL_bufend)
5043             return REPORT(sublex_done());
5044
5045         /* m'foo' still needs to be parsed for possible (?{...}) */
5046         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5047             SV *sv = newSVsv(PL_linestr);
5048             sv = tokeq(sv);
5049             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
5050             s = PL_bufend;
5051         }
5052         else {
5053             int save_error_count = PL_error_count;
5054
5055             s = scan_const(PL_bufptr);
5056
5057             /* Set flag if this was a pattern and there were errors.  op.c will
5058              * refuse to compile a pattern with this flag set.  Otherwise, we
5059              * could get segfaults, etc. */
5060             if (PL_lex_inpat && PL_error_count > save_error_count) {
5061                 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
5062             }
5063             if (*s == '\\')
5064                 PL_lex_state = LEX_INTERPCASEMOD;
5065             else
5066                 PL_lex_state = LEX_INTERPSTART;
5067         }
5068
5069         if (s != PL_bufptr) {
5070             NEXTVAL_NEXTTOKE = pl_yylval;
5071             PL_expect = XTERM;
5072             force_next(THING);
5073             if (PL_lex_starts++) {
5074                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5075                 if (!PL_lex_casemods && PL_lex_inpat)
5076                     TOKEN(',');
5077                 else
5078                     AopNOASSIGN(OP_CONCAT);
5079             }
5080             else {
5081                 PL_bufptr = s;
5082                 return yylex();
5083             }
5084         }
5085
5086         return yylex();
5087     case LEX_FORMLINE:
5088         assert(PL_lex_formbrack);
5089         s = scan_formline(PL_bufptr);
5090         if (!PL_lex_formbrack)
5091         {
5092             formbrack = 1;
5093             goto rightbracket;
5094         }
5095         PL_bufptr = s;
5096         return yylex();
5097     }
5098
5099     /* We really do *not* want PL_linestr ever becoming a COW. */
5100     assert (!SvIsCOW(PL_linestr));
5101     s = PL_bufptr;
5102     PL_oldoldbufptr = PL_oldbufptr;
5103     PL_oldbufptr = s;
5104     PL_parser->saw_infix_sigil = 0;
5105
5106     if (PL_in_my == KEY_sigvar) {
5107         /* we expect the sigil and optional var name part of a
5108          * signature element here. Since a '$' is not necessarily
5109          * followed by a var name, handle it specially here; the general
5110          * yylex code would otherwise try to interpret whatever follows
5111          * as a var; e.g. ($, ...) would be seen as the var '$,'
5112          */
5113
5114         U8 sigil;
5115
5116         s = skipspace(s);
5117         sigil = *s++;
5118         PL_bufptr = s; /* for error reporting */
5119         switch (sigil) {
5120         case '$':
5121         case '@':
5122         case '%':
5123             /* spot stuff that looks like an prototype */
5124             if (strchr("$:@%&*;\\[]", *s)) {
5125                 yyerror("Illegal character following sigil in a subroutine signature");
5126                 break;
5127             }
5128             /* '$#' is banned, while '$ # comment' isn't */
5129             if (*s == '#') {
5130                 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5131                 break;
5132             }
5133             s = skipspace(s);
5134             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5135                 char *dest = PL_tokenbuf + 1;
5136                 /* read var name, including sigil, into PL_tokenbuf */
5137                 PL_tokenbuf[0] = sigil;
5138                 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5139                     0, cBOOL(UTF), FALSE, FALSE);
5140                 *dest = '\0';
5141                 assert(PL_tokenbuf[1]); /* we have a variable name */
5142             }
5143             else {
5144                 *PL_tokenbuf = 0;
5145                 PL_in_my = 0;
5146             }
5147
5148             s = skipspace(s);
5149             /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5150              * as the ASSIGNOP, and exclude other tokens that start with =
5151              */
5152             if (*s == '=' && (!s[1] || strchr("=~>", s[1]) == 0)) {
5153                 /* save now to report with the same context as we did when
5154                  * all ASSIGNOPS were accepted */
5155                 PL_oldbufptr = s;
5156
5157                 ++s;
5158                 NEXTVAL_NEXTTOKE.ival = 0;
5159                 force_next(ASSIGNOP);
5160                 PL_expect = XTERM;
5161             }
5162             else if (*s == ',' || *s == ')') {
5163                 PL_expect = XOPERATOR;
5164             }
5165             else {
5166                 /* make sure the context shows the unexpected character and
5167                  * hopefully a bit more */
5168                 if (*s) ++s;
5169                 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5170                     s++;
5171                 PL_bufptr = s; /* for error reporting */
5172                 yyerror("Illegal operator following parameter in a subroutine signature");
5173                 PL_in_my = 0;
5174             }
5175             if (*PL_tokenbuf) {
5176                 NEXTVAL_NEXTTOKE.ival = sigil;
5177                 force_next('p'); /* force a signature pending identifier */
5178             }
5179             break;
5180
5181         case ')':
5182             PL_expect = XBLOCK;
5183             break;
5184         case ',': /* handle ($a,,$b) */
5185             break;
5186
5187         default:
5188             PL_in_my = 0;
5189             yyerror("A signature parameter must start with '$', '@' or '%'");
5190             /* very crude error recovery: skip to likely next signature
5191              * element */
5192             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5193                 s++;
5194             break;
5195         }
5196         TOKEN(sigil);
5197     }
5198
5199   retry:
5200     switch (*s) {
5201     default:
5202         if (UTF) {
5203             if (isIDFIRST_utf8_safe(s, PL_bufend)) {
5204                 goto keylookup;
5205             }
5206         }
5207         else if (isALNUMC(*s)) {
5208             goto keylookup;
5209         }
5210     {
5211         SV *dsv = newSVpvs_flags("", SVs_TEMP);
5212         const char *c;
5213         if (UTF) {
5214             STRLEN skiplen = UTF8SKIP(s);
5215             STRLEN stravail = PL_bufend - s;
5216             c = sv_uni_display(dsv, newSVpvn_flags(s,
5217                                                    skiplen > stravail ? stravail : skiplen,
5218                                                    SVs_TEMP | SVf_UTF8),
5219                                10, UNI_DISPLAY_ISPRINT);
5220         }
5221         else {
5222             c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5223         }
5224
5225         if (s >= PL_linestart) {
5226             d = PL_linestart;
5227         }
5228         else {
5229             /* somehow (probably due to a parse failure), PL_linestart has advanced
5230              * pass PL_bufptr, get a reasonable beginning of line
5231              */
5232             d = s;
5233             while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
5234                 --d;
5235         }
5236         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
5237         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5238             d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
5239         }
5240
5241         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
5242                           UTF8fARG(UTF, (s - d), d),
5243                          (int) len + 1);
5244     }
5245     case 4:
5246     case 26:
5247         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
5248     case 0:
5249         if ((!PL_rsfp || PL_lex_inwhat)
5250          && (!PL_parser->filtered || s+1 < PL_bufend)) {
5251             PL_last_uni = 0;
5252             PL_last_lop = 0;
5253             if (PL_lex_brackets
5254                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
5255             {
5256                 yyerror((const char *)
5257                         (PL_lex_formbrack
5258                          ? "Format not terminated"
5259                          : "Missing right curly or square bracket"));
5260             }
5261             DEBUG_T( { PerlIO_printf(Perl_debug_log,
5262                         "### Tokener got EOF\n");
5263             } );
5264             TOKEN(0);
5265         }
5266         if (s++ < PL_bufend)
5267             goto retry;                 /* ignore stray nulls */
5268         PL_last_uni = 0;
5269         PL_last_lop = 0;
5270         if (!PL_in_eval && !PL_preambled) {
5271             PL_preambled = TRUE;
5272             if (PL_perldb) {
5273                 /* Generate a string of Perl code to load the debugger.
5274                  * If PERL5DB is set, it will return the contents of that,
5275                  * otherwise a compile-time require of perl5db.pl.  */
5276
5277                 const char * const pdb = PerlEnv_getenv("PERL5DB");
5278
5279                 if (pdb) {
5280                     sv_setpv(PL_linestr, pdb);
5281                     sv_catpvs(PL_linestr,";");
5282                 } else {
5283                     SETERRNO(0,SS_NORMAL);
5284                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5285                 }
5286                 PL_parser->preambling = CopLINE(PL_curcop);
5287             } else
5288                 SvPVCLEAR(PL_linestr);
5289             if (PL_preambleav) {
5290                 SV **svp = AvARRAY(PL_preambleav);
5291                 SV **const end = svp + AvFILLp(PL_preambleav);
5292                 while(svp <= end) {
5293                     sv_catsv(PL_linestr, *svp);
5294                     ++svp;
5295                     sv_catpvs(PL_linestr, ";");
5296                 }
5297                 sv_free(MUTABLE_SV(PL_preambleav));
5298                 PL_preambleav = NULL;
5299             }
5300             if (PL_minus_E)
5301                 sv_catpvs(PL_linestr,
5302                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5303             if (PL_minus_n || PL_minus_p) {
5304                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5305                 if (PL_minus_l)
5306                     sv_catpvs(PL_linestr,"chomp;");
5307                 if (PL_minus_a) {
5308                     if (PL_minus_F) {
5309                         if (   (   *PL_splitstr == '/'
5310                                 || *PL_splitstr == '\''
5311                                 || *PL_splitstr == '"')
5312                             && strchr(PL_splitstr + 1, *PL_splitstr))
5313                         {
5314                             /* strchr is ok, because -F pattern can't contain
5315                              * embeddded NULs */
5316                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5317                         }
5318                         else {
5319                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5320                                bytes can be used as quoting characters.  :-) */
5321                             const char *splits = PL_splitstr;
5322                             sv_catpvs(PL_linestr, "our @F=split(q\0");
5323                             do {
5324                                 /* Need to \ \s  */
5325                                 if (*splits == '\\')
5326                                     sv_catpvn(PL_linestr, splits, 1);
5327                                 sv_catpvn(PL_linestr, splits, 1);
5328                             } while (*splits++);
5329                             /* This loop will embed the trailing NUL of
5330                                PL_linestr as the last thing it does before
5331                                terminating.  */
5332                             sv_catpvs(PL_linestr, ");");
5333                         }
5334                     }
5335                     else
5336                         sv_catpvs(PL_linestr,"our @F=split(' ');");
5337                 }
5338             }
5339             sv_catpvs(PL_linestr, "\n");
5340             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5341             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5342             PL_last_lop = PL_last_uni = NULL;
5343             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
5344                 update_debugger_info(PL_linestr, NULL, 0);
5345             goto retry;
5346         }
5347         do {
5348             fake_eof = 0;
5349             bof = cBOOL(PL_rsfp);
5350             if (0) {
5351               fake_eof:
5352                 fake_eof = LEX_FAKE_EOF;
5353             }
5354             PL_bufptr = PL_bufend;
5355             COPLINE_INC_WITH_HERELINES;
5356             if (!lex_next_chunk(fake_eof)) {
5357                 CopLINE_dec(PL_curcop);
5358                 s = PL_bufptr;
5359                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
5360             }
5361             CopLINE_dec(PL_curcop);
5362             s = PL_bufptr;
5363             /* If it looks like the start of a BOM or raw UTF-16,
5364              * check if it in fact is. */
5365             if (bof && PL_rsfp
5366                 && (   *s == 0
5367                     || *(U8*)s == BOM_UTF8_FIRST_BYTE
5368                     || *(U8*)s >= 0xFE
5369                     || s[1] == 0))
5370             {
5371                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5372                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5373 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5374                 /* offset may include swallowed CR */
5375                 if (!bof)
5376                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5377 #endif
5378                 if (bof) {
5379                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5380                     s = swallow_bom((U8*)s);
5381                 }
5382             }
5383             if (PL_parser->in_pod) {
5384                 /* Incest with pod. */
5385                 if (    memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
5386                     && !isALPHA(s[4]))
5387                 {
5388                     SvPVCLEAR(PL_linestr);
5389                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5390                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5391                     PL_last_lop = PL_last_uni = NULL;
5392                     PL_parser->in_pod = 0;
5393                 }
5394             }
5395             if (PL_rsfp || PL_parser->filtered)
5396                 incline(s, PL_bufend);
5397         } while (PL_parser->in_pod);
5398         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5399         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5400         PL_last_lop = PL_last_uni = NULL;
5401         if (CopLINE(PL_curcop) == 1) {
5402             while (s < PL_bufend && isSPACE(*s))
5403                 s++;
5404             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5405                 s++;
5406             d = NULL;
5407             if (!PL_in_eval) {
5408                 if (*s == '#' && *(s+1) == '!')
5409                     d = s + 2;
5410 #ifdef ALTERNATE_SHEBANG
5411                 else {
5412                     static char const as[] = ALTERNATE_SHEBANG;
5413                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5414                         d = s + (sizeof(as) - 1);
5415                 }
5416 #endif /* ALTERNATE_SHEBANG */
5417             }
5418             if (d) {
5419                 char *ipath;
5420                 char *ipathend;
5421
5422                 while (isSPACE(*d))
5423                     d++;
5424                 ipath = d;
5425                 while (*d && !isSPACE(*d))
5426                     d++;
5427                 ipathend = d;
5428
5429 #ifdef ARG_ZERO_IS_SCRIPT
5430                 if (ipathend > ipath) {
5431                     /*
5432                      * HP-UX (at least) sets argv[0] to the script name,
5433                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5434                      * at least, set argv[0] to the basename of the Perl
5435                      * interpreter. So, having found "#!", we'll set it right.
5436                      */
5437                     SV* copfilesv = CopFILESV(PL_curcop);
5438                     if (copfilesv) {
5439                         SV * const x =
5440                             GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5441                                              SVt_PV)); /* $^X */
5442                         assert(SvPOK(x) || SvGMAGICAL(x));
5443                         if (sv_eq(x, copfilesv)) {
5444                             sv_setpvn(x, ipath, ipathend - ipath);
5445                             SvSETMAGIC(x);
5446                         }
5447                         else {
5448                             STRLEN blen;
5449                             STRLEN llen;
5450                             const char *bstart = SvPV_const(copfilesv, blen);
5451                             const char * const lstart = SvPV_const(x, llen);
5452                             if (llen < blen) {
5453                                 bstart += blen - llen;
5454                                 if (strnEQ(bstart, lstart, llen) &&     bstart[-1] == '/') {
5455                                     sv_setpvn(x, ipath, ipathend - ipath);
5456                                     SvSETMAGIC(x);
5457                                 }
5458                             }
5459                         }
5460                     }
5461                     else {
5462                         /* Anything to do if no copfilesv? */
5463                     }
5464                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5465                 }
5466 #endif /* ARG_ZERO_IS_SCRIPT */
5467
5468                 /*
5469                  * Look for options.
5470                  */
5471                 d = instr(s,"perl -");
5472                 if (!d) {
5473                     d = instr(s,"perl");
5474 #if defined(DOSISH)
5475                     /* avoid getting into infinite loops when shebang
5476                      * line contains "Perl" rather than "perl" */
5477                     if (!d) {
5478                         for (d = ipathend-4; d >= ipath; --d) {
5479                             if (isALPHA_FOLD_EQ(*d, 'p')
5480                                 && !ibcmp(d, "perl", 4))
5481                             {
5482                                 break;
5483                             }
5484                         }
5485                         if (d < ipath)
5486                             d = NULL;
5487                     }
5488 #endif
5489                 }
5490 #ifdef ALTERNATE_SHEBANG
5491                 /*
5492                  * If the ALTERNATE_SHEBANG on this system starts with a
5493                  * character that can be part of a Perl expression, then if
5494                  * we see it but not "perl", we're probably looking at the
5495                  * start of Perl code, not a request to hand off to some
5496                  * other interpreter.  Similarly, if "perl" is there, but
5497                  * not in the first 'word' of the line, we assume the line
5498                  * contains the start of the Perl program.
5499                  */
5500                 if (d && *s != '#') {
5501                     const char *c = ipath;
5502                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5503                         c++;
5504                     if (c < d)
5505                         d = NULL;       /* "perl" not in first word; ignore */
5506                     else
5507                         *s = '#';       /* Don't try to parse shebang line */
5508                 }
5509 #endif /* ALTERNATE_SHEBANG */
5510                 if (!d
5511                     && *s == '#'
5512                     && ipathend > ipath
5513                     && !PL_minus_c
5514                     && !instr(s,"indir")
5515                     && instr(PL_origargv[0],"perl"))
5516                 {
5517                     dVAR;
5518                     char **newargv;
5519
5520                     *ipathend = '\0';
5521                     s = ipathend + 1;
5522                     while (s < PL_bufend && isSPACE(*s))
5523                         s++;
5524                     if (s < PL_bufend) {
5525                         Newx(newargv,PL_origargc+3,char*);
5526                         newargv[1] = s;
5527                         while (s < PL_bufend && !isSPACE(*s))
5528                             s++;
5529                         *s = '\0';
5530                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5531                     }
5532                     else
5533                         newargv = PL_origargv;
5534                     newargv[0] = ipath;
5535                     PERL_FPU_PRE_EXEC
5536                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5537                     PERL_FPU_POST_EXEC
5538                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5539                 }
5540                 if (d) {
5541                     while (*d && !isSPACE(*d))
5542                         d++;
5543                     while (SPACE_OR_TAB(*d))
5544                         d++;
5545
5546                     if (*d++ == '-') {
5547                         const bool switches_done = PL_doswitches;
5548                         const U32 oldpdb = PL_perldb;
5549                         const bool oldn = PL_minus_n;
5550                         const bool oldp = PL_minus_p;
5551                         const char *d1 = d;
5552
5553                         do {
5554                             bool baduni = FALSE;
5555                             if (*d1 == 'C') {
5556                                 const char *d2 = d1 + 1;
5557                                 if (parse_unicode_opts((const char **)&d2)
5558                                     != PL_unicode)
5559                                     baduni = TRUE;
5560                             }
5561                             if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
5562                                 const char * const m = d1;
5563                                 while (*d1 && !isSPACE(*d1))
5564                                     d1++;
5565                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5566                                       (int)(d1 - m), m);
5567                             }
5568                             d1 = moreswitches(d1);
5569                         } while (d1);
5570                         if (PL_doswitches && !switches_done) {
5571                             int argc = PL_origargc;
5572                             char **argv = PL_origargv;
5573                             do {
5574                                 argc--,argv++;
5575                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5576                             init_argv_symbols(argc,argv);
5577                         }
5578                         if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
5579                             || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5580                               /* if we have already added "LINE: while (<>) {",
5581                                  we must not do it again */
5582                         {
5583                             SvPVCLEAR(PL_linestr);
5584                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5585                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5586                             PL_last_lop = PL_last_uni = NULL;
5587                             PL_preambled = FALSE;
5588                             if (PERLDB_LINE_OR_SAVESRC)
5589                                 (void)gv_fetchfile(PL_origfilename);
5590                             goto retry;
5591                         }
5592                     }
5593                 }
5594             }
5595         }
5596         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5597             PL_lex_state = LEX_FORMLINE;
5598             force_next(FORMRBRACK);
5599             TOKEN(';');
5600         }
5601         goto retry;
5602     case '\r':
5603 #ifdef PERL_STRICT_CR
5604         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5605         Perl_croak(aTHX_
5606       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5607 #endif
5608     case ' ': case '\t': case '\f': case '\v':
5609         s++;
5610         goto retry;
5611     case '#':
5612     case '\n':
5613         if (PL_lex_state != LEX_NORMAL
5614             || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
5615         {
5616             const bool in_comment = *s == '#';
5617             if (*s == '#' && s == PL_linestart && PL_in_eval
5618              && !PL_rsfp && !PL_parser->filtered) {
5619                 /* handle eval qq[#line 1 "foo"\n ...] */
5620                 CopLINE_dec(PL_curcop);
5621                 incline(s, PL_bufend);
5622             }
5623             d = s;
5624             while (d < PL_bufend && *d != '\n')
5625                 d++;
5626             if (d < PL_bufend)
5627                 d++;
5628             s = d;
5629             if (in_comment && d == PL_bufend
5630                 && PL_lex_state == LEX_INTERPNORMAL
5631                 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5632                 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5633             else
5634                 incline(s, PL_bufend);
5635             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5636                 PL_lex_state = LEX_FORMLINE;
5637                 force_next(FORMRBRACK);
5638                 TOKEN(';');
5639             }
5640         }
5641         else {
5642             while (s < PL_bufend && *s != '\n')
5643                 s++;
5644             if (s < PL_bufend)
5645                 {
5646                     s++;
5647                     if (s < PL_bufend)
5648                         incline(s, PL_bufend);
5649                 }
5650         }
5651         goto retry;
5652     case '-':
5653         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5654             I32 ftst = 0;
5655             char tmp;
5656
5657             s++;
5658             PL_bufptr = s;
5659             tmp = *s++;
5660
5661             while (s < PL_bufend && SPACE_OR_TAB(*s))
5662                 s++;
5663
5664             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5665                 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5666                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5667                 OPERATOR('-');          /* unary minus */
5668             }
5669             switch (tmp) {
5670             case 'r': ftst = OP_FTEREAD;        break;
5671             case 'w': ftst = OP_FTEWRITE;       break;
5672             case 'x': ftst = OP_FTEEXEC;        break;
5673             case 'o': ftst = OP_FTEOWNED;       break;
5674             case 'R': ftst = OP_FTRREAD;        break;
5675             case 'W': ftst = OP_FTRWRITE;       break;
5676             case 'X': ftst = OP_FTREXEC;        break;
5677             case 'O': ftst = OP_FTROWNED;       break;
5678             case 'e': ftst = OP_FTIS;           break;
5679             case 'z': ftst = OP_FTZERO;         break;
5680             case 's': ftst = OP_FTSIZE;         break;
5681             case 'f': ftst = OP_FTFILE;         break;
5682             case 'd': ftst = OP_FTDIR;          break;
5683             case 'l': ftst = OP_FTLINK;         break;
5684             case 'p': ftst = OP_FTPIPE;         break;
5685             case 'S': ftst = OP_FTSOCK;         break;
5686             case 'u': ftst = OP_FTSUID;         break;
5687             case 'g': ftst = OP_FTSGID;         break;
5688             case 'k': ftst = OP_FTSVTX;         break;
5689             case 'b': ftst = OP_FTBLK;          break;
5690             case 'c': ftst = OP_FTCHR;          break;
5691             case 't': ftst = OP_FTTTY;          break;
5692             case 'T': ftst = OP_FTTEXT;         break;
5693             case 'B': ftst = OP_FTBINARY;       break;
5694             case 'M': case 'A': case 'C':
5695                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5696                 switch (tmp) {
5697                 case 'M': ftst = OP_FTMTIME;    break;
5698                 case 'A': ftst = OP_FTATIME;    break;
5699                 case 'C': ftst = OP_FTCTIME;    break;
5700                 default:                        break;
5701                 }
5702                 break;
5703             default:
5704                 break;
5705             }
5706             if (ftst) {
5707                 PL_last_uni = PL_oldbufptr;
5708                 PL_last_lop_op = (OPCODE)ftst;
5709                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5710                         "### Saw file test %c\n", (int)tmp);
5711                 } );
5712                 FTST(ftst);
5713             }
5714             else {
5715                 /* Assume it was a minus followed by a one-letter named
5716                  * subroutine call (or a -bareword), then. */
5717                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5718                         "### '-%c' looked like a file test but was not\n",
5719                         (int) tmp);
5720                 } );
5721                 s = --PL_bufptr;
5722             }
5723         }
5724         {
5725             const char tmp = *s++;
5726             if (*s == tmp) {
5727                 s++;
5728                 if (PL_expect == XOPERATOR)
5729                     TERM(POSTDEC);
5730                 else
5731                     OPERATOR(PREDEC);
5732             }
5733             else if (*s == '>') {
5734                 s++;
5735                 s = skipspace(s);
5736                 if (((*s == '$' || *s == '&') && s[1] == '*')
5737                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
5738                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5739                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5740                  )
5741                 {
5742                     PL_expect = XPOSTDEREF;
5743                     TOKEN(ARROW);
5744                 }
5745                 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5746                     s = force_word(s,METHOD,FALSE,TRUE);
5747                     TOKEN(ARROW);
5748                 }
5749                 else if (*s == '$')
5750                     OPERATOR(ARROW);
5751                 else
5752                     TERM(ARROW);
5753             }
5754             if (PL_expect == XOPERATOR) {
5755                 if (*s == '='
5756                     && !PL_lex_allbrackets
5757                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5758                 {
5759                     s--;
5760                     TOKEN(0);
5761                 }
5762                 Aop(OP_SUBTRACT);
5763             }
5764             else {
5765                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5766                     check_uni();
5767                 OPERATOR('-');          /* unary minus */
5768             }
5769         }
5770
5771     case '+':
5772         {
5773             const char tmp = *s++;
5774             if (*s == tmp) {
5775                 s++;
5776                 if (PL_expect == XOPERATOR)
5777                     TERM(POSTINC);
5778                 else
5779                     OPERATOR(PREINC);
5780             }
5781             if (PL_expect == XOPERATOR) {
5782                 if (*s == '='
5783                     && !PL_lex_allbrackets
5784                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5785                 {
5786                     s--;
5787                     TOKEN(0);
5788                 }
5789                 Aop(OP_ADD);
5790             }
5791             else {
5792                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5793                     check_uni();
5794                 OPERATOR('+');
5795             }
5796         }
5797
5798     case '*':
5799         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5800         if (PL_expect != XOPERATOR) {
5801             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5802             PL_expect = XOPERATOR;
5803             force_ident(PL_tokenbuf, '*');
5804             if (!*PL_tokenbuf)
5805                 PREREF('*');
5806             TERM('*');
5807         }
5808         s++;
5809         if (*s == '*') {
5810             s++;
5811             if (*s == '=' && !PL_lex_allbrackets
5812                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5813             {
5814                 s -= 2;
5815                 TOKEN(0);
5816             }
5817             PWop(OP_POW);
5818         }
5819         if (*s == '='
5820             && !PL_lex_allbrackets
5821             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5822         {
5823             s--;
5824             TOKEN(0);
5825         }
5826         PL_parser->saw_infix_sigil = 1;
5827         Mop(OP_MULTIPLY);
5828
5829     case '%':
5830     {
5831         if (PL_expect == XOPERATOR) {
5832             if (s[1] == '='
5833                 && !PL_lex_allbrackets
5834                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5835             {
5836                 TOKEN(0);
5837             }
5838             ++s;
5839             PL_parser->saw_infix_sigil = 1;
5840             Mop(OP_MODULO);
5841         }
5842         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5843         PL_tokenbuf[0] = '%';
5844         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5845         pl_yylval.ival = 0;
5846         if (!PL_tokenbuf[1]) {
5847             PREREF('%');
5848         }
5849         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5850             && intuit_more(s, PL_bufend)) {
5851             if (*s == '[')
5852                 PL_tokenbuf[0] = '@';
5853         }
5854         PL_expect = XOPERATOR;
5855         force_ident_maybe_lex('%');
5856         TERM('%');
5857     }
5858     case '^':
5859         d = s;
5860         bof = FEATURE_BITWISE_IS_ENABLED;
5861         if (bof && s[1] == '.')
5862             s++;
5863         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5864                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5865         {
5866             s = d;
5867             TOKEN(0);
5868         }
5869         s++;
5870         BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5871     case '[':
5872         if (PL_lex_brackets > 100)
5873             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5874         PL_lex_brackstack[PL_lex_brackets++] = 0;
5875         PL_lex_allbrackets++;
5876         {
5877             const char tmp = *s++;
5878             OPERATOR(tmp);
5879         }
5880     case '~':
5881         if (s[1] == '~'
5882             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5883         {
5884             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5885                 TOKEN(0);
5886             s += 2;
5887             Perl_ck_warner_d(aTHX_
5888                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5889                 "Smartmatch is experimental");
5890             Eop(OP_SMARTMATCH);
5891         }
5892         s++;
5893         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5894             s++;
5895             BCop(OP_SCOMPLEMENT);
5896         }
5897         BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5898     case ',':
5899         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5900             TOKEN(0);
5901         s++;
5902         OPERATOR(',');
5903     case ':':
5904         if (s[1] == ':') {
5905             len = 0;
5906             goto just_a_word_zero_gv;
5907         }
5908         s++;
5909         {
5910         OP *attrs;
5911
5912         switch (PL_expect) {
5913         case XOPERATOR:
5914             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5915                 break;
5916             PL_bufptr = s;      /* update in case we back off */
5917             if (*s == '=') {
5918                 Perl_croak(aTHX_
5919                            "Use of := for an empty attribute list is not allowed");
5920             }
5921             goto grabattrs;
5922         case XATTRBLOCK:
5923             PL_expect = XBLOCK;
5924             goto grabattrs;
5925         case XATTRTERM:
5926             PL_expect = XTERMBLOCK;
5927          grabattrs:
5928             s = skipspace(s);
5929             attrs = NULL;
5930             while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5931                 I32 tmp;
5932                 SV *sv;
5933                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5934                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5935                     if (tmp < 0) tmp = -tmp;
5936                     switch (tmp) {
5937                     case KEY_or:
5938                     case KEY_and:
5939                     case KEY_for:
5940                     case KEY_foreach:
5941                     case KEY_unless:
5942                     case KEY_if:
5943                     case KEY_while:
5944                     case KEY_until:
5945                         goto got_attrs;
5946                     default:
5947                         break;
5948                     }
5949                 }
5950                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5951                 if (*d == '(') {
5952                     d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5953                     if (!d) {
5954                         if (attrs)
5955                             op_free(attrs);
5956                         sv_free(sv);
5957                         Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5958                     }
5959                     COPLINE_SET_FROM_MULTI_END;
5960                 }
5961                 if (PL_lex_stuff) {
5962                     sv_catsv(sv, PL_lex_stuff);
5963                     attrs = op_append_elem(OP_LIST, attrs,
5964                                         newSVOP(OP_CONST, 0, sv));
5965                     SvREFCNT_dec_NN(PL_lex_stuff);
5966                     PL_lex_stuff = NULL;
5967                 }
5968                 else {
5969                     /* NOTE: any CV attrs applied here need to be part of
5970                        the CVf_BUILTIN_ATTRS define in cv.h! */
5971                     if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5972                         sv_free(sv);
5973                         CvLVALUE_on(PL_compcv);
5974                     }
5975                     else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5976                         sv_free(sv);
5977                         CvMETHOD_on(PL_compcv);
5978                     }
5979                     else if (!PL_in_my && memEQs(SvPVX(sv), len, "const"))
5980                     {
5981                         sv_free(sv);
5982                         Perl_ck_warner_d(aTHX_
5983                             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5984                            ":const is experimental"
5985                         );
5986                         CvANONCONST_on(PL_compcv);
5987                         if (!CvANON(PL_compcv))
5988                             yyerror(":const is not permitted on named "
5989                                     "subroutines");
5990                     }
5991                     /* After we've set the flags, it could be argued that
5992                        we don't need to do the attributes.pm-based setting
5993                        process, and shouldn't bother appending recognized
5994                        flags.  To experiment with that, uncomment the
5995                        following "else".  (Note that's already been
5996                        uncommented.  That keeps the above-applied built-in
5997                        attributes from being intercepted (and possibly
5998                        rejected) by a package's attribute routines, but is
5999                        justified by the performance win for the common case
6000                        of applying only built-in attributes.) */
6001                     else
6002                         attrs = op_append_elem(OP_LIST, attrs,
6003                                             newSVOP(OP_CONST, 0,
6004                                                     sv));
6005                 }
6006                 s = skipspace(d);
6007                 if (*s == ':' && s[1] != ':')
6008                     s = skipspace(s+1);
6009                 else if (s == d)
6010                     break;      /* require real whitespace or :'s */
6011                 /* XXX losing whitespace on sequential attributes here */
6012             }
6013             {
6014                 if (*s != ';'
6015                     && *s != '}'
6016                     && !(PL_expect == XOPERATOR
6017                          ? (*s == '=' ||  *s == ')')
6018                          : (*s == '{' ||  *s == '(')))
6019                 {
6020                     const char q = ((*s == '\'') ? '"' : '\'');
6021                     /* If here for an expression, and parsed no attrs, back
6022                        off. */
6023                     if (PL_expect == XOPERATOR && !attrs) {
6024                         s = PL_bufptr;
6025                         break;
6026                     }
6027                     /* MUST advance bufptr here to avoid bogus "at end of line"
6028                        context messages from yyerror().
6029                     */
6030                     PL_bufptr = s;
6031                     yyerror( (const char *)
6032                              (*s
6033                               ? Perl_form(aTHX_ "Invalid separator character "
6034                                           "%c%c%c in attribute list", q, *s, q)
6035                               : "Unterminated attribute list" ) );
6036                     if (attrs)
6037                         op_free(attrs);
6038                     OPERATOR(':');
6039                 }
6040             }
6041         got_attrs:
6042             if (attrs) {
6043                 NEXTVAL_NEXTTOKE.opval = attrs;
6044                 force_next(THING);
6045             }
6046             TOKEN(COLONATTR);
6047         }
6048         }
6049         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6050             s--;
6051             TOKEN(0);
6052         }
6053         PL_lex_allbrackets--;
6054         OPERATOR(':');
6055     case '(':
6056         s++;
6057         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6058             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
6059         else
6060             PL_expect = XTERM;
6061         s = skipspace(s);
6062         PL_lex_allbrackets++;
6063         TOKEN('(');
6064     case ';':
6065         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6066             TOKEN(0);
6067         CLINE;
6068         s++;
6069         PL_expect = XSTATE;
6070         TOKEN(';');
6071     case ')':
6072         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6073             TOKEN(0);
6074         s++;
6075         PL_lex_allbrackets--;
6076         s = skipspace(s);
6077         if (*s == '{')
6078             PREBLOCK(')');
6079         TERM(')');
6080     case ']':
6081         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6082             TOKEN(0);
6083         s++;
6084         if (PL_lex_brackets <= 0)
6085             /* diag_listed_as: Unmatched right %s bracket */
6086             yyerror("Unmatched right square bracket");
6087         else
6088             --PL_lex_brackets;
6089         PL_lex_allbrackets--;
6090         if (PL_lex_state == LEX_INTERPNORMAL) {
6091             if (PL_lex_brackets == 0) {
6092                 if (*s == '-' && s[1] == '>')
6093                     PL_lex_state = LEX_INTERPENDMAYBE;
6094                 else if (*s != '[' && *s != '{')
6095                     PL_lex_state = LEX_INTERPEND;
6096             }
6097         }
6098         TERM(']');
6099     case '{':
6100         s++;
6101       leftbracket:
6102         if (PL_lex_brackets > 100) {
6103             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6104         }
6105         switch (PL_expect) {
6106         case XTERM:
6107         case XTERMORDORDOR:
6108             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6109             PL_lex_allbrackets++;
6110             OPERATOR(HASHBRACK);
6111         case XOPERATOR:
6112             while (s < PL_bufend && SPACE_OR_TAB(*s))
6113                 s++;
6114             d = s;
6115             PL_tokenbuf[0] = '\0';
6116             if (d < PL_bufend && *d == '-') {
6117                 PL_tokenbuf[0] = '-';
6118                 d++;
6119                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6120                     d++;
6121             }
6122             if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6123                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6124                               FALSE, &len);
6125                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6126                     d++;
6127                 if (*d == '}') {
6128                     const char minus = (PL_tokenbuf[0] == '-');
6129                     s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6130                     if (minus)
6131                         force_next('-');
6132                 }
6133             }
6134             /* FALLTHROUGH */
6135         case XATTRTERM:
6136         case XTERMBLOCK:
6137             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6138             PL_lex_allbrackets++;
6139             PL_expect = XSTATE;
6140             break;
6141         case XATTRBLOCK:
6142         case XBLOCK:
6143             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6144             PL_lex_allbrackets++;
6145             PL_expect = XSTATE;
6146             break;
6147         case XBLOCKTERM:
6148             PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6149             PL_lex_allbrackets++;
6150             PL_expect = XSTATE;
6151             break;
6152         default: {
6153                 const char *t;
6154                 if (PL_oldoldbufptr == PL_last_lop)
6155                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6156                 else
6157                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6158                 PL_lex_allbrackets++;
6159                 s = skipspace(s);
6160                 if (*s == '}') {
6161                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6162                         PL_expect = XTERM;
6163                         /* This hack is to get the ${} in the message. */
6164                         PL_bufptr = s+1;
6165                         yyerror("syntax error");
6166                         break;
6167                     }
6168                     OPERATOR(HASHBRACK);
6169                 }
6170                 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6171                     /* ${...} or @{...} etc., but not print {...}
6172                      * Skip the disambiguation and treat this as a block.
6173                      */
6174                     goto block_expectation;
6175                 }
6176                 /* This hack serves to disambiguate a pair of curlies
6177                  * as being a block or an anon hash.  Normally, expectation
6178                  * determines that, but in cases where we're not in a
6179                  * position to expect anything in particular (like inside
6180                  * eval"") we have to resolve the ambiguity.  This code
6181                  * covers the case where the first term in the curlies is a
6182                  * quoted string.  Most other cases need to be explicitly
6183                  * disambiguated by prepending a "+" before the opening
6184                  * curly in order to force resolution as an anon hash.
6185                  *
6186                  * XXX should probably propagate the outer expectation
6187                  * into eval"" to rely less on this hack, but that could
6188                  * potentially break current behavior of eval"".
6189                  * GSAR 97-07-21
6190                  */
6191                 t = s;
6192                 if (*s == '\'' || *s == '"' || *s == '`') {
6193                     /* common case: get past first string, handling escapes */
6194                     for (t++; t < PL_bufend && *t != *s;)
6195                         if (*t++ == '\\')
6196                             t++;
6197                     t++;
6198                 }
6199                 else if (*s == 'q') {
6200                     if (++t < PL_bufend
6201                         && (!isWORDCHAR(*t)
6202                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6203                                 && !isWORDCHAR(*t))))
6204                     {
6205                         /* skip q//-like construct */
6206                         const char *tmps;
6207                         char open, close, term;
6208                         I32 brackets = 1;
6209
6210                         while (t < PL_bufend && isSPACE(*t))
6211                             t++;
6212                         /* check for q => */
6213                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6214                             OPERATOR(HASHBRACK);
6215                         }
6216                         term = *t;
6217                         open = term;
6218                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6219                             term = tmps[5];
6220                         close = term;
6221                         if (open == close)
6222                             for (t++; t < PL_bufend; t++) {
6223                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6224                                     t++;
6225                                 else if (*t == open)
6226                                     break;
6227                             }
6228                         else {
6229                             for (t++; t < PL_bufend; t++) {
6230                                 if (*t == '\\' && t+1 < PL_bufend)
6231                                     t++;
6232                                 else if (*t == close && --brackets <= 0)
6233                                     break;
6234                                 else if (*t == open)
6235                                     brackets++;
6236                             }
6237                         }
6238                         t++;
6239                     }
6240                     else
6241                         /* skip plain q word */
6242                         while (   t < PL_bufend
6243                                && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6244                         {
6245                             t += UTF ? UTF8SKIP(t) : 1;
6246                         }
6247                 }
6248                 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6249                     t += UTF ? UTF8SKIP(t) : 1;
6250                     while (   t < PL_bufend
6251                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6252                     {
6253                         t += UTF ? UTF8SKIP(t) : 1;
6254                     }
6255                 }
6256                 while (t < PL_bufend && isSPACE(*t))
6257                     t++;
6258                 /* if comma follows first term, call it an anon hash */
6259                 /* XXX it could be a comma expression with loop modifiers */
6260                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6261                                    || (*t == '=' && t[1] == '>')))
6262                     OPERATOR(HASHBRACK);
6263                 if (PL_expect == XREF)
6264                 {
6265                   block_expectation:
6266                     /* If there is an opening brace or 'sub:', treat it
6267                        as a term to make ${{...}}{k} and &{sub:attr...}
6268                        dwim.  Otherwise, treat it as a statement, so
6269                        map {no strict; ...} works.
6270                      */
6271                     s = skipspace(s);
6272                     if (*s == '{') {
6273                         PL_expect = XTERM;
6274                         break;
6275                     }
6276                     if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6277                         PL_bufptr = s;
6278                         d = s + 3;
6279                         d = skipspace(d);
6280                         s = PL_bufptr;
6281                         if (*d == ':') {
6282                             PL_expect = XTERM;
6283                             break;
6284                         }
6285                     }
6286                     PL_expect = XSTATE;
6287                 }
6288                 else {
6289                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6290                     PL_expect = XSTATE;
6291                 }
6292             }
6293             break;
6294         }
6295         pl_yylval.ival = CopLINE(PL_curcop);
6296         PL_copline = NOLINE;   /* invalidate current command line number */
6297         TOKEN(formbrack ? '=' : '{');
6298     case '}':
6299         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6300             TOKEN(0);
6301       rightbracket:
6302         assert(s != PL_bufend);
6303         s++;
6304         if (PL_lex_brackets <= 0)
6305             /* diag_listed_as: Unmatched right %s bracket */
6306             yyerror("Unmatched right curly bracket");
6307         else
6308             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6309         PL_lex_allbrackets--;
6310         if (PL_lex_state == LEX_INTERPNORMAL) {
6311             if (PL_lex_brackets == 0) {
6312                 if (PL_expect & XFAKEBRACK) {
6313                     PL_expect &= XENUMMASK;
6314                     PL_lex_state = LEX_INTERPEND;
6315                     PL_bufptr = s;
6316                     return yylex();     /* ignore fake brackets */
6317                 }
6318                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6319                  && SvEVALED(PL_lex_repl))
6320                     PL_lex_state = LEX_INTERPEND;
6321                 else if (*s == '-' && s[1] == '>')
6322                     PL_lex_state = LEX_INTERPENDMAYBE;
6323                 else if (*s != '[' && *s != '{')
6324                     PL_lex_state = LEX_INTERPEND;
6325             }
6326         }
6327         if (PL_expect & XFAKEBRACK) {
6328             PL_expect &= XENUMMASK;
6329             PL_bufptr = s;
6330             return yylex();             /* ignore fake brackets */
6331         }
6332         force_next(formbrack ? '.' : '}');
6333         if (formbrack) LEAVE_with_name("lex_format");
6334         if (formbrack == 2) { /* means . where arguments were expected */
6335             force_next(';');
6336             TOKEN(FORMRBRACK);
6337         }
6338         TOKEN(';');
6339     case '&':
6340         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6341         s++;
6342         if (*s++ == '&') {
6343             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6344                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6345                 s -= 2;
6346                 TOKEN(0);
6347             }
6348             AOPERATOR(ANDAND);
6349         }
6350         s--;
6351         if (PL_expect == XOPERATOR) {
6352             if (   PL_bufptr == PL_linestart
6353                 && ckWARN(WARN_SEMICOLON)
6354                 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6355             {
6356                 CopLINE_dec(PL_curcop);
6357                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6358                 CopLINE_inc(PL_curcop);
6359             }
6360             d = s;
6361             if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6362                 s++;
6363             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6364                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6365                 s = d;
6366                 s--;
6367                 TOKEN(0);
6368             }
6369             if (d == s) {
6370                 PL_parser->saw_infix_sigil = 1;
6371                 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6372             }
6373             else
6374                 BAop(OP_SBIT_AND);
6375         }
6376
6377         PL_tokenbuf[0] = '&';
6378         s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6379         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6380         if (PL_tokenbuf[1]) {
6381             force_ident_maybe_lex('&');
6382         }
6383         else
6384             PREREF('&');
6385         TERM('&');
6386
6387     case '|':
6388         s++;
6389         if (*s++ == '|') {
6390             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6391                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6392                 s -= 2;
6393                 TOKEN(0);
6394             }
6395             AOPERATOR(OROR);
6396         }
6397         s--;
6398         d = s;
6399         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6400             s++;
6401         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6402                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6403             s = d - 1;
6404             TOKEN(0);
6405         }
6406         BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6407     case '=':
6408         s++;
6409         {
6410             const char tmp = *s++;
6411             if (tmp == '=') {
6412                 if (   (s == PL_linestart+2 || s[-3] == '\n')
6413                     && memBEGINs(s, (STRLEN) (PL_bufend - s), "====="))
6414                 {
6415                     s = vcs_conflict_marker(s + 5);
6416                     goto retry;
6417                 }
6418                 if (!PL_lex_allbrackets
6419                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6420                 {
6421                     s -= 2;
6422                     TOKEN(0);
6423                 }
6424                 Eop(OP_EQ);
6425             }
6426             if (tmp == '>') {
6427                 if (!PL_lex_allbrackets
6428                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
6429                 {
6430                     s -= 2;
6431                     TOKEN(0);
6432                 }
6433                 OPERATOR(',');
6434             }
6435             if (tmp == '~')
6436                 PMop(OP_MATCH);
6437             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6438                 && strchr("+-*/%.^&|<",tmp))
6439                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6440                             "Reversed %c= operator",(int)tmp);
6441             s--;
6442             if (PL_expect == XSTATE
6443                 && isALPHA(tmp)
6444                 && (s == PL_linestart+1 || s[-2] == '\n') )
6445             {
6446                 if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6447                     || PL_lex_state != LEX_NORMAL)
6448                 {
6449                     d = PL_bufend;
6450                     while (s < d) {
6451                         if (*s++ == '\n') {
6452                             incline(s, PL_bufend);
6453                             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
6454                             {
6455                                 s = (char *) memchr(s,'\n', d - s);
6456                                 if (s)
6457                                     s++;
6458                                 else
6459                                     s = d;
6460                                 incline(s, PL_bufend);
6461                                 goto retry;
6462                             }
6463                         }
6464                     }
6465                     goto retry;
6466                 }
6467                 s = PL_bufend;
6468                 PL_parser->in_pod = 1;
6469                 goto retry;
6470             }
6471         }
6472         if (PL_expect == XBLOCK) {
6473             const char *t = s;
6474 #ifdef PERL_STRICT_CR
6475             while (SPACE_OR_TAB(*t))
6476 #else
6477             while (SPACE_OR_TAB(*t) || *t == '\r')
6478 #endif
6479                 t++;
6480             if (*t == '\n' || *t == '#') {
6481                 formbrack = 1;
6482                 ENTER_with_name("lex_format");
6483                 SAVEI8(PL_parser->form_lex_state);
6484                 SAVEI32(PL_lex_formbrack);
6485                 PL_parser->form_lex_state = PL_lex_state;
6486                 PL_lex_formbrack = PL_lex_brackets + 1;
6487                 goto leftbracket;
6488             }
6489         }
6490         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6491             s--;
6492             TOKEN(0);
6493         }
6494         pl_yylval.ival = 0;
6495         OPERATOR(ASSIGNOP);
6496     case '!':
6497         s++;
6498         {
6499             const char tmp = *s++;
6500             if (tmp == '=') {
6501                 /* was this !=~ where !~ was meant?
6502                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6503
6504                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6505                     const char *t = s+1;
6506
6507                     while (t < PL_bufend && isSPACE(*t))
6508                         ++t;
6509
6510                     if (*t == '/' || *t == '?'
6511                         || ((*t == 'm' || *t == 's' || *t == 'y')
6512                             && !isWORDCHAR(t[1]))
6513                         || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6514                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6515                                     "!=~ should be !~");
6516                 }
6517                 if (!PL_lex_allbrackets
6518                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6519                 {
6520                     s -= 2;
6521                     TOKEN(0);
6522                 }
6523                 Eop(OP_NE);
6524             }
6525             if (tmp == '~')
6526                 PMop(OP_NOT);
6527         }
6528         s--;
6529         OPERATOR('!');
6530     case '<':
6531         if (PL_expect != XOPERATOR) {
6532             if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6533                 check_uni();
6534             if (s[1] == '<' && s[2] != '>') {
6535                 if (   (s == PL_linestart || s[-1] == '\n')
6536                     && memBEGINs(s+2, (STRLEN) (PL_bufend - (s+2)), "<<<<<"))
6537                 {
6538                     s = vcs_conflict_marker(s + 7);
6539                     goto retry;
6540                 }
6541                 s = scan_heredoc(s);
6542             }
6543             else
6544                 s = scan_inputsymbol(s);
6545             PL_expect = XOPERATOR;
6546             TOKEN(sublex_start());
6547         }
6548         s++;
6549         {
6550             char tmp = *s++;
6551             if (tmp == '<') {
6552                 if (   (s == PL_linestart+2 || s[-3] == '\n')
6553                     && memBEGINs(s, (STRLEN) (PL_bufend - s), "<<<<<"))
6554                 {
6555                     s = vcs_conflict_marker(s + 5);
6556                     goto retry;
6557                 }
6558                 if (*s == '=' && !PL_lex_allbrackets
6559                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6560                 {
6561                     s -= 2;
6562                     TOKEN(0);
6563                 }
6564                 SHop(OP_LEFT_SHIFT);
6565             }
6566             if (tmp == '=') {
6567                 tmp = *s++;
6568                 if (tmp == '>') {
6569                     if (!PL_lex_allbrackets
6570                         && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6571                     {
6572                         s -= 3;
6573                         TOKEN(0);
6574                     }
6575                     Eop(OP_NCMP);
6576                 }
6577                 s--;
6578                 if (!PL_lex_allbrackets
6579                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6580                 {
6581                     s -= 2;
6582                     TOKEN(0);
6583                 }
6584                 Rop(OP_LE);
6585             }
6586         }
6587         s--;
6588         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6589             s--;
6590             TOKEN(0);
6591         }
6592         Rop(OP_LT);
6593     case '>':
6594         s++;
6595         {
6596             const char tmp = *s++;
6597             if (tmp == '>') {
6598                 if (   (s == PL_linestart+2 || s[-3] == '\n')
6599                     && memBEGINs(s, (STRLEN) (PL_bufend - s), ">>>>>"))
6600                 {
6601                     s = vcs_conflict_marker(s + 5);
6602                     goto retry;
6603                 }
6604                 if (*s == '=' && !PL_lex_allbrackets
6605                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6606                 {
6607                     s -= 2;
6608                     TOKEN(0);
6609                 }
6610                 SHop(OP_RIGHT_SHIFT);
6611             }
6612             else if (tmp == '=') {
6613                 if (!PL_lex_allbrackets
6614                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6615                 {
6616                     s -= 2;
6617                     TOKEN(0);
6618                 }
6619                 Rop(OP_GE);
6620             }
6621         }
6622         s--;
6623         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6624             s--;
6625             TOKEN(0);
6626         }
6627         Rop(OP_GT);
6628
6629     case '$':
6630         CLINE;
6631
6632         if (PL_expect == XPOSTDEREF) {
6633             if (s[1] == '#') {
6634                 s++;
6635                 POSTDEREF(DOLSHARP);
6636             }
6637             POSTDEREF('$');
6638         }
6639
6640         if (   s[1] == '#'
6641             && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
6642                 || strchr("{$:+-@", s[2])))
6643         {
6644             PL_tokenbuf[0] = '@';
6645             s = scan_ident(s + 1, PL_tokenbuf + 1,
6646                            sizeof PL_tokenbuf - 1, FALSE);
6647             if (PL_expect == XOPERATOR) {
6648                 d = s;
6649                 if (PL_bufptr > s) {
6650                     d = PL_bufptr-1;
6651                     PL_bufptr = PL_oldbufptr;
6652                 }
6653                 no_op("Array length", d);
6654             }
6655             if (!PL_tokenbuf[1])
6656                 PREREF(DOLSHARP);
6657             PL_expect = XOPERATOR;
6658             force_ident_maybe_lex('#');
6659             TOKEN(DOLSHARP);
6660         }
6661
6662         PL_tokenbuf[0] = '$';
6663         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6664         if (PL_expect == XOPERATOR) {
6665             d = s;
6666             if (PL_bufptr > s) {
6667                 d = PL_bufptr-1;
6668                 PL_bufptr = PL_oldbufptr;
6669             }
6670             no_op("Scalar", d);
6671         }
6672         if (!PL_tokenbuf[1]) {
6673             if (s == PL_bufend)
6674                 yyerror("Final $ should be \\$ or $name");
6675             PREREF('$');
6676         }
6677
6678         d = s;
6679         {
6680             const char tmp = *s;
6681             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6682                 s = skipspace(s);
6683
6684             if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6685                 && intuit_more(s, PL_bufend)) {
6686                 if (*s == '[') {
6687                     PL_tokenbuf[0] = '@';
6688                     if (ckWARN(WARN_SYNTAX)) {
6689                         char *t = s+1;
6690
6691                         while (   isSPACE(*t)
6692                                || isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)
6693                                || *t == '$')
6694                         {
6695                             t += UTF ? UTF8SKIP(t) : 1;
6696                         }
6697                         if (*t++ == ',') {
6698                             PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6699                             while (t < PL_bufend && *t != ']')
6700                                 t++;
6701                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6702                                         "Multidimensional syntax %" UTF8f " not supported",
6703                                         UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
6704                         }
6705                     }
6706                 }
6707                 else if (*s == '{') {
6708                     char *t;
6709                     PL_tokenbuf[0] = '%';
6710                     if (    strEQ(PL_tokenbuf+1, "SIG")
6711                         && ckWARN(WARN_SYNTAX)
6712                         && (t = (char *) memchr(s, '}', PL_bufend - s))
6713                         && (t = (char *) memchr(t, '=', PL_bufend - t)))
6714                     {
6715                         char tmpbuf[sizeof PL_tokenbuf];
6716                         do {
6717                             t++;
6718                         } while (isSPACE(*t));
6719                         if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
6720                             STRLEN len;
6721                             t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6722                                             &len);
6723                             while (isSPACE(*t))
6724                                 t++;
6725                             if (  *t == ';'
6726                                 && get_cvn_flags(tmpbuf, len, UTF
6727                                                                 ? SVf_UTF8
6728                                                                 : 0))
6729                             {
6730                                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6731                                     "You need to quote \"%" UTF8f "\"",
6732                                         UTF8fARG(UTF, len, tmpbuf));
6733                             }
6734                         }
6735                     }
6736                 }
6737             }
6738
6739             PL_expect = XOPERATOR;
6740             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6741                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6742                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6743                     PL_expect = XOPERATOR;
6744                 else if (strchr("$@\"'`q", *s))
6745                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6746                 else if (   strchr("&*<%", *s)
6747                          && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
6748                 {
6749                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6750                 }
6751                 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6752                     char tmpbuf[sizeof PL_tokenbuf];
6753                     int t2;
6754                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6755                     if ((t2 = keyword(tmpbuf, len, 0))) {
6756                         /* binary operators exclude handle interpretations */
6757                         switch (t2) {
6758                         case -KEY_x:
6759                         case -KEY_eq:
6760                         case -KEY_ne:
6761                         case -KEY_gt:
6762                         case -KEY_lt:
6763                         case -KEY_ge:
6764                         case -KEY_le:
6765                         case -KEY_cmp:
6766                             break;
6767                         default:
6768                             PL_expect = XTERM;  /* e.g. print $fh length() */
6769                             break;
6770                         }
6771                     }
6772                     else {
6773                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6774                     }
6775                 }
6776                 else if (isDIGIT(*s))
6777                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6778                 else if (*s == '.' && isDIGIT(s[1]))
6779                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6780                 else if ((*s == '?' || *s == '-' || *s == '+')
6781                          && !isSPACE(s[1]) && s[1] != '=')
6782                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6783                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6784                          && s[1] != '/')
6785                     PL_expect = XTERM;          /* e.g. print $fh /.../
6786                                                    XXX except DORDOR operator
6787                                                 */
6788                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6789                          && s[2] != '=')
6790                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6791             }
6792         }
6793         force_ident_maybe_lex('$');
6794         TOKEN('$');
6795
6796     case '@':
6797         if (PL_expect == XPOSTDEREF)
6798             POSTDEREF('@');
6799         PL_tokenbuf[0] = '@';
6800         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6801         if (PL_expect == XOPERATOR) {
6802             d = s;
6803             if (PL_bufptr > s) {
6804                 d = PL_bufptr-1;
6805                 PL_bufptr = PL_oldbufptr;
6806             }
6807             no_op("Array", d);
6808         }
6809         pl_yylval.ival = 0;
6810         if (!PL_tokenbuf[1]) {
6811             PREREF('@');
6812         }
6813         if (PL_lex_state == LEX_NORMAL)
6814             s = skipspace(s);
6815         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6816             && intuit_more(s, PL_bufend))
6817         {
6818             if (*s == '{')
6819                 PL_tokenbuf[0] = '%';
6820
6821             /* Warn about @ where they meant $. */
6822             if (*s == '[' || *s == '{') {
6823                 if (ckWARN(WARN_SYNTAX)) {
6824                     S_check_scalar_slice(aTHX_ s);
6825                 }
6826             }
6827         }
6828         PL_expect = XOPERATOR;
6829         force_ident_maybe_lex('@');
6830         TERM('@');
6831
6832      case '/':                  /* may be division, defined-or, or pattern */
6833         if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6834             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6835                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6836                 TOKEN(0);
6837             s += 2;
6838             AOPERATOR(DORDOR);
6839         }
6840         else if (PL_expect == XOPERATOR) {
6841             s++;
6842             if (*s == '=' && !PL_lex_allbrackets
6843                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6844             {
6845                 s--;
6846                 TOKEN(0);
6847             }
6848             Mop(OP_DIVIDE);
6849         }
6850         else {
6851             /* Disable warning on "study /blah/" */
6852             if (    PL_oldoldbufptr == PL_last_uni
6853                 && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6854                     || memNE(PL_last_uni, "study", 5)
6855                     || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6856              ))
6857                 check_uni();
6858             s = scan_pat(s,OP_MATCH);
6859             TERM(sublex_start());
6860         }
6861
6862      case '?':                  /* conditional */
6863         s++;
6864         if (!PL_lex_allbrackets
6865             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
6866         {
6867             s--;
6868             TOKEN(0);
6869         }
6870         PL_lex_allbrackets++;
6871         OPERATOR('?');
6872
6873     case '.':
6874         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6875 #ifdef PERL_STRICT_CR
6876             && s[1] == '\n'
6877 #else
6878             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6879 #endif
6880             && (s == PL_linestart || s[-1] == '\n') )
6881         {
6882             PL_expect = XSTATE;
6883             formbrack = 2; /* dot seen where arguments expected */
6884             goto rightbracket;
6885         }
6886         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6887             s += 3;
6888             OPERATOR(YADAYADA);
6889         }
6890         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6891             char tmp = *s++;
6892             if (*s == tmp) {
6893                 if (!PL_lex_allbrackets
6894                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
6895                 {
6896                     s--;
6897                     TOKEN(0);
6898                 }
6899                 s++;
6900                 if (*s == tmp) {
6901                     s++;
6902                     pl_yylval.ival = OPf_SPECIAL;
6903                 }
6904                 else
6905                     pl_yylval.ival = 0;
6906                 OPERATOR(DOTDOT);
6907             }
6908             if (*s == '=' && !PL_lex_allbrackets
6909                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6910             {
6911                 s--;
6912                 TOKEN(0);
6913             }
6914             Aop(OP_CONCAT);
6915         }
6916         /* FALLTHROUGH */
6917     case '0': case '1': case '2': case '3': case '4':
6918     case '5': case '6': case '7': case '8': case '9':
6919         s = scan_num(s, &pl_yylval);
6920         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6921         if (PL_expect == XOPERATOR)
6922             no_op("Number",s);
6923         TERM(THING);
6924
6925     case '\'':
6926         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6927         if (!s)
6928             missingterm(NULL, 0);
6929         COPLINE_SET_FROM_MULTI_END;
6930         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6931         if (PL_expect == XOPERATOR) {
6932             no_op("String",s);
6933         }
6934         pl_yylval.ival = OP_CONST;
6935         TERM(sublex_start());
6936
6937     case '"':
6938         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6939         DEBUG_T( {
6940             if (s)
6941                 printbuf("### Saw string before %s\n", s);
6942             else
6943                 PerlIO_printf(Perl_debug_log,
6944                              "### Saw unterminated string\n");
6945         } );
6946         if (PL_expect == XOPERATOR) {
6947                 no_op("String",s);
6948         }
6949         if (!s)
6950             missingterm(NULL, 0);
6951         pl_yylval.ival = OP_CONST;
6952         /* FIXME. I think that this can be const if char *d is replaced by
6953            more localised variables.  */
6954         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6955             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6956                 pl_yylval.ival = OP_STRINGIFY;
6957                 break;
6958             }
6959         }
6960         if (pl_yylval.ival == OP_CONST)
6961             COPLINE_SET_FROM_MULTI_END;
6962         TERM(sublex_start());
6963
6964     case '`':
6965         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6966         DEBUG_T( {
6967             if (s)
6968                 printbuf("### Saw backtick string before %s\n", s);
6969             else
6970                 PerlIO_printf(Perl_debug_log,
6971                              "### Saw unterminated backtick string\n");
6972         } );
6973         if (PL_expect == XOPERATOR)
6974             no_op("Backticks",s);
6975         if (!s)
6976             missingterm(NULL, 0);
6977         pl_yylval.ival = OP_BACKTICK;
6978         TERM(sublex_start());
6979
6980     case '\\':
6981         s++;
6982         if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6983          && isDIGIT(*s))
6984             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6985                            *s, *s);
6986         if (PL_expect == XOPERATOR)
6987             no_op("Backslash",s);
6988         OPERATOR(REFGEN);
6989
6990     case 'v':
6991         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6992             char *start = s + 2;
6993             while (isDIGIT(*start) || *start == '_')
6994                 start++;
6995             if (*start == '.' && isDIGIT(start[1])) {
6996                 s = scan_num(s, &pl_yylval);
6997                 TERM(THING);
6998             }
6999             else if ((*start == ':' && start[1] == ':')
7000                   || (PL_expect == XSTATE && *start == ':'))
7001                 goto keylookup;
7002             else if (PL_expect == XSTATE) {
7003                 d = start;
7004                 while (d < PL_bufend && isSPACE(*d)) d++;
7005                 if (*d == ':') goto keylookup;
7006             }
7007             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
7008             if (!isALPHA(*start) && (PL_expect == XTERM
7009                         || PL_expect == XREF || PL_expect == XSTATE
7010                         || PL_expect == XTERMORDORDOR)) {
7011                 GV *const gv = gv_fetchpvn_flags(s, start - s,
7012                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
7013                 if (!gv) {
7014                     s = scan_num(s, &pl_yylval);
7015                     TERM(THING);
7016                 }
7017             }
7018         }
7019         goto keylookup;
7020     case 'x':
7021         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
7022             s++;
7023             Mop(OP_REPEAT);
7024         }
7025         goto keylookup;
7026
7027     case '_':
7028     case 'a': case 'A':
7029     case 'b': case 'B':
7030     case 'c': case 'C':
7031     case 'd': case 'D':
7032     case 'e': case 'E':
7033     case 'f': case 'F':
7034     case 'g': case 'G':
7035     case 'h': case 'H':
7036     case 'i': case 'I':
7037     case 'j': case 'J':
7038     case 'k': case 'K':
7039     case 'l': case 'L':
7040     case 'm': case 'M':
7041     case 'n': case 'N':
7042     case 'o': case 'O':
7043     case 'p': case 'P':
7044     case 'q': case 'Q':
7045     case 'r': case 'R':
7046     case 's': case 'S':
7047     case 't': case 'T':
7048     case 'u': case 'U':
7049               case 'V':
7050     case 'w': case 'W':
7051               case 'X':
7052     case 'y': case 'Y':
7053     case 'z': case 'Z':
7054
7055       keylookup: {
7056         bool anydelim;
7057         bool lex;
7058         I32 tmp;
7059         SV *sv;
7060         CV *cv;
7061         PADOFFSET off;
7062         OP *rv2cv_op;
7063
7064         lex = FALSE;
7065         orig_keyword = 0;
7066         off = 0;
7067         sv = NULL;
7068         cv = NULL;
7069         gv = NULL;
7070         gvp = NULL;
7071         rv2cv_op = NULL;
7072
7073         PL_bufptr = s;
7074         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7075
7076         /* Some keywords can be followed by any delimiter, including ':' */
7077         anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
7078
7079         /* x::* is just a word, unless x is "CORE" */
7080         if (!anydelim && *s == ':' && s[1] == ':') {
7081             if (memEQs(PL_tokenbuf, len, "CORE")) goto case_KEY_CORE;
7082             goto just_a_word;
7083         }
7084
7085         d = s;
7086         while (d < PL_bufend && isSPACE(*d))
7087                 d++;    /* no comments skipped here, or s### is misparsed */
7088
7089         /* Is this a word before a => operator? */
7090         if (*d == '=' && d[1] == '>') {
7091           fat_arrow:
7092             CLINE;
7093             pl_yylval.opval
7094                 = newSVOP(OP_CONST, 0,
7095                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7096             pl_yylval.opval->op_private = OPpCONST_BARE;
7097             TERM(BAREWORD);
7098         }
7099
7100         /* Check for plugged-in keyword */
7101         {
7102             OP *o;
7103             int result;
7104             char *saved_bufptr = PL_bufptr;
7105             PL_bufptr = s;
7106             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7107             s = PL_bufptr;
7108             if (result == KEYWORD_PLUGIN_DECLINE) {
7109                 /* not a plugged-in keyword */
7110                 PL_bufptr = saved_bufptr;
7111             } else if (result == KEYWORD_PLUGIN_STMT) {
7112                 pl_yylval.opval = o;
7113                 CLINE;
7114                 if (!PL_nexttoke) PL_expect = XSTATE;
7115                 return REPORT(PLUGSTMT);
7116             } else if (result == KEYWORD_PLUGIN_EXPR) {
7117                 pl_yylval.opval = o;
7118                 CLINE;
7119                 if (!PL_nexttoke) PL_expect = XOPERATOR;
7120                 return REPORT(PLUGEXPR);
7121             } else {
7122                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7123                                         PL_tokenbuf);
7124             }
7125         }
7126
7127         /* Check for built-in keyword */
7128         tmp = keyword(PL_tokenbuf, len, 0);
7129
7130         /* Is this a label? */
7131         if (!anydelim && PL_expect == XSTATE
7132               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7133             s = d + 1;
7134             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7135             pl_yylval.pval[len] = '\0';
7136             pl_yylval.pval[len+1] = UTF ? 1 : 0;
7137             CLINE;
7138             TOKEN(LABEL);
7139         }
7140
7141         /* Check for lexical sub */
7142         if (PL_expect != XOPERATOR) {
7143             char tmpbuf[sizeof PL_tokenbuf + 1];
7144             *tmpbuf = '&';
7145             Copy(PL_tokenbuf, tmpbuf+1, len, char);
7146             off = pad_findmy_pvn(tmpbuf, len+1, 0);
7147             if (off != NOT_IN_PAD) {
7148                 assert(off); /* we assume this is boolean-true below */
7149                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7150                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
7151                     HEK * const stashname = HvNAME_HEK(stash);
7152                     sv = newSVhek(stashname);
7153                     sv_catpvs(sv, "::");
7154                     sv_catpvn_flags(sv, PL_tokenbuf, len,
7155                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
7156                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7157                                     SVt_PVCV);
7158                     off = 0;
7159                     if (!gv) {
7160                         sv_free(sv);
7161                         sv = NULL;
7162                         goto just_a_word;
7163                     }
7164                 }
7165                 else {
7166                     rv2cv_op = newOP(OP_PADANY, 0);
7167                     rv2cv_op->op_targ = off;
7168                     cv = find_lexical_cv(off);
7169                 }
7170                 lex = TRUE;
7171                 goto just_a_word;
7172             }
7173             off = 0;
7174         }
7175
7176         if (tmp < 0) {                  /* second-class keyword? */
7177             GV *ogv = NULL;     /* override (winner) */
7178             GV *hgv = NULL;     /* hidden (loser) */
7179             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7180                 CV *cv;
7181                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7182                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
7183                                             SVt_PVCV))
7184                     && (cv = GvCVu(gv)))
7185                 {
7186                     if (GvIMPORTED_CV(gv))
7187                         ogv = gv;
7188                     else if (! CvMETHOD(cv))
7189                         hgv = gv;
7190                 }
7191                 if (!ogv
7192                     && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7193                                                               len, FALSE))
7194                     && (gv = *gvp)
7195                     && (isGV_with_GP(gv)
7196                         ? GvCVu(gv) && GvIMPORTED_CV(gv)
7197                         :   SvPCS_IMPORTED(gv)
7198                         && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7199                                                                  len, 0), 1)))
7200                 {
7201                     ogv = gv;
7202                 }
7203             }
7204             if (ogv) {
7205                 orig_keyword = tmp;
7206                 tmp = 0;                /* overridden by import or by GLOBAL */
7207             }
7208             else if (gv && !gvp
7209                      && -tmp==KEY_lock  /* XXX generalizable kludge */
7210                      && GvCVu(gv))
7211             {
7212                 tmp = 0;                /* any sub overrides "weak" keyword */
7213             }
7214             else {                      /* no override */
7215                 tmp = -tmp;
7216                 if (tmp == KEY_dump) {
7217                     Perl_ck_warner_d(aTHX_ packWARN2(WARN_MISC,WARN_DEPRECATED),
7218                                      "dump() better written as CORE::dump(). "
7219                                      "dump() will no longer be available "
7220                                      "in Perl 5.30");
7221                 }
7222                 gv = NULL;
7223                 gvp = 0;
7224                 if (hgv && tmp != KEY_x)        /* never ambiguous */
7225                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7226                                    "Ambiguous call resolved as CORE::%s(), "
7227                                    "qualify as such or use &",
7228                                    GvENAME(hgv));
7229             }
7230         }
7231
7232         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7233          && (!anydelim || *s != '#')) {
7234             /* no override, and not s### either; skipspace is safe here
7235              * check for => on following line */
7236             bool arrow;
7237             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7238             STRLEN   soff = s         - SvPVX(PL_linestr);
7239             s = peekspace(s);
7240             arrow = *s == '=' && s[1] == '>';
7241             PL_bufptr = SvPVX(PL_linestr) + bufoff;
7242             s         = SvPVX(PL_linestr) +   soff;
7243             if (arrow)
7244                 goto fat_arrow;
7245         }
7246
7247       reserved_word:
7248         switch (tmp) {
7249
7250             /* Trade off - by using this evil construction we can pull the
7251                variable gv into the block labelled keylookup. If not, then
7252                we have to give it function scope so that the goto from the
7253                earlier ':' case doesn't bypass the initialisation.  */
7254             just_a_word_zero_gv:
7255                 sv = NULL;
7256                 cv = NULL;
7257                 gv = NULL;
7258                 gvp = NULL;
7259                 rv2cv_op = NULL;
7260                 orig_keyword = 0;
7261                 lex = 0;
7262                 off = 0;
7263             /* FALLTHROUGH */
7264         default:                        /* not a keyword */
7265           just_a_word: {
7266                 int pkgname = 0;
7267                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7268                 bool safebw;
7269                 bool no_op_error = FALSE;
7270
7271                 if (PL_expect == XOPERATOR) {
7272                     if (PL_bufptr == PL_linestart) {
7273                         CopLINE_dec(PL_curcop);
7274                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7275                         CopLINE_inc(PL_curcop);
7276                     }
7277                     else
7278                         /* We want to call no_op with s pointing after the
7279                            bareword, so defer it.  But we want it to come
7280                            before the Bad name croak.  */
7281                         no_op_error = TRUE;
7282                 }
7283
7284                 /* Get the rest if it looks like a package qualifier */
7285
7286                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7287                     STRLEN morelen;
7288                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7289                                   TRUE, &morelen);
7290                     if (no_op_error) {
7291                         no_op("Bareword",s);
7292                         no_op_error = FALSE;
7293                     }
7294                     if (!morelen)
7295                         Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7296                                 UTF8fARG(UTF, len, PL_tokenbuf),
7297                                 *s == '\'' ? "'" : "::");
7298                     len += morelen;
7299                     pkgname = 1;
7300                 }
7301
7302                 if (no_op_error)
7303                         no_op("Bareword",s);
7304
7305                 /* See if the name is "Foo::",
7306                    in which case Foo is a bareword
7307                    (and a package name). */
7308
7309                 if (len > 2
7310                     && PL_tokenbuf[len - 2] == ':'
7311                     && PL_tokenbuf[len - 1] == ':')
7312                 {
7313                     if (ckWARN(WARN_BAREWORD)
7314                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7315                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7316                                     "Bareword \"%" UTF8f
7317                                     "\" refers to nonexistent package",
7318                                     UTF8fARG(UTF, len, PL_tokenbuf));
7319                     len -= 2;
7320                     PL_tokenbuf[len] = '\0';
7321                     gv = NULL;
7322                     gvp = 0;
7323                     safebw = TRUE;
7324                 }
7325                 else {
7326                     safebw = FALSE;
7327                 }
7328
7329                 /* if we saw a global override before, get the right name */
7330
7331                 if (!sv)
7332                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7333                                                 len);
7334                 if (gvp) {
7335                     SV * const tmp_sv = sv;
7336                     sv = newSVpvs("CORE::GLOBAL::");
7337                     sv_catsv(sv, tmp_sv);
7338                     SvREFCNT_dec(tmp_sv);
7339                 }
7340
7341
7342                 /* Presume this is going to be a bareword of some sort. */
7343                 CLINE;
7344                 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
7345                 pl_yylval.opval->op_private = OPpCONST_BARE;
7346
7347                 /* And if "Foo::", then that's what it certainly is. */
7348                 if (safebw)
7349                     goto safe_bareword;
7350
7351                 if (!off)
7352                 {
7353                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7354                     const_op->op_private = OPpCONST_BARE;
7355                     rv2cv_op =
7356                         newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7357                     cv = lex
7358                         ? isGV(gv)
7359                             ? GvCV(gv)
7360                             : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
7361                                 ? (CV *)SvRV(gv)
7362                                 : ((CV *)gv)
7363                         : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
7364                 }
7365
7366                 /* Use this var to track whether intuit_method has been
7367                    called.  intuit_method returns 0 or > 255.  */
7368                 tmp = 1;
7369
7370                 /* See if it's the indirect object for a list operator. */
7371
7372                 if (PL_oldoldbufptr
7373                     && PL_oldoldbufptr < PL_bufptr
7374                     && (PL_oldoldbufptr == PL_last_lop
7375                         || PL_oldoldbufptr == PL_last_uni)
7376                     && /* NO SKIPSPACE BEFORE HERE! */
7377                        (PL_expect == XREF
7378                         || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7379                                                                == OA_FILEREF))
7380                 {
7381                     bool immediate_paren = *s == '(';
7382                     SSize_t s_off;
7383
7384                     /* (Now we can afford to cross potential line boundary.) */
7385                     s = skipspace(s);
7386
7387                     /* intuit_method() can indirectly call lex_next_chunk(),
7388                      * invalidating s
7389                      */
7390                     s_off = s - SvPVX(PL_linestr);
7391                     /* Two barewords in a row may indicate method call. */
7392                     if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7393                             || *s == '$')
7394                         && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7395                     {
7396                         /* the code at method: doesn't use s */
7397                         goto method;
7398                     }
7399                     s = SvPVX(PL_linestr) + s_off;
7400
7401                     /* If not a declared subroutine, it's an indirect object. */
7402                     /* (But it's an indir obj regardless for sort.) */
7403                     /* Also, if "_" follows a filetest operator, it's a bareword */
7404
7405                     if (
7406                         ( !immediate_paren && (PL_last_lop_op == OP_SORT
7407                          || (!cv
7408                              && (PL_last_lop_op != OP_MAPSTART
7409                                  && PL_last_lop_op != OP_GREPSTART))))
7410                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7411                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7412                                                             == OA_FILESTATOP))
7413                        )
7414                     {
7415                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7416                         goto bareword;
7417                     }
7418                 }
7419
7420                 PL_expect = XOPERATOR;
7421                 s = skipspace(s);
7422
7423                 /* Is this a word before a => operator? */
7424                 if (*s == '=' && s[1] == '>' && !pkgname) {
7425                     op_free(rv2cv_op);
7426                     CLINE;
7427                     if (gvp || (lex && !off)) {
7428                         assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
7429                         /* This is our own scalar, created a few lines
7430                            above, so this is safe. */
7431                         SvREADONLY_off(sv);
7432                         sv_setpv(sv, PL_tokenbuf);
7433                         if (UTF && !IN_BYTES
7434                          && is_utf8_string((U8*)PL_tokenbuf, len))
7435                               SvUTF8_on(sv);
7436                         SvREADONLY_on(sv);
7437                     }
7438                     TERM(BAREWORD);
7439                 }
7440
7441                 /* If followed by a paren, it's certainly a subroutine. */
7442                 if (*s == '(') {
7443                     CLINE;
7444                     if (cv) {
7445                         d = s + 1;
7446                         while (SPACE_OR_TAB(*d))
7447                             d++;
7448                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7449                             s = d + 1;
7450                             goto its_constant;
7451                         }
7452                     }
7453                     NEXTVAL_NEXTTOKE.opval =
7454                         off ? rv2cv_op : pl_yylval.opval;
7455                     if (off)
7456                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7457                     else op_free(rv2cv_op),        force_next(BAREWORD);
7458                     pl_yylval.ival = 0;
7459                     TOKEN('&');
7460                 }
7461
7462                 /* If followed by var or block, call it a method (unless sub) */
7463
7464                 if ((*s == '$' || *s == '{') && !cv) {
7465                     op_free(rv2cv_op);
7466                     PL_last_lop = PL_oldbufptr;
7467                     PL_last_lop_op = OP_METHOD;
7468                     if (!PL_lex_allbrackets
7469                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7470                     {
7471                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7472                     }
7473                     PL_expect = XBLOCKTERM;
7474                     PL_bufptr = s;
7475                     return REPORT(METHOD);
7476                 }
7477
7478                 /* If followed by a bareword, see if it looks like indir obj. */
7479
7480                 if (   tmp == 1
7481                     && !orig_keyword
7482                     && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7483                     && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7484                 {
7485                   method:
7486                     if (lex && !off) {
7487                         assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
7488                         SvREADONLY_off(sv);
7489                         sv_setpvn(sv, PL_tokenbuf, len);
7490                         if (UTF && !IN_BYTES
7491                          && is_utf8_string((U8*)PL_tokenbuf, len))
7492                             SvUTF8_on (sv);
7493                         else SvUTF8_off(sv);
7494                     }
7495                     op_free(rv2cv_op);
7496                     if (tmp == METHOD && !PL_lex_allbrackets
7497                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7498                     {
7499                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7500                     }
7501                     return REPORT(tmp);
7502                 }
7503
7504                 /* Not a method, so call it a subroutine (if defined) */
7505
7506                 if (cv) {
7507                     /* Check for a constant sub */
7508                     if ((sv = cv_const_sv_or_av(cv))) {
7509                   its_constant:
7510                         op_free(rv2cv_op);
7511                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7512                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7513                         if (SvTYPE(sv) == SVt_PVAV)
7514                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7515                                                       pl_yylval.opval);
7516                         else {
7517                             pl_yylval.opval->op_private = 0;
7518                             pl_yylval.opval->op_folded = 1;
7519                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7520                         }
7521                         TOKEN(BAREWORD);
7522                     }
7523
7524                     op_free(pl_yylval.opval);
7525                     pl_yylval.opval =
7526                         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7527                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7528                     PL_last_lop = PL_oldbufptr;
7529                     PL_last_lop_op = OP_ENTERSUB;
7530                     /* Is there a prototype? */
7531                     if (
7532                         SvPOK(cv))
7533                     {
7534                         STRLEN protolen = CvPROTOLEN(cv);
7535                         const char *proto = CvPROTO(cv);
7536                         bool optional;
7537                         proto = S_strip_spaces(aTHX_ proto, &protolen);
7538                         if (!protolen)
7539                             TERM(FUNC0SUB);
7540                         if ((optional = *proto == ';'))
7541                           do
7542                             proto++;
7543                           while (*proto == ';');
7544                         if (
7545                             (
7546                                 (
7547                                     *proto == '$' || *proto == '_'
7548                                  || *proto == '*' || *proto == '+'
7549                                 )
7550                              && proto[1] == '\0'
7551                             )
7552                          || (
7553                              *proto == '\\' && proto[1] && proto[2] == '\0'
7554                             )
7555                         )
7556                             UNIPROTO(UNIOPSUB,optional);
7557                         if (*proto == '\\' && proto[1] == '[') {
7558                             const char *p = proto + 2;
7559                             while(*p && *p != ']')
7560                                 ++p;
7561                             if(*p == ']' && !p[1])
7562                                 UNIPROTO(UNIOPSUB,optional);
7563                         }
7564                         if (*proto == '&' && *s == '{') {
7565                             if (PL_curstash)
7566                                 sv_setpvs(PL_subname, "__ANON__");
7567                             else
7568                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7569                             if (!PL_lex_allbrackets
7570                                 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7571                             {
7572                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7573                             }
7574                             PREBLOCK(LSTOPSUB);
7575                         }
7576                     }
7577                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7578                     PL_expect = XTERM;
7579                     force_next(off ? PRIVATEREF : BAREWORD);
7580                     if (!PL_lex_allbrackets
7581                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7582                     {
7583                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7584                     }
7585                     TOKEN(NOAMP);
7586                 }
7587
7588                 /* Call it a bare word */
7589
7590                 if (PL_hints & HINT_STRICT_SUBS)
7591                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7592                 else {
7593                 bareword:
7594                     /* after "print" and similar functions (corresponding to
7595                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7596                      * a filehandle should be subject to "strict subs".
7597                      * Likewise for the optional indirect-object argument to system
7598                      * or exec, which can't be a bareword */
7599                     if ((PL_last_lop_op == OP_PRINT
7600                             || PL_last_lop_op == OP_PRTF
7601                             || PL_last_lop_op == OP_SAY
7602                             || PL_last_lop_op == OP_SYSTEM
7603                             || PL_last_lop_op == OP_EXEC)
7604                             && (PL_hints & HINT_STRICT_SUBS))
7605                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7606                     if (lastchar != '-') {
7607                         if (ckWARN(WARN_RESERVED)) {
7608                             d = PL_tokenbuf;
7609                             while (isLOWER(*d))
7610                                 d++;
7611                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7612                             {
7613                                 /* PL_warn_reserved is constant */
7614                                 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7615                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7616                                        PL_tokenbuf);
7617                                 GCC_DIAG_RESTORE_STMT;
7618                             }
7619                         }
7620                     }
7621                 }
7622                 op_free(rv2cv_op);
7623
7624             safe_bareword:
7625                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7626                  && saw_infix_sigil) {
7627                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7628                                      "Operator or semicolon missing before %c%" UTF8f,
7629                                      lastchar,
7630                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
7631                                               PL_tokenbuf));
7632                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7633                                      "Ambiguous use of %c resolved as operator %c",
7634                                      lastchar, lastchar);
7635                 }
7636                 TOKEN(BAREWORD);
7637             }
7638
7639         case KEY___FILE__:
7640             FUN0OP(
7641                 newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7642             );
7643
7644         case KEY___LINE__:
7645             FUN0OP(
7646                 newSVOP(OP_CONST, 0,
7647                     Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7648             );
7649
7650         case KEY___PACKAGE__:
7651             FUN0OP(
7652                 newSVOP(OP_CONST, 0,
7653                                         (PL_curstash
7654                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7655                                          : &PL_sv_undef))
7656             );
7657
7658         case KEY___DATA__:
7659         case KEY___END__: {
7660             GV *gv;
7661             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7662                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7663                                         ? PL_curstash
7664                                         : PL_defstash;
7665                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7666                 if (!isGV(gv))
7667                     gv_init(gv,stash,"DATA",4,0);
7668                 GvMULTI_on(gv);
7669                 if (!GvIO(gv))
7670                     GvIOp(gv) = newIO();
7671                 IoIFP(GvIOp(gv)) = PL_rsfp;
7672                 /* Mark this internal pseudo-handle as clean */
7673                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7674                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7675                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7676                 else
7677                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7678 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7679                 /* if the script was opened in binmode, we need to revert
7680                  * it to text mode for compatibility; but only iff it has CRs
7681                  * XXX this is a questionable hack at best. */
7682                 if (PL_bufend-PL_bufptr > 2
7683                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7684                 {
7685                     Off_t loc = 0;
7686                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7687                         loc = PerlIO_tell(PL_rsfp);
7688                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7689                     }
7690 #ifdef NETWARE
7691                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7692 #else
7693                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7694 #endif  /* NETWARE */
7695                         if (loc > 0)
7696                             PerlIO_seek(PL_rsfp, loc, 0);
7697                     }
7698                 }
7699 #endif
7700 #ifdef PERLIO_LAYERS
7701                 if (!IN_BYTES) {
7702                     if (UTF)
7703                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7704                 }
7705 #endif
7706                 PL_rsfp = NULL;
7707             }
7708             goto fake_eof;
7709         }
7710
7711         case KEY___SUB__:
7712             FUN0OP(CvCLONE(PL_compcv)
7713                         ? newOP(OP_RUNCV, 0)
7714                         : newPVOP(OP_RUNCV,0,NULL));
7715
7716         case KEY_AUTOLOAD:
7717         case KEY_DESTROY:
7718         case KEY_BEGIN:
7719         case KEY_UNITCHECK:
7720         case KEY_CHECK:
7721         case KEY_INIT:
7722         case KEY_END:
7723             if (PL_expect == XSTATE) {
7724                 s = PL_bufptr;
7725                 goto really_sub;
7726             }
7727             goto just_a_word;
7728
7729         case_KEY_CORE:
7730             {
7731                 STRLEN olen = len;
7732                 d = s;
7733                 s += 2;
7734                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7735                 if ((*s == ':' && s[1] == ':')
7736                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7737                 {
7738                     s = d;
7739                     len = olen;
7740                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7741                     goto just_a_word;
7742                 }
7743                 if (!tmp)
7744                     Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
7745                                       UTF8fARG(UTF, len, PL_tokenbuf));
7746                 if (tmp < 0)
7747                     tmp = -tmp;
7748                 else if (tmp == KEY_require || tmp == KEY_do
7749                       || tmp == KEY_glob)
7750                     /* that's a way to remember we saw "CORE::" */
7751                     orig_keyword = tmp;
7752                 goto reserved_word;
7753             }
7754
7755         case KEY_abs:
7756             UNI(OP_ABS);
7757
7758         case KEY_alarm:
7759             UNI(OP_ALARM);
7760
7761         case KEY_accept:
7762             LOP(OP_ACCEPT,XTERM);
7763
7764         case KEY_and:
7765             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7766                 return REPORT(0);
7767             OPERATOR(ANDOP);
7768
7769         case KEY_atan2:
7770             LOP(OP_ATAN2,XTERM);
7771
7772         case KEY_bind:
7773             LOP(OP_BIND,XTERM);
7774
7775         case KEY_binmode:
7776             LOP(OP_BINMODE,XTERM);
7777
7778         case KEY_bless:
7779             LOP(OP_BLESS,XTERM);
7780
7781         case KEY_break:
7782             FUN0(OP_BREAK);
7783
7784         case KEY_chop:
7785             UNI(OP_CHOP);
7786
7787         case KEY_continue:
7788                     /* We have to disambiguate the two senses of
7789                       "continue". If the next token is a '{' then
7790                       treat it as the start of a continue block;
7791                       otherwise treat it as a control operator.
7792                      */
7793                     s = skipspace(s);
7794                     if (*s == '{')
7795             PREBLOCK(CONTINUE);
7796                     else
7797                         FUN0(OP_CONTINUE);
7798
7799         case KEY_chdir:
7800             /* may use HOME */
7801             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7802             UNI(OP_CHDIR);
7803
7804         case KEY_close:
7805             UNI(OP_CLOSE);
7806
7807         case KEY_closedir:
7808             UNI(OP_CLOSEDIR);
7809
7810         case KEY_cmp:
7811             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7812                 return REPORT(0);
7813             Eop(OP_SCMP);
7814
7815         case KEY_caller:
7816             UNI(OP_CALLER);
7817
7818         case KEY_crypt:
7819 #ifdef FCRYPT
7820             if (!PL_cryptseen) {
7821                 PL_cryptseen = TRUE;
7822                 init_des();
7823             }
7824 #endif
7825             LOP(OP_CRYPT,XTERM);
7826
7827         case KEY_chmod:
7828             LOP(OP_CHMOD,XTERM);
7829
7830         case KEY_chown:
7831             LOP(OP_CHOWN,XTERM);
7832
7833         case KEY_connect:
7834             LOP(OP_CONNECT,XTERM);
7835
7836         case KEY_chr:
7837             UNI(OP_CHR);
7838
7839         case KEY_cos:
7840             UNI(OP_COS);
7841
7842         case KEY_chroot:
7843             UNI(OP_CHROOT);
7844
7845         case KEY_default:
7846             PREBLOCK(DEFAULT);
7847
7848         case KEY_do:
7849             s = skipspace(s);
7850             if (*s == '{')
7851                 PRETERMBLOCK(DO);
7852             if (*s != '\'') {
7853                 *PL_tokenbuf = '&';
7854                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7855                               1, &len);
7856                 if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7857                  && !keyword(PL_tokenbuf + 1, len, 0)) {
7858                     SSize_t off = s-SvPVX(PL_linestr);
7859                     d = skipspace(d);
7860                     s = SvPVX(PL_linestr)+off;
7861                     if (*d == '(') {
7862                         force_ident_maybe_lex('&');
7863                         s = d;
7864                     }
7865                 }
7866             }
7867             if (orig_keyword == KEY_do) {
7868                 orig_keyword = 0;
7869                 pl_yylval.ival = 1;
7870             }
7871             else
7872                 pl_yylval.ival = 0;
7873             OPERATOR(DO);
7874
7875         case KEY_die:
7876             PL_hints |= HINT_BLOCK_SCOPE;
7877             LOP(OP_DIE,XTERM);
7878
7879         case KEY_defined:
7880             UNI(OP_DEFINED);
7881
7882         case KEY_delete:
7883             UNI(OP_DELETE);
7884
7885         case KEY_dbmopen:
7886             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7887                               STR_WITH_LEN("NDBM_File::"),
7888                               STR_WITH_LEN("DB_File::"),
7889                               STR_WITH_LEN("GDBM_File::"),
7890                               STR_WITH_LEN("SDBM_File::"),
7891                               STR_WITH_LEN("ODBM_File::"),
7892                               NULL);
7893             LOP(OP_DBMOPEN,XTERM);
7894
7895         case KEY_dbmclose:
7896             UNI(OP_DBMCLOSE);
7897
7898         case KEY_dump:
7899             LOOPX(OP_DUMP);
7900
7901         case KEY_else:
7902             PREBLOCK(ELSE);
7903
7904         case KEY_elsif:
7905             pl_yylval.ival = CopLINE(PL_curcop);
7906             OPERATOR(ELSIF);
7907
7908         case KEY_eq:
7909             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7910                 return REPORT(0);
7911             Eop(OP_SEQ);
7912
7913         case KEY_exists:
7914             UNI(OP_EXISTS);
7915
7916         case KEY_exit:
7917             UNI(OP_EXIT);
7918
7919         case KEY_eval:
7920             s = skipspace(s);
7921             if (*s == '{') { /* block eval */
7922                 PL_expect = XTERMBLOCK;
7923                 UNIBRACK(OP_ENTERTRY);
7924             }
7925             else { /* string eval */
7926                 PL_expect = XTERM;
7927                 UNIBRACK(OP_ENTEREVAL);
7928             }
7929
7930         case KEY_evalbytes:
7931             PL_expect = XTERM;
7932             UNIBRACK(-OP_ENTEREVAL);
7933
7934         case KEY_eof:
7935             UNI(OP_EOF);
7936
7937         case KEY_exp:
7938             UNI(OP_EXP);
7939
7940         case KEY_each:
7941             UNI(OP_EACH);
7942
7943         case KEY_exec:
7944             LOP(OP_EXEC,XREF);
7945
7946         case KEY_endhostent:
7947             FUN0(OP_EHOSTENT);
7948
7949         case KEY_endnetent:
7950             FUN0(OP_ENETENT);
7951
7952         case KEY_endservent:
7953             FUN0(OP_ESERVENT);
7954
7955         case KEY_endprotoent:
7956             FUN0(OP_EPROTOENT);
7957
7958         case KEY_endpwent:
7959             FUN0(OP_EPWENT);
7960
7961         case KEY_endgrent:
7962             FUN0(OP_EGRENT);
7963
7964         case KEY_for:
7965         case KEY_foreach:
7966             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7967                 return REPORT(0);
7968             pl_yylval.ival = CopLINE(PL_curcop);
7969             s = skipspace(s);
7970             if (   PL_expect == XSTATE
7971                 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
7972             {
7973                 char *p = s;
7974                 SSize_t s_off = s - SvPVX(PL_linestr);
7975
7976                 if (   memBEGINPs(p, (STRLEN) (PL_bufend - p), "my")
7977                     && isSPACE(*(p + 2)))
7978                 {
7979                     p += 2;
7980                 }
7981                 else if (   memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")
7982                          && isSPACE(*(p + 3)))
7983                 {
7984                     p += 3;
7985                 }
7986
7987                 p = skipspace(p);
7988                 /* skip optional package name, as in "for my abc $x (..)" */
7989                 if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
7990                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7991                     p = skipspace(p);
7992                 }
7993                 if (*p != '$' && *p != '\\')
7994                     Perl_croak(aTHX_ "Missing $ on loop variable");
7995
7996                 /* The buffer may have been reallocated, update s */
7997                 s = SvPVX(PL_linestr) + s_off;
7998             }
7999             OPERATOR(FOR);
8000
8001         case KEY_formline:
8002             LOP(OP_FORMLINE,XTERM);
8003
8004         case KEY_fork:
8005             FUN0(OP_FORK);
8006
8007         case KEY_fc:
8008             UNI(OP_FC);
8009
8010         case KEY_fcntl:
8011             LOP(OP_FCNTL,XTERM);
8012
8013         case KEY_fileno:
8014             UNI(OP_FILENO);
8015
8016         case KEY_flock:
8017             LOP(OP_FLOCK,XTERM);
8018
8019         case KEY_gt:
8020             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8021                 return REPORT(0);
8022             Rop(OP_SGT);
8023
8024         case KEY_ge:
8025             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8026                 return REPORT(0);
8027             Rop(OP_SGE);
8028
8029         case KEY_grep:
8030             LOP(OP_GREPSTART, XREF);
8031
8032         case KEY_goto:
8033             LOOPX(OP_GOTO);
8034
8035         case KEY_gmtime:
8036             UNI(OP_GMTIME);
8037
8038         case KEY_getc:
8039             UNIDOR(OP_GETC);
8040
8041         case KEY_getppid:
8042             FUN0(OP_GETPPID);
8043
8044         case KEY_getpgrp:
8045             UNI(OP_GETPGRP);
8046
8047         case KEY_getpriority:
8048             LOP(OP_GETPRIORITY,XTERM);
8049
8050         case KEY_getprotobyname:
8051             UNI(OP_GPBYNAME);
8052
8053         case KEY_getprotobynumber:
8054             LOP(OP_GPBYNUMBER,XTERM);
8055
8056         case KEY_getprotoent:
8057             FUN0(OP_GPROTOENT);
8058
8059         case KEY_getpwent:
8060             FUN0(OP_GPWENT);
8061
8062         case KEY_getpwnam:
8063             UNI(OP_GPWNAM);
8064
8065         case KEY_getpwuid:
8066             UNI(OP_GPWUID);
8067
8068         case KEY_getpeername:
8069             UNI(OP_GETPEERNAME);
8070
8071         case KEY_gethostbyname:
8072             UNI(OP_GHBYNAME);
8073
8074         case KEY_gethostbyaddr:
8075             LOP(OP_GHBYADDR,XTERM);
8076
8077         case KEY_gethostent:
8078             FUN0(OP_GHOSTENT);
8079
8080         case KEY_getnetbyname:
8081             UNI(OP_GNBYNAME);
8082
8083         case KEY_getnetbyaddr:
8084             LOP(OP_GNBYADDR,XTERM);
8085
8086         case KEY_getnetent:
8087             FUN0(OP_GNETENT);
8088
8089         case KEY_getservbyname:
8090             LOP(OP_GSBYNAME,XTERM);
8091
8092         case KEY_getservbyport:
8093             LOP(OP_GSBYPORT,XTERM);
8094
8095         case KEY_getservent:
8096             FUN0(OP_GSERVENT);
8097
8098         case KEY_getsockname:
8099             UNI(OP_GETSOCKNAME);
8100
8101         case KEY_getsockopt:
8102             LOP(OP_GSOCKOPT,XTERM);
8103
8104         case KEY_getgrent:
8105             FUN0(OP_GGRENT);
8106
8107         case KEY_getgrnam:
8108             UNI(OP_GGRNAM);
8109
8110         case KEY_getgrgid:
8111             UNI(OP_GGRGID);
8112
8113         case KEY_getlogin:
8114             FUN0(OP_GETLOGIN);
8115
8116         case KEY_given:
8117             pl_yylval.ival = CopLINE(PL_curcop);
8118             Perl_ck_warner_d(aTHX_
8119                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8120                 "given is experimental");
8121             OPERATOR(GIVEN);
8122
8123         case KEY_glob:
8124             LOP(
8125              orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
8126              XTERM
8127             );
8128
8129         case KEY_hex:
8130             UNI(OP_HEX);
8131
8132         case KEY_if:
8133             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8134                 return REPORT(0);
8135             pl_yylval.ival = CopLINE(PL_curcop);
8136             OPERATOR(IF);
8137
8138         case KEY_index:
8139             LOP(OP_INDEX,XTERM);
8140
8141         case KEY_int:
8142             UNI(OP_INT);
8143
8144         case KEY_ioctl:
8145             LOP(OP_IOCTL,XTERM);
8146
8147         case KEY_join:
8148             LOP(OP_JOIN,XTERM);
8149
8150         case KEY_keys:
8151             UNI(OP_KEYS);
8152
8153         case KEY_kill:
8154             LOP(OP_KILL,XTERM);
8155
8156         case KEY_last:
8157             LOOPX(OP_LAST);
8158
8159         case KEY_lc:
8160             UNI(OP_LC);
8161
8162         case KEY_lcfirst:
8163             UNI(OP_LCFIRST);
8164
8165         case KEY_local:
8166             OPERATOR(LOCAL);
8167
8168         case KEY_length:
8169             UNI(OP_LENGTH);
8170
8171         case KEY_lt:
8172             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8173                 return REPORT(0);
8174             Rop(OP_SLT);
8175
8176         case KEY_le:
8177             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8178                 return REPORT(0);
8179             Rop(OP_SLE);
8180
8181         case KEY_localtime:
8182             UNI(OP_LOCALTIME);
8183
8184         case KEY_log:
8185             UNI(OP_LOG);
8186
8187         case KEY_link:
8188             LOP(OP_LINK,XTERM);
8189
8190         case KEY_listen:
8191             LOP(OP_LISTEN,XTERM);
8192
8193         case KEY_lock:
8194             UNI(OP_LOCK);
8195
8196         case KEY_lstat:
8197             UNI(OP_LSTAT);
8198
8199         case KEY_m:
8200             s = scan_pat(s,OP_MATCH);
8201             TERM(sublex_start());
8202
8203         case KEY_map:
8204             LOP(OP_MAPSTART, XREF);
8205
8206         case KEY_mkdir:
8207             LOP(OP_MKDIR,XTERM);
8208
8209         case KEY_msgctl:
8210             LOP(OP_MSGCTL,XTERM);
8211
8212         case KEY_msgget:
8213             LOP(OP_MSGGET,XTERM);
8214
8215         case KEY_msgrcv:
8216             LOP(OP_MSGRCV,XTERM);
8217
8218         case KEY_msgsnd:
8219             LOP(OP_MSGSND,XTERM);
8220
8221         case KEY_our:
8222         case KEY_my:
8223         case KEY_state:
8224             if (PL_in_my) {
8225                 PL_bufptr = s;
8226                 yyerror(Perl_form(aTHX_
8227                                   "Can't redeclare \"%s\" in \"%s\"",
8228                                    tmp      == KEY_my    ? "my" :
8229                                    tmp      == KEY_state ? "state" : "our",
8230                                    PL_in_my == KEY_my    ? "my" :
8231                                    PL_in_my == KEY_state ? "state" : "our"));
8232             }
8233             PL_in_my = (U16)tmp;
8234             s = skipspace(s);
8235             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8236                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8237                 if (memEQs(PL_tokenbuf, len, "sub"))
8238                     goto really_sub;
8239                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8240                 if (!PL_in_my_stash) {
8241                     char tmpbuf[1024];
8242                     int len;
8243                     PL_bufptr = s;
8244                     len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8245                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
8246                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8247                 }
8248             }
8249             else if (*s == '\\') {
8250                 if (!FEATURE_MYREF_IS_ENABLED)
8251                     Perl_croak(aTHX_ "The experimental declared_refs "
8252                                      "feature is not enabled");
8253                 Perl_ck_warner_d(aTHX_
8254                      packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
8255                     "Declaring references is experimental");
8256             }
8257             OPERATOR(MY);
8258
8259         case KEY_next:
8260             LOOPX(OP_NEXT);
8261
8262         case KEY_ne:
8263             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8264                 return REPORT(0);
8265             Eop(OP_SNE);
8266
8267         case KEY_no:
8268             s = tokenize_use(0, s);
8269             TOKEN(USE);
8270
8271         case KEY_not:
8272             if (*s == '(' || (s = skipspace(s), *s == '('))
8273                 FUN1(OP_NOT);
8274             else {
8275                 if (!PL_lex_allbrackets
8276                     && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8277                 {
8278                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8279                 }
8280                 OPERATOR(NOTOP);
8281             }
8282
8283         case KEY_open:
8284             s = skipspace(s);
8285             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8286                 const char *t;
8287                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8288                               &len);
8289                 for (t=d; isSPACE(*t);)
8290                     t++;
8291                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8292                     /* [perl #16184] */
8293                     && !(t[0] == '=' && t[1] == '>')
8294                     && !(t[0] == ':' && t[1] == ':')
8295                     && !keyword(s, d-s, 0)
8296                 ) {
8297                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8298                        "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8299                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8300                 }
8301             }
8302             LOP(OP_OPEN,XTERM);
8303
8304         case KEY_or:
8305             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8306                 return REPORT(0);
8307             pl_yylval.ival = OP_OR;
8308             OPERATOR(OROP);
8309
8310         case KEY_ord:
8311             UNI(OP_ORD);
8312
8313         case KEY_oct:
8314             UNI(OP_OCT);
8315
8316         case KEY_opendir:
8317             LOP(OP_OPEN_DIR,XTERM);
8318
8319         case KEY_print:
8320             checkcomma(s,PL_tokenbuf,"filehandle");
8321             LOP(OP_PRINT,XREF);
8322
8323         case KEY_printf:
8324             checkcomma(s,PL_tokenbuf,"filehandle");
8325             LOP(OP_PRTF,XREF);
8326
8327         case KEY_prototype:
8328             UNI(OP_PROTOTYPE);
8329
8330         case KEY_push:
8331             LOP(OP_PUSH,XTERM);
8332
8333         case KEY_pop:
8334             UNIDOR(OP_POP);
8335
8336         case KEY_pos:
8337             UNIDOR(OP_POS);
8338
8339         case KEY_pack:
8340             LOP(OP_PACK,XTERM);
8341
8342         case KEY_package:
8343             s = force_word(s,BAREWORD,FALSE,TRUE);
8344             s = skipspace(s);
8345             s = force_strict_version(s);
8346             PREBLOCK(PACKAGE);
8347
8348         case KEY_pipe:
8349             LOP(OP_PIPE_OP,XTERM);
8350
8351         case KEY_q:
8352             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8353             if (!s)
8354                 missingterm(NULL, 0);
8355             COPLINE_SET_FROM_MULTI_END;
8356             pl_yylval.ival = OP_CONST;
8357             TERM(sublex_start());
8358
8359         case KEY_quotemeta:
8360             UNI(OP_QUOTEMETA);
8361
8362         case KEY_qw: {
8363             OP *words = NULL;
8364             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8365             if (!s)
8366                 missingterm(NULL, 0);
8367             COPLINE_SET_FROM_MULTI_END;
8368             PL_expect = XOPERATOR;
8369             if (SvCUR(PL_lex_stuff)) {
8370                 int warned_comma = !ckWARN(WARN_QW);
8371                 int warned_comment = warned_comma;
8372                 d = SvPV_force(PL_lex_stuff, len);
8373                 while (len) {
8374                     for (; isSPACE(*d) && len; --len, ++d)
8375                         /**/;
8376                     if (len) {
8377                         SV *sv;
8378                         const char *b = d;
8379                         if (!warned_comma || !warned_comment) {
8380                             for (; !isSPACE(*d) && len; --len, ++d) {
8381                                 if (!warned_comma && *d == ',') {
8382                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8383                                         "Possible attempt to separate words with commas");
8384                                     ++warned_comma;
8385                                 }
8386                                 else if (!warned_comment && *d == '#') {
8387                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8388                                         "Possible attempt to put comments in qw() list");
8389                                     ++warned_comment;
8390                                 }
8391                             }
8392                         }
8393                         else {
8394                             for (; !isSPACE(*d) && len; --len, ++d)
8395                                 /**/;
8396                         }
8397                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8398                         words = op_append_elem(OP_LIST, words,
8399                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8400                     }
8401                 }
8402             }
8403             if (!words)
8404                 words = newNULLLIST();
8405             SvREFCNT_dec_NN(PL_lex_stuff);
8406             PL_lex_stuff = NULL;
8407             PL_expect = XOPERATOR;
8408             pl_yylval.opval = sawparens(words);
8409             TOKEN(QWLIST);
8410         }
8411
8412         case KEY_qq:
8413             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8414             if (!s)
8415                 missingterm(NULL, 0);
8416             pl_yylval.ival = OP_STRINGIFY;
8417             if (SvIVX(PL_lex_stuff) == '\'')
8418                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8419             TERM(sublex_start());
8420
8421         case KEY_qr:
8422             s = scan_pat(s,OP_QR);
8423             TERM(sublex_start());
8424
8425         case KEY_qx:
8426             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8427             if (!s)
8428                 missingterm(NULL, 0);
8429             pl_yylval.ival = OP_BACKTICK;
8430             TERM(sublex_start());
8431
8432         case KEY_return:
8433             OLDLOP(OP_RETURN);
8434
8435         case KEY_require:
8436             s = skipspace(s);
8437             if (isDIGIT(*s)) {
8438                 s = force_version(s, FALSE);
8439             }
8440             else if (*s != 'v' || !isDIGIT(s[1])
8441                     || (s = force_version(s, TRUE), *s == 'v'))
8442             {
8443                 *PL_tokenbuf = '\0';
8444                 s = force_word(s,BAREWORD,TRUE,TRUE);
8445                 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
8446                                            PL_tokenbuf + sizeof(PL_tokenbuf),
8447                                            UTF))
8448                 {
8449                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8450                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8451                 }
8452                 else if (*s == '<')
8453                     yyerror("<> at require-statement should be quotes");
8454             }
8455             if (orig_keyword == KEY_require) {
8456                 orig_keyword = 0;
8457                 pl_yylval.ival = 1;
8458             }
8459             else
8460                 pl_yylval.ival = 0;
8461             PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
8462             PL_bufptr = s;
8463             PL_last_uni = PL_oldbufptr;
8464             PL_last_lop_op = OP_REQUIRE;
8465             s = skipspace(s);
8466             return REPORT( (int)REQUIRE );
8467
8468         case KEY_reset:
8469             UNI(OP_RESET);
8470
8471         case KEY_redo:
8472             LOOPX(OP_REDO);
8473
8474         case KEY_rename:
8475             LOP(OP_RENAME,XTERM);
8476
8477         case KEY_rand:
8478             UNI(OP_RAND);
8479
8480         case KEY_rmdir:
8481             UNI(OP_RMDIR);
8482
8483         case KEY_rindex:
8484             LOP(OP_RINDEX,XTERM);
8485
8486         case KEY_read:
8487             LOP(OP_READ,XTERM);
8488
8489         case KEY_readdir:
8490             UNI(OP_READDIR);
8491
8492         case KEY_readline:
8493             UNIDOR(OP_READLINE);
8494
8495         case KEY_readpipe:
8496             UNIDOR(OP_BACKTICK);
8497
8498         case KEY_rewinddir:
8499             UNI(OP_REWINDDIR);
8500
8501         case KEY_recv:
8502             LOP(OP_RECV,XTERM);
8503
8504         case KEY_reverse:
8505             LOP(OP_REVERSE,XTERM);
8506
8507         case KEY_readlink:
8508             UNIDOR(OP_READLINK);
8509
8510         case KEY_ref:
8511             UNI(OP_REF);
8512
8513         case KEY_s:
8514             s = scan_subst(s);
8515             if (pl_yylval.opval)
8516                 TERM(sublex_start());
8517             else
8518                 TOKEN(1);       /* force error */
8519
8520         case KEY_say:
8521             checkcomma(s,PL_tokenbuf,"filehandle");
8522             LOP(OP_SAY,XREF);
8523
8524         case KEY_chomp:
8525             UNI(OP_CHOMP);
8526
8527         case KEY_scalar:
8528             UNI(OP_SCALAR);
8529
8530         case KEY_select:
8531             LOP(OP_SELECT,XTERM);
8532
8533         case KEY_seek:
8534             LOP(OP_SEEK,XTERM);
8535
8536         case KEY_semctl:
8537             LOP(OP_SEMCTL,XTERM);
8538
8539         case KEY_semget:
8540             LOP(OP_SEMGET,XTERM);
8541
8542         case KEY_semop:
8543             LOP(OP_SEMOP,XTERM);
8544
8545         case KEY_send:
8546             LOP(OP_SEND,XTERM);
8547
8548         case KEY_setpgrp:
8549             LOP(OP_SETPGRP,XTERM);
8550
8551         case KEY_setpriority:
8552             LOP(OP_SETPRIORITY,XTERM);
8553
8554         case KEY_sethostent:
8555             UNI(OP_SHOSTENT);
8556
8557         case KEY_setnetent:
8558             UNI(OP_SNETENT);
8559
8560         case KEY_setservent:
8561             UNI(OP_SSERVENT);
8562
8563         case KEY_setprotoent:
8564             UNI(OP_SPROTOENT);
8565
8566         case KEY_setpwent:
8567             FUN0(OP_SPWENT);
8568
8569         case KEY_setgrent:
8570             FUN0(OP_SGRENT);
8571
8572         case KEY_seekdir:
8573             LOP(OP_SEEKDIR,XTERM);
8574
8575         case KEY_setsockopt:
8576             LOP(OP_SSOCKOPT,XTERM);
8577
8578         case KEY_shift:
8579             UNIDOR(OP_SHIFT);
8580
8581         case KEY_shmctl:
8582             LOP(OP_SHMCTL,XTERM);
8583
8584         case KEY_shmget:
8585             LOP(OP_SHMGET,XTERM);
8586
8587         case KEY_shmread:
8588             LOP(OP_SHMREAD,XTERM);
8589
8590         case KEY_shmwrite:
8591             LOP(OP_SHMWRITE,XTERM);
8592
8593         case KEY_shutdown:
8594             LOP(OP_SHUTDOWN,XTERM);
8595
8596         case KEY_sin:
8597             UNI(OP_SIN);
8598
8599         case KEY_sleep:
8600             UNI(OP_SLEEP);
8601
8602         case KEY_socket:
8603             LOP(OP_SOCKET,XTERM);
8604
8605         case KEY_socketpair:
8606             LOP(OP_SOCKPAIR,XTERM);
8607
8608         case KEY_sort:
8609             checkcomma(s,PL_tokenbuf,"subroutine name");
8610             s = skipspace(s);
8611             PL_expect = XTERM;
8612             s = force_word(s,BAREWORD,TRUE,TRUE);
8613             LOP(OP_SORT,XREF);
8614
8615         case KEY_split:
8616             LOP(OP_SPLIT,XTERM);
8617
8618         case KEY_sprintf:
8619             LOP(OP_SPRINTF,XTERM);
8620
8621         case KEY_splice:
8622             LOP(OP_SPLICE,XTERM);
8623
8624         case KEY_sqrt:
8625             UNI(OP_SQRT);
8626
8627         case KEY_srand:
8628             UNI(OP_SRAND);
8629
8630         case KEY_stat:
8631             UNI(OP_STAT);
8632
8633         case KEY_study:
8634             UNI(OP_STUDY);
8635
8636         case KEY_substr:
8637             LOP(OP_SUBSTR,XTERM);
8638
8639         case KEY_format:
8640         case KEY_sub:
8641           really_sub:
8642             {
8643                 char * const tmpbuf = PL_tokenbuf + 1;
8644                 expectation attrful;
8645                 bool have_name, have_proto;
8646                 const int key = tmp;
8647                 SV *format_name = NULL;
8648
8649                 SSize_t off = s-SvPVX(PL_linestr);
8650                 s = skipspace(s);
8651                 d = SvPVX(PL_linestr)+off;
8652
8653                 if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
8654                     || *s == '\''
8655                     || (*s == ':' && s[1] == ':'))
8656                 {
8657
8658                     PL_expect = XBLOCK;
8659                     attrful = XATTRBLOCK;
8660                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8661                                   &len);
8662                     if (key == KEY_format)
8663                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8664                     *PL_tokenbuf = '&';
8665                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8666                      || pad_findmy_pvn(
8667                             PL_tokenbuf, len + 1, 0
8668                         ) != NOT_IN_PAD)
8669                         sv_setpvn(PL_subname, tmpbuf, len);
8670                     else {
8671                         sv_setsv(PL_subname,PL_curstname);
8672                         sv_catpvs(PL_subname,"::");
8673                         sv_catpvn(PL_subname,tmpbuf,len);
8674                     }
8675                     if (SvUTF8(PL_linestr))
8676                         SvUTF8_on(PL_subname);
8677                     have_name = TRUE;
8678
8679
8680                     s = skipspace(d);
8681                 }
8682                 else {
8683                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8684                     {
8685                         *d = '\0';
8686                         /* diag_listed_as: Missing name in "%s sub" */
8687                         Perl_croak(aTHX_
8688                                   "Missing name in \"%s\"", PL_bufptr);
8689                     }
8690                     PL_expect = XTERMBLOCK;
8691                     attrful = XATTRTERM;
8692                     sv_setpvs(PL_subname,"?");
8693                     have_name = FALSE;
8694                 }
8695
8696                 if (key == KEY_format) {
8697                     if (format_name) {
8698                         NEXTVAL_NEXTTOKE.opval
8699                             = newSVOP(OP_CONST,0, format_name);
8700                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8701                         force_next(BAREWORD);
8702                     }
8703                     PREBLOCK(FORMAT);
8704                 }
8705
8706                 /* Look for a prototype */
8707                 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8708                     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8709                     COPLINE_SET_FROM_MULTI_END;
8710                     if (!s)
8711                         Perl_croak(aTHX_ "Prototype not terminated");
8712                     (void)validate_proto(PL_subname, PL_lex_stuff,
8713                                          ckWARN(WARN_ILLEGALPROTO), 0);
8714                     have_proto = TRUE;
8715
8716                     s = skipspace(s);
8717                 }
8718                 else
8719                     have_proto = FALSE;
8720
8721                 if (*s == ':' && s[1] != ':')
8722                     PL_expect = attrful;
8723                 else if ((*s != '{' && *s != '(') && key != KEY_format) {
8724                     assert(key == KEY_sub || key == KEY_AUTOLOAD ||
8725                            key == KEY_DESTROY || key == KEY_BEGIN ||
8726                            key == KEY_UNITCHECK || key == KEY_CHECK ||
8727                            key == KEY_INIT || key == KEY_END ||
8728                            key == KEY_my || key == KEY_state ||
8729                            key == KEY_our);
8730                     if (!have_name)
8731                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8732                     else if (*s != ';' && *s != '}')
8733                         Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
8734                 }
8735
8736                 if (have_proto) {
8737                     NEXTVAL_NEXTTOKE.opval =
8738                         newSVOP(OP_CONST, 0, PL_lex_stuff);
8739                     PL_lex_stuff = NULL;
8740                     force_next(THING);
8741                 }
8742                 if (!have_name) {
8743                     if (PL_curstash)
8744                         sv_setpvs(PL_subname, "__ANON__");
8745                     else
8746                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8747                     TOKEN(ANONSUB);
8748                 }
8749                 force_ident_maybe_lex('&');
8750                 TOKEN(SUB);
8751             }
8752
8753         case KEY_system:
8754             LOP(OP_SYSTEM,XREF);
8755
8756         case KEY_symlink:
8757             LOP(OP_SYMLINK,XTERM);
8758
8759         case KEY_syscall:
8760             LOP(OP_SYSCALL,XTERM);
8761
8762         case KEY_sysopen:
8763             LOP(OP_SYSOPEN,XTERM);
8764
8765         case KEY_sysseek:
8766             LOP(OP_SYSSEEK,XTERM);
8767
8768         case KEY_sysread:
8769             LOP(OP_SYSREAD,XTERM);
8770
8771         case KEY_syswrite:
8772             LOP(OP_SYSWRITE,XTERM);
8773
8774         case KEY_tr:
8775         case KEY_y:
8776             s = scan_trans(s);
8777             TERM(sublex_start());
8778
8779         case KEY_tell:
8780             UNI(OP_TELL);
8781
8782         case KEY_telldir:
8783             UNI(OP_TELLDIR);
8784
8785         case KEY_tie:
8786             LOP(OP_TIE,XTERM);
8787
8788         case KEY_tied:
8789             UNI(OP_TIED);
8790
8791         case KEY_time:
8792             FUN0(OP_TIME);
8793
8794         case KEY_times:
8795             FUN0(OP_TMS);
8796
8797         case KEY_truncate:
8798             LOP(OP_TRUNCATE,XTERM);
8799
8800         case KEY_uc:
8801             UNI(OP_UC);
8802
8803         case KEY_ucfirst:
8804             UNI(OP_UCFIRST);
8805
8806         case KEY_untie:
8807             UNI(OP_UNTIE);
8808
8809         case KEY_until:
8810             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8811                 return REPORT(0);
8812             pl_yylval.ival = CopLINE(PL_curcop);
8813             OPERATOR(UNTIL);
8814
8815         case KEY_unless:
8816             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8817                 return REPORT(0);
8818             pl_yylval.ival = CopLINE(PL_curcop);
8819             OPERATOR(UNLESS);
8820
8821         case KEY_unlink:
8822             LOP(OP_UNLINK,XTERM);
8823
8824         case KEY_undef:
8825             UNIDOR(OP_UNDEF);
8826
8827         case KEY_unpack:
8828             LOP(OP_UNPACK,XTERM);
8829
8830         case KEY_utime:
8831             LOP(OP_UTIME,XTERM);
8832
8833         case KEY_umask:
8834             UNIDOR(OP_UMASK);
8835
8836         case KEY_unshift:
8837             LOP(OP_UNSHIFT,XTERM);
8838
8839         case KEY_use:
8840             s = tokenize_use(1, s);
8841             TOKEN(USE);
8842
8843         case KEY_values:
8844             UNI(OP_VALUES);
8845
8846         case KEY_vec:
8847             LOP(OP_VEC,XTERM);
8848
8849         case KEY_when:
8850             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8851                 return REPORT(0);
8852             pl_yylval.ival = CopLINE(PL_curcop);
8853             Perl_ck_warner_d(aTHX_
8854                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8855                 "when is experimental");
8856             OPERATOR(WHEN);
8857
8858         case KEY_while:
8859             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8860                 return REPORT(0);
8861             pl_yylval.ival = CopLINE(PL_curcop);
8862             OPERATOR(WHILE);
8863
8864         case KEY_warn:
8865             PL_hints |= HINT_BLOCK_SCOPE;
8866             LOP(OP_WARN,XTERM);
8867
8868         case KEY_wait:
8869             FUN0(OP_WAIT);
8870
8871         case KEY_waitpid:
8872             LOP(OP_WAITPID,XTERM);
8873
8874         case KEY_wantarray:
8875             FUN0(OP_WANTARRAY);
8876
8877         case KEY_write:
8878             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8879              * we use the same number on EBCDIC */
8880             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8881             UNI(OP_ENTERWRITE);
8882
8883         case KEY_x:
8884             if (PL_expect == XOPERATOR) {
8885                 if (*s == '=' && !PL_lex_allbrackets
8886                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8887                 {
8888                     return REPORT(0);
8889                 }
8890                 Mop(OP_REPEAT);
8891             }
8892             check_uni();
8893             goto just_a_word;
8894
8895         case KEY_xor:
8896             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8897                 return REPORT(0);
8898             pl_yylval.ival = OP_XOR;
8899             OPERATOR(OROP);
8900         }
8901     }}
8902 }
8903
8904 /*
8905   S_pending_ident
8906
8907   Looks up an identifier in the pad or in a package
8908
8909   is_sig indicates that this is a subroutine signature variable
8910   rather than a plain pad var.
8911
8912   Returns:
8913     PRIVATEREF if this is a lexical name.
8914     BAREWORD   if this belongs to a package.
8915
8916   Structure:
8917       if we're in a my declaration
8918           croak if they tried to say my($foo::bar)
8919           build the ops for a my() declaration
8920       if it's an access to a my() variable
8921           build ops for access to a my() variable
8922       if in a dq string, and they've said @foo and we can't find @foo
8923           warn
8924       build ops for a bareword
8925 */
8926
8927 static int
8928 S_pending_ident(pTHX)
8929 {
8930     PADOFFSET tmp = 0;
8931     const char pit = (char)pl_yylval.ival;
8932     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8933     /* All routes through this function want to know if there is a colon.  */
8934     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8935
8936     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8937           "### Pending identifier '%s'\n", PL_tokenbuf); });
8938     assert(tokenbuf_len >= 2);
8939
8940     /* if we're in a my(), we can't allow dynamics here.
8941        $foo'bar has already been turned into $foo::bar, so
8942        just check for colons.
8943
8944        if it's a legal name, the OP is a PADANY.
8945     */
8946     if (PL_in_my) {
8947         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8948             if (has_colon)
8949                 /* diag_listed_as: No package name allowed for variable %s
8950                                    in "our" */
8951                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8952                                   "%se %s in \"our\"",
8953                                   *PL_tokenbuf=='&' ?"subroutin":"variabl",
8954                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8955             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8956         }
8957         else {
8958             OP *o;
8959             if (has_colon) {
8960                 /* "my" variable %s can't be in a package */
8961                 /* PL_no_myglob is constant */
8962                 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
8963                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8964                             PL_in_my == KEY_my ? "my" : "state",
8965                             *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8966                             PL_tokenbuf),
8967                             UTF ? SVf_UTF8 : 0);
8968                 GCC_DIAG_RESTORE_STMT;
8969             }
8970
8971             if (PL_in_my == KEY_sigvar) {
8972                 /* A signature 'padop' needs in addition, an op_first to
8973                  * point to a child sigdefelem, and an extra field to hold
8974                  * the signature index. We can achieve both by using an
8975                  * UNOP_AUX and (ab)using the op_aux field to hold the
8976                  * index. If we ever need more fields, use a real malloced
8977                  * aux strut instead.
8978                  */
8979                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
8980                                     INT2PTR(UNOP_AUX_item *,
8981                                         (PL_parser->sig_elems)));
8982                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
8983                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
8984                                   :                         OPpARGELEM_HV);
8985             }
8986             else
8987                 o = newOP(OP_PADANY, 0);
8988             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8989                                                         UTF ? SVf_UTF8 : 0);
8990             if (PL_in_my == KEY_sigvar)
8991                 PL_in_my = 0;
8992
8993             pl_yylval.opval = o;
8994             return PRIVATEREF;
8995         }
8996     }
8997
8998     /*
8999        build the ops for accesses to a my() variable.
9000     */
9001
9002     if (!has_colon) {
9003         if (!PL_in_my)
9004             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9005                                  0);
9006         if (tmp != NOT_IN_PAD) {
9007             /* might be an "our" variable" */
9008             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9009                 /* build ops for a bareword */
9010                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9011                 HEK * const stashname = HvNAME_HEK(stash);
9012                 SV *  const sym = newSVhek(stashname);
9013                 sv_catpvs(sym, "::");
9014                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9015                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9016                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9017                 if (pit != '&')
9018                   gv_fetchsv(sym,
9019                     GV_ADDMULTI,
9020                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9021                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9022                      : SVt_PVHV));
9023                 return BAREWORD;
9024             }
9025
9026             pl_yylval.opval = newOP(OP_PADANY, 0);
9027             pl_yylval.opval->op_targ = tmp;
9028             return PRIVATEREF;
9029         }
9030     }
9031
9032     /*
9033        Whine if they've said @foo or @foo{key} in a doublequoted string,
9034        and @foo (or %foo) isn't a variable we can find in the symbol
9035        table.
9036     */
9037     if (ckWARN(WARN_AMBIGUOUS)
9038         && pit == '@'
9039         && PL_lex_state != LEX_NORMAL
9040         && !PL_lex_brackets)
9041     {
9042         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9043                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9044                                          SVt_PVAV);
9045         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9046            )
9047         {
9048             /* Downgraded from fatal to warning 20000522 mjd */
9049             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9050                         "Possible unintended interpolation of %" UTF8f
9051                         " in string",
9052                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9053         }
9054     }
9055
9056     /* build ops for a bareword */
9057     pl_yylval.opval = newSVOP(OP_CONST, 0,
9058                                    newSVpvn_flags(PL_tokenbuf + 1,
9059                                                       tokenbuf_len - 1,
9060                                                       UTF ? SVf_UTF8 : 0 ));
9061     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9062     if (pit != '&')
9063         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9064                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9065                      | ( UTF ? SVf_UTF8 : 0 ),
9066                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9067                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9068                       : SVt_PVHV));
9069     return BAREWORD;
9070 }
9071
9072 STATIC void
9073 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9074 {
9075     PERL_ARGS_ASSERT_CHECKCOMMA;
9076
9077     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
9078         if (ckWARN(WARN_SYNTAX)) {
9079             int level = 1;
9080             const char *w;
9081             for (w = s+2; *w && level; w++) {
9082                 if (*w == '(')
9083                     ++level;
9084                 else if (*w == ')')
9085                     --level;
9086             }
9087             while (isSPACE(*w))
9088                 ++w;
9089             /* the list of chars below is for end of statements or
9090              * block / parens, boolean operators (&&, ||, //) and branch
9091              * constructs (or, and, if, until, unless, while, err, for).
9092              * Not a very solid hack... */
9093             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9094                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9095                             "%s (...) interpreted as function",name);
9096         }
9097     }
9098     while (s < PL_bufend && isSPACE(*s))
9099         s++;
9100     if (*s == '(')
9101         s++;
9102     while (s < PL_bufend && isSPACE(*s))
9103         s++;
9104     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9105         const char * const w = s;
9106         s += UTF ? UTF8SKIP(s) : 1;
9107         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9108             s += UTF ? UTF8SKIP(s) : 1;
9109         while (s < PL_bufend && isSPACE(*s))
9110             s++;
9111         if (*s == ',') {
9112             GV* gv;
9113             if (keyword(w, s - w, 0))
9114                 return;
9115
9116             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9117             if (gv && GvCVu(gv))
9118                 return;
9119             if (s - w <= 254) {
9120                 PADOFFSET off;
9121                 char tmpbuf[256];
9122                 Copy(w, tmpbuf+1, s - w, char);
9123                 *tmpbuf = '&';
9124                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9125                 if (off != NOT_IN_PAD) return;
9126             }
9127             Perl_croak(aTHX_ "No comma allowed after %s", what);
9128         }
9129     }
9130 }
9131
9132 /* S_new_constant(): do any overload::constant lookup.
9133
9134    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9135    Best used as sv=new_constant(..., sv, ...).
9136    If s, pv are NULL, calls subroutine with one argument,
9137    and <type> is used with error messages only.
9138    <type> is assumed to be well formed UTF-8 */
9139
9140 STATIC SV *
9141 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9142                SV *sv, SV *pv, const char *type, STRLEN typelen)
9143 {
9144     dSP;
9145     HV * table = GvHV(PL_hintgv);                /* ^H */
9146     SV *res;
9147     SV *errsv = NULL;
9148     SV **cvp;
9149     SV *cv, *typesv;
9150     const char *why1 = "", *why2 = "", *why3 = "";
9151
9152     PERL_ARGS_ASSERT_NEW_CONSTANT;
9153     /* We assume that this is true: */
9154     if (*key == 'c') { assert (strEQ(key, "charnames")); }
9155     assert(type || s);
9156
9157     /* charnames doesn't work well if there have been errors found */
9158     if (PL_error_count > 0 && *key == 'c')
9159     {
9160         SvREFCNT_dec_NN(sv);
9161         return &PL_sv_undef;
9162     }
9163
9164     sv_2mortal(sv);                     /* Parent created it permanently */
9165     if (!table
9166         || ! (PL_hints & HINT_LOCALIZE_HH)
9167         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9168         || ! SvOK(*cvp))
9169     {
9170         char *msg;
9171
9172         /* Here haven't found what we're looking for.  If it is charnames,
9173          * perhaps it needs to be loaded.  Try doing that before giving up */
9174         if (*key == 'c') {
9175             Perl_load_module(aTHX_
9176                             0,
9177                             newSVpvs("_charnames"),
9178                              /* version parameter; no need to specify it, as if
9179                               * we get too early a version, will fail anyway,
9180                               * not being able to find '_charnames' */
9181                             NULL,
9182                             newSVpvs(":full"),
9183                             newSVpvs(":short"),
9184                             NULL);
9185             assert(sp == PL_stack_sp);
9186             table = GvHV(PL_hintgv);
9187             if (table
9188                 && (PL_hints & HINT_LOCALIZE_HH)
9189                 && (cvp = hv_fetch(table, key, keylen, FALSE))
9190                 && SvOK(*cvp))
9191             {
9192                 goto now_ok;
9193             }
9194         }
9195         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9196             msg = Perl_form(aTHX_
9197                                "Constant(%.*s) unknown",
9198                                 (int)(type ? typelen : len),
9199                                 (type ? type: s));
9200         }
9201         else {
9202             why1 = "$^H{";
9203             why2 = key;
9204             why3 = "} is not defined";
9205         report:
9206             if (*key == 'c') {
9207                 msg = Perl_form(aTHX_
9208                             /* The +3 is for '\N{'; -4 for that, plus '}' */
9209                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9210                       );
9211             }
9212             else {
9213                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9214                                     (int)(type ? typelen : len),
9215                                     (type ? type: s), why1, why2, why3);
9216             }
9217         }
9218         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9219         return SvREFCNT_inc_simple_NN(sv);
9220     }
9221   now_ok:
9222     cv = *cvp;
9223     if (!pv && s)
9224         pv = newSVpvn_flags(s, len, SVs_TEMP);
9225     if (type && pv)
9226         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9227     else
9228         typesv = &PL_sv_undef;
9229
9230     PUSHSTACKi(PERLSI_OVERLOAD);
9231     ENTER ;
9232     SAVETMPS;
9233
9234     PUSHMARK(SP) ;
9235     EXTEND(sp, 3);
9236     if (pv)
9237         PUSHs(pv);
9238     PUSHs(sv);
9239     if (pv)
9240         PUSHs(typesv);
9241     PUTBACK;
9242     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9243
9244     SPAGAIN ;
9245
9246     /* Check the eval first */
9247     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9248         STRLEN errlen;
9249         const char * errstr;
9250         sv_catpvs(errsv, "Propagated");
9251         errstr = SvPV_const(errsv, errlen);
9252         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9253         (void)POPs;
9254         res = SvREFCNT_inc_simple_NN(sv);
9255     }
9256     else {
9257         res = POPs;
9258         SvREFCNT_inc_simple_void_NN(res);
9259     }
9260
9261     PUTBACK ;
9262     FREETMPS ;
9263     LEAVE ;
9264     POPSTACK;
9265
9266     if (!SvOK(res)) {
9267         why1 = "Call to &{$^H{";
9268         why2 = key;
9269         why3 = "}} did not return a defined value";
9270         sv = res;
9271         (void)sv_2mortal(sv);
9272         goto report;
9273     }
9274
9275     return res;
9276 }
9277
9278 PERL_STATIC_INLINE void
9279 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9280                     bool is_utf8, bool check_dollar, bool tick_warn)
9281 {
9282     int saw_tick = 0;
9283     const char *olds = *s;
9284     PERL_ARGS_ASSERT_PARSE_IDENT;
9285
9286     while (*s < PL_bufend) {
9287         if (*d >= e)
9288             Perl_croak(aTHX_ "%s", ident_too_long);
9289         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9290              /* The UTF-8 case must come first, otherwise things
9291              * like c\N{COMBINING TILDE} would start failing, as the
9292              * isWORDCHAR_A case below would gobble the 'c' up.
9293              */
9294
9295             char *t = *s + UTF8SKIP(*s);
9296             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9297                 t += UTF8SKIP(t);
9298             }
9299             if (*d + (t - *s) > e)
9300                 Perl_croak(aTHX_ "%s", ident_too_long);
9301             Copy(*s, *d, t - *s, char);
9302             *d += t - *s;
9303             *s = t;
9304         }
9305         else if ( isWORDCHAR_A(**s) ) {
9306             do {
9307                 *(*d)++ = *(*s)++;
9308             } while (isWORDCHAR_A(**s) && *d < e);
9309         }
9310         else if (   allow_package
9311                  && **s == '\''
9312                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9313         {
9314             *(*d)++ = ':';
9315             *(*d)++ = ':';
9316             (*s)++;
9317             saw_tick++;
9318         }
9319         else if (allow_package && **s == ':' && (*s)[1] == ':'
9320            /* Disallow things like Foo::$bar. For the curious, this is
9321             * the code path that triggers the "Bad name after" warning
9322             * when looking for barewords.
9323             */
9324            && !(check_dollar && (*s)[2] == '$')) {
9325             *(*d)++ = *(*s)++;
9326             *(*d)++ = *(*s)++;
9327         }
9328         else
9329             break;
9330     }
9331     if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
9332               && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
9333         char *d;
9334         char *d2;
9335         Newx(d, *s - olds + saw_tick + 2, char); /* +2 for $# */
9336         d2 = d;
9337         SAVEFREEPV(d);
9338         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9339                          "Old package separator used in string");
9340         if (olds[-1] == '#')
9341             *d2++ = olds[-2];
9342         *d2++ = olds[-1];
9343         while (olds < *s) {
9344             if (*olds == '\'') {
9345                 *d2++ = '\\';
9346                 *d2++ = *olds++;
9347             }
9348             else
9349                 *d2++ = *olds++;
9350         }
9351         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9352                          "\t(Did you mean \"%" UTF8f "\" instead?)\n",
9353                           UTF8fARG(is_utf8, d2-d, d));
9354     }
9355     return;
9356 }
9357
9358 /* Returns a NUL terminated string, with the length of the string written to
9359    *slp
9360    */
9361 STATIC char *
9362 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9363 {
9364     char *d = dest;
9365     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9366     bool is_utf8 = cBOOL(UTF);
9367
9368     PERL_ARGS_ASSERT_SCAN_WORD;
9369
9370     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
9371     *d = '\0';
9372     *slp = d - dest;
9373     return s;
9374 }
9375
9376 /* Is the byte 'd' a legal single character identifier name?  'u' is true
9377  * iff Unicode semantics are to be used.  The legal ones are any of:
9378  *  a) all ASCII characters except:
9379  *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9380  *          2) '{'
9381  *     The final case currently doesn't get this far in the program, so we
9382  *     don't test for it.  If that were to change, it would be ok to allow it.
9383  *  b) When not under Unicode rules, any upper Latin1 character
9384  *  c) Otherwise, when unicode rules are used, all XIDS characters.
9385  *
9386  *      Because all ASCII characters have the same representation whether
9387  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9388  *      '{' without knowing if is UTF-8 or not. */
9389 #define VALID_LEN_ONE_IDENT(s, e, is_utf8)                                  \
9390     (isGRAPH_A(*(s)) || ((is_utf8)                                          \
9391                          ? isIDFIRST_utf8_safe(s, e)                        \
9392                          : (isGRAPH_L1(*s)                                  \
9393                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9394
9395 STATIC char *
9396 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9397 {
9398     I32 herelines = PL_parser->herelines;
9399     SSize_t bracket = -1;
9400     char funny = *s++;
9401     char *d = dest;
9402     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9403     bool is_utf8 = cBOOL(UTF);
9404     I32 orig_copline = 0, tmp_copline = 0;
9405
9406     PERL_ARGS_ASSERT_SCAN_IDENT;
9407
9408     if (isSPACE(*s) || !*s)
9409         s = skipspace(s);
9410     if (isDIGIT(*s)) {
9411         while (isDIGIT(*s)) {
9412             if (d >= e)
9413                 Perl_croak(aTHX_ "%s", ident_too_long);
9414             *d++ = *s++;
9415         }
9416     }
9417     else {  /* See if it is a "normal" identifier */
9418         parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
9419     }
9420     *d = '\0';
9421     d = dest;
9422     if (*d) {
9423         /* Either a digit variable, or parse_ident() found an identifier
9424            (anything valid as a bareword), so job done and return.  */
9425         if (PL_lex_state != LEX_NORMAL)
9426             PL_lex_state = LEX_INTERPENDMAYBE;
9427         return s;
9428     }
9429
9430     /* Here, it is not a run-of-the-mill identifier name */
9431
9432     if (*s == '$' && s[1]
9433         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9434             || isDIGIT_A((U8)s[1])
9435             || s[1] == '$'
9436             || s[1] == '{'
9437             || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
9438     {
9439         /* Dereferencing a value in a scalar variable.
9440            The alternatives are different syntaxes for a scalar variable.
9441            Using ' as a leading package separator isn't allowed. :: is.   */
9442         return s;
9443     }
9444     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9445     if (*s == '{') {
9446         bracket = s - SvPVX(PL_linestr);
9447         s++;
9448         orig_copline = CopLINE(PL_curcop);
9449         if (s < PL_bufend && isSPACE(*s)) {
9450             s = skipspace(s);
9451         }
9452     }
9453     if ((s <= PL_bufend - (is_utf8)
9454                           ? UTF8SKIP(s)
9455                           : 1)
9456         && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
9457     {
9458         if (is_utf8) {
9459             const STRLEN skip = UTF8SKIP(s);
9460             STRLEN i;
9461             d[skip] = '\0';
9462             for ( i = 0; i < skip; i++ )
9463                 d[i] = *s++;
9464         }
9465         else {
9466             *d = *s++;
9467             d[1] = '\0';
9468         }
9469     }
9470     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9471     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9472         *d = toCTRL(*s);
9473         s++;
9474     }
9475     /* Warn about ambiguous code after unary operators if {...} notation isn't
9476        used.  There's no difference in ambiguity; it's merely a heuristic
9477        about when not to warn.  */
9478     else if (ck_uni && bracket == -1)
9479         check_uni();
9480     if (bracket != -1) {
9481         bool skip;
9482         char *s2;
9483         /* If we were processing {...} notation then...  */
9484         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
9485             || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9486                  && isWORDCHAR(*s))
9487         ) {
9488             /* note we have to check for a normal identifier first,
9489              * as it handles utf8 symbols, and only after that has
9490              * been ruled out can we look at the caret words */
9491             if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
9492                 /* if it starts as a valid identifier, assume that it is one.
9493                    (the later check for } being at the expected point will trap
9494                    cases where this doesn't pan out.)  */
9495                 d += is_utf8 ? UTF8SKIP(d) : 1;
9496                 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
9497                 *d = '\0';
9498             }
9499             else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
9500                 d++;
9501                 while (isWORDCHAR(*s) && d < e) {
9502                     *d++ = *s++;
9503                 }
9504                 if (d >= e)
9505                     Perl_croak(aTHX_ "%s", ident_too_long);
9506                 *d = '\0';
9507             }
9508             tmp_copline = CopLINE(PL_curcop);
9509             if (s < PL_bufend && isSPACE(*s)) {
9510                 s = skipspace(s);
9511             }
9512             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9513                 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
9514                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9515                     const char * const brack =
9516                         (const char *)
9517                         ((*s == '[') ? "[...]" : "{...}");
9518                     orig_copline = CopLINE(PL_curcop);
9519                     CopLINE_set(PL_curcop, tmp_copline);
9520    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9521                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9522                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9523                         funny, dest, brack, funny, dest, brack);
9524                     CopLINE_set(PL_curcop, orig_copline);
9525                 }
9526                 bracket++;
9527                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9528                 PL_lex_allbrackets++;
9529                 return s;
9530             }
9531         }
9532
9533         if ( !tmp_copline )
9534             tmp_copline = CopLINE(PL_curcop);
9535         if ((skip = s < PL_bufend && isSPACE(*s))) {
9536             /* Avoid incrementing line numbers or resetting PL_linestart,
9537                in case we have to back up.  */
9538             STRLEN s_off = s - SvPVX(PL_linestr);
9539             s2 = peekspace(s);
9540             s = SvPVX(PL_linestr) + s_off;
9541         }
9542         else
9543             s2 = s;
9544
9545         /* Expect to find a closing } after consuming any trailing whitespace.
9546          */
9547         if (*s2 == '}') {
9548             /* Now increment line numbers if applicable.  */
9549             if (skip)
9550                 s = skipspace(s);
9551             s++;
9552             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9553                 PL_lex_state = LEX_INTERPEND;
9554                 PL_expect = XREF;
9555             }
9556             if (PL_lex_state == LEX_NORMAL) {
9557                 if (ckWARN(WARN_AMBIGUOUS)
9558                     && (keyword(dest, d - dest, 0)
9559                         || get_cvn_flags(dest, d - dest, is_utf8
9560                            ? SVf_UTF8
9561                            : 0)))
9562                 {
9563                     SV *tmp = newSVpvn_flags( dest, d - dest,
9564                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9565                     if (funny == '#')
9566                         funny = '@';
9567                     orig_copline = CopLINE(PL_curcop);
9568                     CopLINE_set(PL_curcop, tmp_copline);
9569                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9570                         "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
9571                         funny, SVfARG(tmp), funny, SVfARG(tmp));
9572                     CopLINE_set(PL_curcop, orig_copline);
9573                 }
9574             }
9575         }
9576         else {
9577             /* Didn't find the closing } at the point we expected, so restore
9578                state such that the next thing to process is the opening { and */
9579             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9580             CopLINE_set(PL_curcop, orig_copline);
9581             PL_parser->herelines = herelines;
9582             *dest = '\0';
9583         }
9584     }
9585     else if (   PL_lex_state == LEX_INTERPNORMAL
9586              && !PL_lex_brackets
9587              && !intuit_more(s, PL_bufend))
9588         PL_lex_state = LEX_INTERPEND;
9589     return s;
9590 }
9591
9592 static bool
9593 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9594
9595     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9596      * found in the parse starting at 's', based on the subset that are valid
9597      * in this context input to this routine in 'valid_flags'. Advances s.
9598      * Returns TRUE if the input should be treated as a valid flag, so the next
9599      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9600      * upon first call on the current regex.  This routine will set it to any
9601      * charset modifier found.  The caller shouldn't change it.  This way,
9602      * another charset modifier encountered in the parse can be detected as an
9603      * error, as we have decided to allow only one */
9604
9605     const char c = **s;
9606     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9607
9608     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9609         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
9610             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9611                        UTF ? SVf_UTF8 : 0);
9612             (*s) += charlen;
9613             /* Pretend that it worked, so will continue processing before
9614              * dieing */
9615             return TRUE;
9616         }
9617         return FALSE;
9618     }
9619
9620     switch (c) {
9621
9622         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
9623         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9624         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9625         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9626         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9627         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9628         case LOCALE_PAT_MOD:
9629             if (*charset) {
9630                 goto multiple_charsets;
9631             }
9632             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9633             *charset = c;
9634             break;
9635         case UNICODE_PAT_MOD:
9636             if (*charset) {
9637                 goto multiple_charsets;
9638             }
9639             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9640             *charset = c;
9641             break;
9642         case ASCII_RESTRICT_PAT_MOD:
9643             if (! *charset) {
9644                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9645             }
9646             else {
9647
9648                 /* Error if previous modifier wasn't an 'a', but if it was, see
9649                  * if, and accept, a second occurrence (only) */
9650                 if (*charset != 'a'
9651                     || get_regex_charset(*pmfl)
9652                         != REGEX_ASCII_RESTRICTED_CHARSET)
9653                 {
9654                         goto multiple_charsets;
9655                 }
9656                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9657             }
9658             *charset = c;
9659             break;
9660         case DEPENDS_PAT_MOD:
9661             if (*charset) {
9662                 goto multiple_charsets;
9663             }
9664             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9665             *charset = c;
9666             break;
9667     }
9668
9669     (*s)++;
9670     return TRUE;
9671
9672     multiple_charsets:
9673         if (*charset != c) {
9674             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9675         }
9676         else if (c == 'a') {
9677   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9678             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9679         }
9680         else {
9681             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9682         }
9683
9684         /* Pretend that it worked, so will continue processing before dieing */
9685         (*s)++;
9686         return TRUE;
9687 }
9688
9689 STATIC char *
9690 S_scan_pat(pTHX_ char *start, I32 type)
9691 {
9692     PMOP *pm;
9693     char *s;
9694     const char * const valid_flags =
9695         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9696     char charset = '\0';    /* character set modifier */
9697     unsigned int x_mod_count = 0;
9698
9699     PERL_ARGS_ASSERT_SCAN_PAT;
9700
9701     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
9702     if (!s)
9703         Perl_croak(aTHX_ "Search pattern not terminated");
9704
9705     pm = (PMOP*)newPMOP(type, 0);
9706     if (PL_multi_open == '?') {
9707         /* This is the only point in the code that sets PMf_ONCE:  */
9708         pm->op_pmflags |= PMf_ONCE;
9709
9710         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9711            allows us to restrict the list needed by reset to just the ??
9712            matches.  */
9713         assert(type != OP_TRANS);
9714         if (PL_curstash) {
9715             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9716             U32 elements;
9717             if (!mg) {
9718                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9719                                  0);
9720             }
9721             elements = mg->mg_len / sizeof(PMOP**);
9722             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9723             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9724             mg->mg_len = elements * sizeof(PMOP**);
9725             PmopSTASH_set(pm,PL_curstash);
9726         }
9727     }
9728
9729     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9730      * anon CV. False positives like qr/[(?{]/ are harmless */
9731
9732     if (type == OP_QR) {
9733         STRLEN len;
9734         char *e, *p = SvPV(PL_lex_stuff, len);
9735         e = p + len;
9736         for (; p < e; p++) {
9737             if (p[0] == '(' && p[1] == '?'
9738                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9739             {
9740                 pm->op_pmflags |= PMf_HAS_CV;
9741                 break;
9742             }
9743         }
9744         pm->op_pmflags |= PMf_IS_QR;
9745     }
9746
9747     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
9748                                 &s, &charset, &x_mod_count))
9749     {};
9750     /* issue a warning if /c is specified,but /g is not */
9751     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9752     {
9753         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9754                        "Use of /c modifier is meaningless without /g" );
9755     }
9756
9757     PL_lex_op = (OP*)pm;
9758     pl_yylval.ival = OP_MATCH;
9759     return s;
9760 }
9761
9762 STATIC char *
9763 S_scan_subst(pTHX_ char *start)
9764 {
9765     char *s;
9766     PMOP *pm;
9767     I32 first_start;
9768     line_t first_line;
9769     line_t linediff = 0;
9770     I32 es = 0;
9771     char charset = '\0';    /* character set modifier */
9772     unsigned int x_mod_count = 0;
9773     char *t;
9774
9775     PERL_ARGS_ASSERT_SCAN_SUBST;
9776
9777     pl_yylval.ival = OP_NULL;
9778
9779     s = scan_str(start, TRUE, FALSE, FALSE, &t);
9780
9781     if (!s)
9782         Perl_croak(aTHX_ "Substitution pattern not terminated");
9783
9784     s = t;
9785
9786     first_start = PL_multi_start;
9787     first_line = CopLINE(PL_curcop);
9788     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9789     if (!s) {
9790         SvREFCNT_dec_NN(PL_lex_stuff);
9791         PL_lex_stuff = NULL;
9792         Perl_croak(aTHX_ "Substitution replacement not terminated");
9793     }
9794     PL_multi_start = first_start;       /* so whole substitution is taken together */
9795
9796     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9797
9798
9799     while (*s) {
9800         if (*s == EXEC_PAT_MOD) {
9801             s++;
9802             es++;
9803         }
9804         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9805                                   &s, &charset, &x_mod_count))
9806         {
9807             break;
9808         }
9809     }
9810
9811     if ((pm->op_pmflags & PMf_CONTINUE)) {
9812         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9813     }
9814
9815     if (es) {
9816         SV * const repl = newSVpvs("");
9817
9818         PL_multi_end = 0;
9819         pm->op_pmflags |= PMf_EVAL;
9820         for (; es > 1; es--) {
9821             sv_catpvs(repl, "eval ");
9822         }
9823         sv_catpvs(repl, "do {");
9824         sv_catsv(repl, PL_parser->lex_sub_repl);
9825         sv_catpvs(repl, "}");
9826         SvREFCNT_dec(PL_parser->lex_sub_repl);
9827         PL_parser->lex_sub_repl = repl;
9828     }
9829
9830
9831     linediff = CopLINE(PL_curcop) - first_line;
9832     if (linediff)
9833         CopLINE_set(PL_curcop, first_line);
9834
9835     if (linediff || es) {
9836         /* the IVX field indicates that the replacement string is a s///e;
9837          * the NVX field indicates how many src code lines the replacement
9838          * spreads over */
9839         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
9840         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = 0;
9841         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
9842                                                                     cBOOL(es);
9843     }
9844
9845     PL_lex_op = (OP*)pm;
9846     pl_yylval.ival = OP_SUBST;
9847     return s;
9848 }
9849
9850 STATIC char *
9851 S_scan_trans(pTHX_ char *start)
9852 {
9853     char* s;
9854     OP *o;
9855     U8 squash;
9856     U8 del;
9857     U8 complement;
9858     bool nondestruct = 0;
9859     char *t;
9860
9861     PERL_ARGS_ASSERT_SCAN_TRANS;
9862
9863     pl_yylval.ival = OP_NULL;
9864
9865     s = scan_str(start,FALSE,FALSE,FALSE,&t);
9866     if (!s)
9867         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9868
9869     s = t;
9870
9871     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9872     if (!s) {
9873         SvREFCNT_dec_NN(PL_lex_stuff);
9874         PL_lex_stuff = NULL;
9875         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9876     }
9877
9878     complement = del = squash = 0;
9879     while (1) {
9880         switch (*s) {
9881         case 'c':
9882             complement = OPpTRANS_COMPLEMENT;
9883             break;
9884         case 'd':
9885             del = OPpTRANS_DELETE;
9886             break;
9887         case 's':
9888             squash = OPpTRANS_SQUASH;
9889             break;
9890         case 'r':
9891             nondestruct = 1;
9892             break;
9893         default:
9894             goto no_more;
9895         }
9896         s++;
9897     }
9898   no_more:
9899
9900     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9901     o->op_private &= ~OPpTRANS_ALL;
9902     o->op_private |= del|squash|complement|
9903       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9904       (DO_UTF8(PL_parser->lex_sub_repl) ? OPpTRANS_TO_UTF   : 0);
9905
9906     PL_lex_op = o;
9907     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9908
9909
9910     return s;
9911 }
9912
9913 /* scan_heredoc
9914    Takes a pointer to the first < in <<FOO.
9915    Returns a pointer to the byte following <<FOO.
9916
9917    This function scans a heredoc, which involves different methods
9918    depending on whether we are in a string eval, quoted construct, etc.
9919    This is because PL_linestr could containing a single line of input, or
9920    a whole string being evalled, or the contents of the current quote-
9921    like operator.
9922
9923    The two basic methods are:
9924     - Steal lines from the input stream
9925     - Scan the heredoc in PL_linestr and remove it therefrom
9926
9927    In a file scope or filtered eval, the first method is used; in a
9928    string eval, the second.
9929
9930    In a quote-like operator, we have to choose between the two,
9931    depending on where we can find a newline.  We peek into outer lex-
9932    ing scopes until we find one with a newline in it.  If we reach the
9933    outermost lexing scope and it is a file, we use the stream method.
9934    Otherwise it is treated as an eval.
9935 */
9936
9937 STATIC char *
9938 S_scan_heredoc(pTHX_ char *s)
9939 {
9940     I32 op_type = OP_SCALAR;
9941     I32 len;
9942     SV *tmpstr;
9943     char term;
9944     char *d;
9945     char *e;
9946     char *peek;
9947     char *indent = 0;
9948     I32 indent_len = 0;
9949     bool indented = FALSE;
9950     const bool infile = PL_rsfp || PL_parser->filtered;
9951     const line_t origline = CopLINE(PL_curcop);
9952     LEXSHARED *shared = PL_parser->lex_shared;
9953
9954     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9955
9956     s += 2;
9957     d = PL_tokenbuf + 1;
9958     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9959     *PL_tokenbuf = '\n';
9960     peek = s;
9961     if (*peek == '~') {
9962         indented = TRUE;
9963         peek++; s++;
9964     }
9965     while (SPACE_OR_TAB(*peek))
9966         peek++;
9967     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9968         s = peek;
9969         term = *s++;
9970         s = delimcpy(d, e, s, PL_bufend, term, &len);
9971         if (s == PL_bufend)
9972             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9973         d += len;
9974         s++;
9975     }
9976     else {
9977         if (*s == '\\')
9978             /* <<\FOO is equivalent to <<'FOO' */
9979             s++, term = '\'';
9980         else
9981             term = '"';
9982         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9983             Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
9984         peek = s;
9985         while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
9986             peek += UTF ? UTF8SKIP(peek) : 1;
9987         }
9988         len = (peek - s >= e - d) ? (e - d) : (peek - s);
9989         Copy(s, d, len, char);
9990         s += len;
9991         d += len;
9992     }
9993     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9994         Perl_croak(aTHX_ "Delimiter for here document is too long");
9995     *d++ = '\n';
9996     *d = '\0';
9997     len = d - PL_tokenbuf;
9998
9999 #ifndef PERL_STRICT_CR
10000     d = (char *) memchr(s, '\r', PL_bufend - s);
10001     if (d) {
10002         char * const olds = s;
10003         s = d;
10004         while (s < PL_bufend) {
10005             if (*s == '\r') {
10006                 *d++ = '\n';
10007                 if (*++s == '\n')
10008                     s++;
10009             }
10010             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
10011                 *d++ = *s++;
10012                 s++;
10013             }
10014             else
10015                 *d++ = *s++;
10016         }
10017         *d = '\0';
10018         PL_bufend = d;
10019         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10020         s = olds;
10021     }
10022 #endif
10023
10024     tmpstr = newSV_type(SVt_PVIV);
10025     SvGROW(tmpstr, 80);
10026     if (term == '\'') {
10027         op_type = OP_CONST;
10028         SvIV_set(tmpstr, -1);
10029     }
10030     else if (term == '`') {
10031         op_type = OP_BACKTICK;
10032         SvIV_set(tmpstr, '\\');
10033     }
10034
10035     PL_multi_start = origline + 1 + PL_parser->herelines;
10036     PL_multi_open = PL_multi_close = '<';
10037     /* inside a string eval or quote-like operator */
10038     if (!infile || PL_lex_inwhat) {
10039         SV *linestr;
10040         char *bufend;
10041         char * const olds = s;
10042         PERL_CONTEXT * const cx = CX_CUR();
10043         /* These two fields are not set until an inner lexing scope is
10044            entered.  But we need them set here. */
10045         shared->ls_bufptr  = s;
10046         shared->ls_linestr = PL_linestr;
10047         if (PL_lex_inwhat)
10048           /* Look for a newline.  If the current buffer does not have one,
10049              peek into the line buffer of the parent lexing scope, going
10050              up as many levels as necessary to find one with a newline
10051              after bufptr.
10052            */
10053           while (!(s = (char *)memchr(
10054                     (void *)shared->ls_bufptr, '\n',
10055                     SvEND(shared->ls_linestr)-shared->ls_bufptr
10056                 ))) {
10057             shared = shared->ls_prev;
10058             /* shared is only null if we have gone beyond the outermost
10059                lexing scope.  In a file, we will have broken out of the
10060                loop in the previous iteration.  In an eval, the string buf-
10061                fer ends with "\n;", so the while condition above will have
10062                evaluated to false.  So shared can never be null.  Or so you
10063                might think.  Odd syntax errors like s;@{<<; can gobble up
10064                the implicit semicolon at the end of a flie, causing the
10065                file handle to be closed even when we are not in a string
10066                eval.  So shared may be null in that case.
10067                (Closing '}' here to balance the earlier open brace for
10068                editors that look for matched pairs.) */
10069             if (UNLIKELY(!shared))
10070                 goto interminable;
10071             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10072                most lexing scope.  In a file, shared->ls_linestr at that
10073                level is just one line, so there is no body to steal. */
10074             if (infile && !shared->ls_prev) {
10075                 s = olds;
10076                 goto streaming;
10077             }
10078           }
10079         else {  /* eval or we've already hit EOF */
10080             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10081             if (!s)
10082                 goto interminable;
10083         }
10084         linestr = shared->ls_linestr;
10085         bufend = SvEND(linestr);
10086         d = s;
10087         if (indented) {
10088             char *myolds = s;
10089
10090             while (s < bufend - len + 1) {
10091                 if (*s++ == '\n')
10092                     ++PL_parser->herelines;
10093
10094                 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10095                     char *backup = s;
10096                     indent_len = 0;
10097
10098                     /* Only valid if it's preceded by whitespace only */
10099                     while (backup != myolds && --backup >= myolds) {
10100                         if (! SPACE_OR_TAB(*backup)) {
10101                             break;
10102                         }
10103
10104                         indent_len++;
10105                     }
10106
10107                     /* No whitespace or all! */
10108                     if (backup == s || *backup == '\n') {
10109                         Newx(indent, indent_len + 1, char);
10110                         memcpy(indent, backup + 1, indent_len);
10111                         indent[indent_len] = 0;
10112                         s--; /* before our delimiter */
10113                         PL_parser->herelines--; /* this line doesn't count */
10114                         break;
10115                     }
10116                 }
10117             }
10118         } else {
10119             while (s < bufend - len + 1
10120                    && memNE(s,PL_tokenbuf,len) )
10121             {
10122                 if (*s++ == '\n')
10123                     ++PL_parser->herelines;
10124             }
10125         }
10126
10127         if (s >= bufend - len + 1) {
10128             goto interminable;
10129         }
10130         sv_setpvn(tmpstr,d+1,s-d);
10131         s += len - 1;
10132         /* the preceding stmt passes a newline */
10133         PL_parser->herelines++;
10134
10135         /* s now points to the newline after the heredoc terminator.
10136            d points to the newline before the body of the heredoc.
10137          */
10138
10139         /* We are going to modify linestr in place here, so set
10140            aside copies of the string if necessary for re-evals or
10141            (caller $n)[6]. */
10142         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10143            check shared->re_eval_str. */
10144         if (shared->re_eval_start || shared->re_eval_str) {
10145             /* Set aside the rest of the regexp */
10146             if (!shared->re_eval_str)
10147                 shared->re_eval_str =
10148                        newSVpvn(shared->re_eval_start,
10149                                 bufend - shared->re_eval_start);
10150             shared->re_eval_start -= s-d;
10151         }
10152         if (cxstack_ix >= 0
10153             && CxTYPE(cx) == CXt_EVAL
10154             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10155             && cx->blk_eval.cur_text == linestr)
10156         {
10157             cx->blk_eval.cur_text = newSVsv(linestr);
10158             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10159         }
10160         /* Copy everything from s onwards back to d. */
10161         Move(s,d,bufend-s + 1,char);
10162         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10163         /* Setting PL_bufend only applies when we have not dug deeper
10164            into other scopes, because sublex_done sets PL_bufend to
10165            SvEND(PL_linestr). */
10166         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10167         s = olds;
10168     }
10169     else
10170     {
10171       SV *linestr_save;
10172       char *oldbufptr_save;
10173       char *oldoldbufptr_save;
10174      streaming:
10175       SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
10176       term = PL_tokenbuf[1];
10177       len--;
10178       linestr_save = PL_linestr; /* must restore this afterwards */
10179       d = s;                     /* and this */
10180       oldbufptr_save = PL_oldbufptr;
10181       oldoldbufptr_save = PL_oldoldbufptr;
10182       PL_linestr = newSVpvs("");
10183       PL_bufend = SvPVX(PL_linestr);
10184       while (1) {
10185         PL_bufptr = PL_bufend;
10186         CopLINE_set(PL_curcop,
10187                     origline + 1 + PL_parser->herelines);
10188         if (!lex_next_chunk(LEX_NO_TERM)
10189          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10190             /* Simply freeing linestr_save might seem simpler here, as it
10191                does not matter what PL_linestr points to, since we are
10192                about to croak; but in a quote-like op, linestr_save
10193                will have been prospectively freed already, via
10194                SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10195                restore PL_linestr. */
10196             SvREFCNT_dec_NN(PL_linestr);
10197             PL_linestr = linestr_save;
10198             PL_oldbufptr = oldbufptr_save;
10199             PL_oldoldbufptr = oldoldbufptr_save;
10200             goto interminable;
10201         }
10202         CopLINE_set(PL_curcop, origline);
10203         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10204             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10205             /* ^That should be enough to avoid this needing to grow:  */
10206             sv_catpvs(PL_linestr, "\n\0");
10207             assert(s == SvPVX(PL_linestr));
10208             PL_bufend = SvEND(PL_linestr);
10209         }
10210         s = PL_bufptr;
10211         PL_parser->herelines++;
10212         PL_last_lop = PL_last_uni = NULL;
10213 #ifndef PERL_STRICT_CR
10214         if (PL_bufend - PL_linestart >= 2) {
10215             if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10216                 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10217             {
10218                 PL_bufend[-2] = '\n';
10219                 PL_bufend--;
10220                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10221             }
10222             else if (PL_bufend[-1] == '\r')
10223                 PL_bufend[-1] = '\n';
10224         }
10225         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10226             PL_bufend[-1] = '\n';
10227 #endif
10228         if (indented && (PL_bufend-s) >= len) {
10229             char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10230
10231             if (found) {
10232                 char *backup = found;
10233                 indent_len = 0;
10234
10235                 /* Only valid if it's preceded by whitespace only */
10236                 while (backup != s && --backup >= s) {
10237                     if (! SPACE_OR_TAB(*backup)) {
10238                         break;
10239                     }
10240                     indent_len++;
10241                 }
10242
10243                 /* All whitespace or none! */
10244                 if (backup == found || SPACE_OR_TAB(*backup)) {
10245                     Newx(indent, indent_len + 1, char);
10246                     memcpy(indent, backup, indent_len);
10247                     indent[indent_len] = 0;
10248                     SvREFCNT_dec(PL_linestr);
10249                     PL_linestr = linestr_save;
10250                     PL_linestart = SvPVX(linestr_save);
10251                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10252                     PL_oldbufptr = oldbufptr_save;
10253                     PL_oldoldbufptr = oldoldbufptr_save;
10254                     s = d;
10255                     break;
10256                 }
10257             }
10258
10259             /* Didn't find it */
10260             sv_catsv(tmpstr,PL_linestr);
10261         } else {
10262             if (*s == term && PL_bufend-s >= len
10263                 && memEQ(s,PL_tokenbuf + 1,len))
10264             {
10265                 SvREFCNT_dec(PL_linestr);
10266                 PL_linestr = linestr_save;
10267                 PL_linestart = SvPVX(linestr_save);
10268                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10269                 PL_oldbufptr = oldbufptr_save;
10270                 PL_oldoldbufptr = oldoldbufptr_save;
10271                 s = d;
10272                 break;
10273             } else {
10274                 sv_catsv(tmpstr,PL_linestr);
10275             }
10276         }
10277       }
10278     }
10279     PL_multi_end = origline + PL_parser->herelines;
10280     if (indented && indent) {
10281         STRLEN linecount = 1;
10282         STRLEN herelen = SvCUR(tmpstr);
10283         char *ss = SvPVX(tmpstr);
10284         char *se = ss + herelen;
10285         SV *newstr = newSV(herelen+1);
10286         SvPOK_on(newstr);
10287
10288         /* Trim leading whitespace */
10289         while (ss < se) {
10290             /* newline only? Copy and move on */
10291             if (*ss == '\n') {
10292                 sv_catpv(newstr,"\n");
10293                 ss++;
10294                 linecount++;
10295
10296             /* Found our indentation? Strip it */
10297             } else if (se - ss >= indent_len
10298                        && memEQ(ss, indent, indent_len))
10299             {
10300                 STRLEN le = 0;
10301
10302                 ss += indent_len;
10303
10304                 while ((ss + le) < se && *(ss + le) != '\n')
10305                     le++;
10306
10307                 sv_catpvn(newstr, ss, le);
10308
10309                 ss += le;
10310
10311             /* Line doesn't begin with our indentation? Croak */
10312             } else {
10313                 Perl_croak(aTHX_
10314                     "Indentation on line %d of here-doc doesn't match delimiter",
10315                     (int)linecount
10316                 );
10317             }
10318         }
10319         /* avoid sv_setsv() as we dont wan't to COW here */
10320         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10321         Safefree(indent);
10322         SvREFCNT_dec_NN(newstr);
10323     }
10324     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10325         SvPV_shrink_to_cur(tmpstr);
10326     }
10327     if (!IN_BYTES) {
10328         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10329             SvUTF8_on(tmpstr);
10330     }
10331     PL_lex_stuff = tmpstr;
10332     pl_yylval.ival = op_type;
10333     return s;
10334
10335   interminable:
10336     SvREFCNT_dec(tmpstr);
10337     CopLINE_set(PL_curcop, origline);
10338     missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
10339 }
10340
10341 /* scan_inputsymbol
10342    takes: position of first '<' in input buffer
10343    returns: position of first char following the matching '>' in
10344             input buffer
10345    side-effects: pl_yylval and lex_op are set.
10346
10347    This code handles:
10348
10349    <>           read from ARGV
10350    <<>>         read from ARGV without magic open
10351    <FH>         read from filehandle
10352    <pkg::FH>    read from package qualified filehandle
10353    <pkg'FH>     read from package qualified filehandle
10354    <$fh>        read from filehandle in $fh
10355    <*.h>        filename glob
10356
10357 */
10358
10359 STATIC char *
10360 S_scan_inputsymbol(pTHX_ char *start)
10361 {
10362     char *s = start;            /* current position in buffer */
10363     char *end;
10364     I32 len;
10365     bool nomagicopen = FALSE;
10366     char *d = PL_tokenbuf;                                      /* start of temp holding space */
10367     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
10368
10369     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10370
10371     end = (char *) memchr(s, '\n', PL_bufend - s);
10372     if (!end)
10373         end = PL_bufend;
10374     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10375         nomagicopen = TRUE;
10376         *d = '\0';
10377         len = 0;
10378         s += 3;
10379     }
10380     else
10381         s = delimcpy(d, e, s + 1, end, '>', &len);      /* extract until > */
10382
10383     /* die if we didn't have space for the contents of the <>,
10384        or if it didn't end, or if we see a newline
10385     */
10386
10387     if (len >= (I32)sizeof PL_tokenbuf)
10388         Perl_croak(aTHX_ "Excessively long <> operator");
10389     if (s >= end)
10390         Perl_croak(aTHX_ "Unterminated <> operator");
10391
10392     s++;
10393
10394     /* check for <$fh>
10395        Remember, only scalar variables are interpreted as filehandles by
10396        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10397        treated as a glob() call.
10398        This code makes use of the fact that except for the $ at the front,
10399        a scalar variable and a filehandle look the same.
10400     */
10401     if (*d == '$' && d[1]) d++;
10402
10403     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10404     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
10405         d += UTF ? UTF8SKIP(d) : 1;
10406     }
10407
10408     /* If we've tried to read what we allow filehandles to look like, and
10409        there's still text left, then it must be a glob() and not a getline.
10410        Use scan_str to pull out the stuff between the <> and treat it
10411        as nothing more than a string.
10412     */
10413
10414     if (d - PL_tokenbuf != len) {
10415         pl_yylval.ival = OP_GLOB;
10416         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10417         if (!s)
10418            Perl_croak(aTHX_ "Glob not terminated");
10419         return s;
10420     }
10421     else {
10422         bool readline_overriden = FALSE;
10423         GV *gv_readline;
10424         /* we're in a filehandle read situation */
10425         d = PL_tokenbuf;
10426
10427         /* turn <> into <ARGV> */
10428         if (!len)
10429             Copy("ARGV",d,5,char);
10430
10431         /* Check whether readline() is overriden */
10432         if ((gv_readline = gv_override("readline",8)))
10433             readline_overriden = TRUE;
10434
10435         /* if <$fh>, create the ops to turn the variable into a
10436            filehandle
10437         */
10438         if (*d == '$') {
10439             /* try to find it in the pad for this block, otherwise find
10440                add symbol table ops
10441             */
10442             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
10443             if (tmp != NOT_IN_PAD) {
10444                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10445                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10446                     HEK * const stashname = HvNAME_HEK(stash);
10447                     SV * const sym = sv_2mortal(newSVhek(stashname));
10448                     sv_catpvs(sym, "::");
10449                     sv_catpv(sym, d+1);
10450                     d = SvPVX(sym);
10451                     goto intro_sym;
10452                 }
10453                 else {
10454                     OP * const o = newOP(OP_PADSV, 0);
10455                     o->op_targ = tmp;
10456                     PL_lex_op = readline_overriden
10457                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10458                                 op_append_elem(OP_LIST, o,
10459                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10460                         : newUNOP(OP_READLINE, 0, o);
10461                 }
10462             }
10463             else {
10464                 GV *gv;
10465                 ++d;
10466               intro_sym:
10467                 gv = gv_fetchpv(d,
10468                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
10469                                 SVt_PV);
10470                 PL_lex_op = readline_overriden
10471                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10472                             op_append_elem(OP_LIST,
10473                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10474                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10475                     : newUNOP(OP_READLINE, 0,
10476                             newUNOP(OP_RV2SV, 0,
10477                                 newGVOP(OP_GV, 0, gv)));
10478             }
10479             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10480             pl_yylval.ival = OP_NULL;
10481         }
10482
10483         /* If it's none of the above, it must be a literal filehandle
10484            (<Foo::BAR> or <FOO>) so build a simple readline OP */
10485         else {
10486             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10487             PL_lex_op = readline_overriden
10488                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10489                         op_append_elem(OP_LIST,
10490                             newGVOP(OP_GV, 0, gv),
10491                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10492                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
10493             pl_yylval.ival = OP_NULL;
10494         }
10495     }
10496
10497     return s;
10498 }
10499
10500
10501 /* scan_str
10502    takes:
10503         start                   position in buffer
10504         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
10505                                 only if they are of the open/close form
10506         keep_delims             preserve the delimiters around the string
10507         re_reparse              compiling a run-time /(?{})/:
10508                                    collapse // to /,  and skip encoding src
10509         delimp                  if non-null, this is set to the position of
10510                                 the closing delimiter, or just after it if
10511                                 the closing and opening delimiters differ
10512                                 (i.e., the opening delimiter of a substitu-
10513                                 tion replacement)
10514    returns: position to continue reading from buffer
10515    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10516         updates the read buffer.
10517
10518    This subroutine pulls a string out of the input.  It is called for:
10519         q               single quotes           q(literal text)
10520         '               single quotes           'literal text'
10521         qq              double quotes           qq(interpolate $here please)
10522         "               double quotes           "interpolate $here please"
10523         qx              backticks               qx(/bin/ls -l)
10524         `               backticks               `/bin/ls -l`
10525         qw              quote words             @EXPORT_OK = qw( func() $spam )
10526         m//             regexp match            m/this/
10527         s///            regexp substitute       s/this/that/
10528         tr///           string transliterate    tr/this/that/
10529         y///            string transliterate    y/this/that/
10530         ($*@)           sub prototypes          sub foo ($)
10531         (stuff)         sub attr parameters     sub foo : attr(stuff)
10532         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10533
10534    In most of these cases (all but <>, patterns and transliterate)
10535    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10536    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10537    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10538    calls scan_str().
10539
10540    It skips whitespace before the string starts, and treats the first
10541    character as the delimiter.  If the delimiter is one of ([{< then
10542    the corresponding "close" character )]}> is used as the closing
10543    delimiter.  It allows quoting of delimiters, and if the string has
10544    balanced delimiters ([{<>}]) it allows nesting.
10545
10546    On success, the SV with the resulting string is put into lex_stuff or,
10547    if that is already non-NULL, into lex_repl. The second case occurs only
10548    when parsing the RHS of the special constructs s/// and tr/// (y///).
10549    For convenience, the terminating delimiter character is stuffed into
10550    SvIVX of the SV.
10551 */
10552
10553 STATIC char *
10554 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
10555                  char **delimp
10556     )
10557 {
10558     SV *sv;                     /* scalar value: string */
10559     const char *tmps;           /* temp string, used for delimiter matching */
10560     char *s = start;            /* current position in the buffer */
10561     char term;                  /* terminating character */
10562     char *to;                   /* current position in the sv's data */
10563     I32 brackets = 1;           /* bracket nesting level */
10564     bool has_utf8 = FALSE;      /* is there any utf8 content? */
10565     IV termcode;                /* terminating char. code */
10566     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
10567     STRLEN termlen;             /* length of terminating string */
10568     line_t herelines;
10569
10570     /* The delimiters that have a mirror-image closing one */
10571     const char * opening_delims = "([{<";
10572     const char * closing_delims = ")]}>";
10573
10574     const char * non_grapheme_msg = "Use of unassigned code point or"
10575                                     " non-standalone grapheme for a delimiter"
10576                                     " will be a fatal error starting in Perl"
10577                                     " 5.30";
10578     /* The only non-UTF character that isn't a stand alone grapheme is
10579      * white-space, hence can't be a delimiter.  So can skip for non-UTF-8 */
10580     bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED);
10581
10582     PERL_ARGS_ASSERT_SCAN_STR;
10583
10584     /* skip space before the delimiter */
10585     if (isSPACE(*s)) {
10586         s = skipspace(s);
10587     }
10588
10589     /* mark where we are, in case we need to report errors */
10590     CLINE;
10591
10592     /* after skipping whitespace, the next character is the terminator */
10593     term = *s;
10594     if (!UTF || UTF8_IS_INVARIANT(term)) {
10595         termcode = termstr[0] = term;
10596         termlen = 1;
10597     }
10598     else {
10599         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10600         if (check_grapheme) {
10601             if (   UNLIKELY(UNICODE_IS_SUPER(termcode))
10602                 || UNLIKELY(UNICODE_IS_NONCHAR(termcode)))
10603             {
10604                 /* These are considered graphemes, and since the ending
10605                  * delimiter will be the same, we don't have to check the other
10606                  * end */
10607                 check_grapheme = FALSE;
10608             }
10609             else if (UNLIKELY(! _is_grapheme((U8 *) start,
10610                                              (U8 *) s,
10611                                              (U8 *) PL_bufend,
10612                                              termcode)))
10613             {
10614                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "%s", non_grapheme_msg);
10615
10616                 /* Don't have to check the other end, as have already warned at
10617                  * this one */
10618                 check_grapheme = FALSE;
10619             }
10620         }
10621
10622         Copy(s, termstr, termlen, U8);
10623     }
10624
10625     /* mark where we are */
10626     PL_multi_start = CopLINE(PL_curcop);
10627     PL_multi_open = termcode;
10628     herelines = PL_parser->herelines;
10629
10630     /* If the delimiter has a mirror-image closing one, get it */
10631     if (term && (tmps = strchr(opening_delims, term))) {
10632         termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
10633     }
10634
10635     PL_multi_close = termcode;
10636
10637     if (PL_multi_open == PL_multi_close) {
10638         keep_bracketed_quoted = FALSE;
10639     }
10640
10641     /* create a new SV to hold the contents.  79 is the SV's initial length.
10642        What a random number. */
10643     sv = newSV_type(SVt_PVIV);
10644     SvGROW(sv, 80);
10645     SvIV_set(sv, termcode);
10646     (void)SvPOK_only(sv);               /* validate pointer */
10647
10648     /* move past delimiter and try to read a complete string */
10649     if (keep_delims)
10650         sv_catpvn(sv, s, termlen);
10651     s += termlen;
10652     for (;;) {
10653         /* extend sv if need be */
10654         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10655         /* set 'to' to the next character in the sv's string */
10656         to = SvPVX(sv)+SvCUR(sv);
10657
10658         /* if open delimiter is the close delimiter read unbridle */
10659         if (PL_multi_open == PL_multi_close) {
10660             for (; s < PL_bufend; s++,to++) {
10661                 /* embedded newlines increment the current line number */
10662                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10663                     COPLINE_INC_WITH_HERELINES;
10664                 /* handle quoted delimiters */
10665                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10666                     if (!keep_bracketed_quoted
10667                         && (s[1] == term
10668                             || (re_reparse && s[1] == '\\'))
10669                     )
10670                         s++;
10671                     else /* any other quotes are simply copied straight through */
10672                         *to++ = *s++;
10673                 }
10674                 /* terminate when run out of buffer (the for() condition), or
10675                    have found the terminator */
10676                 else if (*s == term) {  /* First byte of terminator matches */
10677                     if (termlen == 1)   /* If is the only byte, are done */
10678                         break;
10679
10680                     /* If the remainder of the terminator matches, also are
10681                      * done, after checking that is a separate grapheme */
10682                     if (   s + termlen <= PL_bufend
10683                         && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
10684                     {
10685                         if (   check_grapheme
10686                             && UNLIKELY(! _is_grapheme((U8 *) start,
10687                                                               (U8 *) s,
10688                                                               (U8 *) PL_bufend,
10689                                                               termcode)))
10690                         {
10691                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10692                                         "%s", non_grapheme_msg);
10693                         }
10694                         break;
10695                     }
10696                 }
10697                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
10698                     has_utf8 = TRUE;
10699                 }
10700
10701                 *to = *s;
10702             }
10703         }
10704
10705         /* if the terminator isn't the same as the start character (e.g.,
10706            matched brackets), we have to allow more in the quoting, and
10707            be prepared for nested brackets.
10708         */
10709         else {
10710             /* read until we run out of string, or we find the terminator */
10711             for (; s < PL_bufend; s++,to++) {
10712                 /* embedded newlines increment the line count */
10713                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10714                     COPLINE_INC_WITH_HERELINES;
10715                 /* backslashes can escape the open or closing characters */
10716                 if (*s == '\\' && s+1 < PL_bufend) {
10717                     if (!keep_bracketed_quoted
10718                        && ( ((UV)s[1] == PL_multi_open)
10719                          || ((UV)s[1] == PL_multi_close) ))
10720                     {
10721                         s++;
10722                     }
10723                     else
10724                         *to++ = *s++;
10725                 }
10726                 /* allow nested opens and closes */
10727                 else if ((UV)*s == PL_multi_close && --brackets <= 0)
10728                     break;
10729                 else if ((UV)*s == PL_multi_open)
10730                     brackets++;
10731                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10732                     has_utf8 = TRUE;
10733                 *to = *s;
10734             }
10735         }
10736         /* terminate the copied string and update the sv's end-of-string */
10737         *to = '\0';
10738         SvCUR_set(sv, to - SvPVX_const(sv));
10739
10740         /*
10741          * this next chunk reads more into the buffer if we're not done yet
10742          */
10743
10744         if (s < PL_bufend)
10745             break;              /* handle case where we are done yet :-) */
10746
10747 #ifndef PERL_STRICT_CR
10748         if (to - SvPVX_const(sv) >= 2) {
10749             if (   (to[-2] == '\r' && to[-1] == '\n')
10750                 || (to[-2] == '\n' && to[-1] == '\r'))
10751             {
10752                 to[-2] = '\n';
10753                 to--;
10754                 SvCUR_set(sv, to - SvPVX_const(sv));
10755             }
10756             else if (to[-1] == '\r')
10757                 to[-1] = '\n';
10758         }
10759         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10760             to[-1] = '\n';
10761 #endif
10762
10763         /* if we're out of file, or a read fails, bail and reset the current
10764            line marker so we can report where the unterminated string began
10765         */
10766         COPLINE_INC_WITH_HERELINES;
10767         PL_bufptr = PL_bufend;
10768         if (!lex_next_chunk(0)) {
10769             sv_free(sv);
10770             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10771             return NULL;
10772         }
10773         s = start = PL_bufptr;
10774     }
10775
10776     /* at this point, we have successfully read the delimited string */
10777
10778     if (keep_delims)
10779             sv_catpvn(sv, s, termlen);
10780     s += termlen;
10781
10782     if (has_utf8)
10783         SvUTF8_on(sv);
10784
10785     PL_multi_end = CopLINE(PL_curcop);
10786     CopLINE_set(PL_curcop, PL_multi_start);
10787     PL_parser->herelines = herelines;
10788
10789     /* if we allocated too much space, give some back */
10790     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10791         SvLEN_set(sv, SvCUR(sv) + 1);
10792         SvPV_renew(sv, SvLEN(sv));
10793     }
10794
10795     /* decide whether this is the first or second quoted string we've read
10796        for this op
10797     */
10798
10799     if (PL_lex_stuff)
10800         PL_parser->lex_sub_repl = sv;
10801     else
10802         PL_lex_stuff = sv;
10803     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10804     return s;
10805 }
10806
10807 /*
10808   scan_num
10809   takes: pointer to position in buffer
10810   returns: pointer to new position in buffer
10811   side-effects: builds ops for the constant in pl_yylval.op
10812
10813   Read a number in any of the formats that Perl accepts:
10814
10815   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10816   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10817   0b[01](_?[01])*                                       binary integers
10818   0[0-7](_?[0-7])*                                      octal integers
10819   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
10820   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
10821
10822   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10823   thing it reads.
10824
10825   If it reads a number without a decimal point or an exponent, it will
10826   try converting the number to an integer and see if it can do so
10827   without loss of precision.
10828 */
10829
10830 char *
10831 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10832 {
10833     const char *s = start;      /* current position in buffer */
10834     char *d;                    /* destination in temp buffer */
10835     char *e;                    /* end of temp buffer */
10836     NV nv;                              /* number read, as a double */
10837     SV *sv = NULL;                      /* place to put the converted number */
10838     bool floatit;                       /* boolean: int or float? */
10839     const char *lastub = NULL;          /* position of last underbar */
10840     static const char* const number_too_long = "Number too long";
10841     bool warned_about_underscore = 0;
10842 #define WARN_ABOUT_UNDERSCORE() \
10843         do { \
10844             if (!warned_about_underscore) { \
10845                 warned_about_underscore = 1; \
10846                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
10847                                "Misplaced _ in number"); \
10848             } \
10849         } while(0)
10850     /* Hexadecimal floating point.
10851      *
10852      * In many places (where we have quads and NV is IEEE 754 double)
10853      * we can fit the mantissa bits of a NV into an unsigned quad.
10854      * (Note that UVs might not be quads even when we have quads.)
10855      * This will not work everywhere, though (either no quads, or
10856      * using long doubles), in which case we have to resort to NV,
10857      * which will probably mean horrible loss of precision due to
10858      * multiple fp operations. */
10859     bool hexfp = FALSE;
10860     int total_bits = 0;
10861     int significant_bits = 0;
10862 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10863 #  define HEXFP_UQUAD
10864     Uquad_t hexfp_uquad = 0;
10865     int hexfp_frac_bits = 0;
10866 #else
10867 #  define HEXFP_NV
10868     NV hexfp_nv = 0.0;
10869 #endif
10870     NV hexfp_mult = 1.0;
10871     UV high_non_zero = 0; /* highest digit */
10872     int non_zero_integer_digits = 0;
10873
10874     PERL_ARGS_ASSERT_SCAN_NUM;
10875
10876     /* We use the first character to decide what type of number this is */
10877
10878     switch (*s) {
10879     default:
10880         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10881
10882     /* if it starts with a 0, it could be an octal number, a decimal in
10883        0.13 disguise, or a hexadecimal number, or a binary number. */
10884     case '0':
10885         {
10886           /* variables:
10887              u          holds the "number so far"
10888              shift      the power of 2 of the base
10889                         (hex == 4, octal == 3, binary == 1)
10890              overflowed was the number more than we can hold?
10891
10892              Shift is used when we add a digit.  It also serves as an "are
10893              we in octal/hex/binary?" indicator to disallow hex characters
10894              when in octal mode.
10895            */
10896             NV n = 0.0;
10897             UV u = 0;
10898             I32 shift;
10899             bool overflowed = FALSE;
10900             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10901             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10902             static const char* const bases[5] =
10903               { "", "binary", "", "octal", "hexadecimal" };
10904             static const char* const Bases[5] =
10905               { "", "Binary", "", "Octal", "Hexadecimal" };
10906             static const char* const maxima[5] =
10907               { "",
10908                 "0b11111111111111111111111111111111",
10909                 "",
10910                 "037777777777",
10911                 "0xffffffff" };
10912             const char *base, *Base, *max;
10913
10914             /* check for hex */
10915             if (isALPHA_FOLD_EQ(s[1], 'x')) {
10916                 shift = 4;
10917                 s += 2;
10918                 just_zero = FALSE;
10919             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10920                 shift = 1;
10921                 s += 2;
10922                 just_zero = FALSE;
10923             }
10924             /* check for a decimal in disguise */
10925             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10926                 goto decimal;
10927             /* so it must be octal */
10928             else {
10929                 shift = 3;
10930                 s++;
10931             }
10932
10933             if (*s == '_') {
10934                 WARN_ABOUT_UNDERSCORE();
10935                lastub = s++;
10936             }
10937
10938             base = bases[shift];
10939             Base = Bases[shift];
10940             max  = maxima[shift];
10941
10942             /* read the rest of the number */
10943             for (;;) {
10944                 /* x is used in the overflow test,
10945                    b is the digit we're adding on. */
10946                 UV x, b;
10947
10948                 switch (*s) {
10949
10950                 /* if we don't mention it, we're done */
10951                 default:
10952                     goto out;
10953
10954                 /* _ are ignored -- but warned about if consecutive */
10955                 case '_':
10956                     if (lastub && s == lastub + 1)
10957                         WARN_ABOUT_UNDERSCORE();
10958                     lastub = s++;
10959                     break;
10960
10961                 /* 8 and 9 are not octal */
10962                 case '8': case '9':
10963                     if (shift == 3)
10964                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10965                     /* FALLTHROUGH */
10966
10967                 /* octal digits */
10968                 case '2': case '3': case '4':
10969                 case '5': case '6': case '7':
10970                     if (shift == 1)
10971                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10972                     /* FALLTHROUGH */
10973
10974                 case '0': case '1':
10975                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10976                     goto digit;
10977
10978                 /* hex digits */
10979                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10980                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10981                     /* make sure they said 0x */
10982                     if (shift != 4)
10983                         goto out;
10984                     b = (*s++ & 7) + 9;
10985
10986                     /* Prepare to put the digit we have onto the end
10987                        of the number so far.  We check for overflows.
10988                     */
10989
10990                   digit:
10991                     just_zero = FALSE;
10992                     if (!overflowed) {
10993                         assert(shift >= 0);
10994                         x = u << shift; /* make room for the digit */
10995
10996                         total_bits += shift;
10997
10998                         if ((x >> shift) != u
10999                             && !(PL_hints & HINT_NEW_BINARY)) {
11000                             overflowed = TRUE;
11001                             n = (NV) u;
11002                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11003                                              "Integer overflow in %s number",
11004                                              base);
11005                         } else
11006                             u = x | b;          /* add the digit to the end */
11007                     }
11008                     if (overflowed) {
11009                         n *= nvshift[shift];
11010                         /* If an NV has not enough bits in its
11011                          * mantissa to represent an UV this summing of
11012                          * small low-order numbers is a waste of time
11013                          * (because the NV cannot preserve the
11014                          * low-order bits anyway): we could just
11015                          * remember when did we overflow and in the
11016                          * end just multiply n by the right
11017                          * amount. */
11018                         n += (NV) b;
11019                     }
11020
11021                     if (high_non_zero == 0 && b > 0)
11022                         high_non_zero = b;
11023
11024                     if (high_non_zero)
11025                         non_zero_integer_digits++;
11026
11027                     /* this could be hexfp, but peek ahead
11028                      * to avoid matching ".." */
11029                     if (UNLIKELY(HEXFP_PEEK(s))) {
11030                         goto out;
11031                     }
11032
11033                     break;
11034                 }
11035             }
11036
11037           /* if we get here, we had success: make a scalar value from
11038              the number.
11039           */
11040           out:
11041
11042             /* final misplaced underbar check */
11043             if (s[-1] == '_')
11044                 WARN_ABOUT_UNDERSCORE();
11045
11046             if (UNLIKELY(HEXFP_PEEK(s))) {
11047                 /* Do sloppy (on the underbars) but quick detection
11048                  * (and value construction) for hexfp, the decimal
11049                  * detection will shortly be more thorough with the
11050                  * underbar checks. */
11051                 const char* h = s;
11052                 significant_bits = non_zero_integer_digits * shift;
11053 #ifdef HEXFP_UQUAD
11054                 hexfp_uquad = u;
11055 #else /* HEXFP_NV */
11056                 hexfp_nv = u;
11057 #endif
11058                 /* Ignore the leading zero bits of
11059                  * the high (first) non-zero digit. */
11060                 if (high_non_zero) {
11061                     if (high_non_zero < 0x8)
11062                         significant_bits--;
11063                     if (high_non_zero < 0x4)
11064                         significant_bits--;
11065                     if (high_non_zero < 0x2)
11066                         significant_bits--;
11067                 }
11068
11069                 if (*h == '.') {
11070 #ifdef HEXFP_NV
11071                     NV nv_mult = 1.0;
11072 #endif
11073                     bool accumulate = TRUE;
11074                     U8 b;
11075                     int lim = 1 << shift;
11076                     for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11077                                *h == '_'); h++) {
11078                         if (isXDIGIT(*h)) {
11079                             significant_bits += shift;
11080 #ifdef HEXFP_UQUAD
11081                             if (accumulate) {
11082                                 if (significant_bits < NV_MANT_DIG) {
11083                                     /* We are in the long "run" of xdigits,
11084                                      * accumulate the full four bits. */
11085                                     assert(shift >= 0);
11086                                     hexfp_uquad <<= shift;
11087                                     hexfp_uquad |= b;
11088                                     hexfp_frac_bits += shift;
11089                                 } else if (significant_bits - shift < NV_MANT_DIG) {
11090                                     /* We are at a hexdigit either at,
11091                                      * or straddling, the edge of mantissa.
11092                                      * We will try grabbing as many as
11093                                      * possible bits. */
11094                                     int tail =
11095                                       significant_bits - NV_MANT_DIG;
11096                                     if (tail <= 0)
11097                                        tail += shift;
11098                                     assert(tail >= 0);
11099                                     hexfp_uquad <<= tail;
11100                                     assert((shift - tail) >= 0);
11101                                     hexfp_uquad |= b >> (shift - tail);
11102                                     hexfp_frac_bits += tail;
11103
11104                                     /* Ignore the trailing zero bits
11105                                      * of the last non-zero xdigit.
11106                                      *
11107                                      * The assumption here is that if
11108                                      * one has input of e.g. the xdigit
11109                                      * eight (0x8), there is only one
11110                                      * bit being input, not the full
11111                                      * four bits.  Conversely, if one
11112                                      * specifies a zero xdigit, the
11113                                      * assumption is that one really
11114                                      * wants all those bits to be zero. */
11115                                     if (b) {
11116                                         if ((b & 0x1) == 0x0) {
11117                                             significant_bits--;
11118                                             if ((b & 0x2) == 0x0) {
11119                                                 significant_bits--;
11120                                                 if ((b & 0x4) == 0x0) {
11121                                                     significant_bits--;
11122                                                 }
11123                                             }
11124                                         }
11125                                     }
11126
11127                                     accumulate = FALSE;
11128                                 }
11129                             } else {
11130                                 /* Keep skipping the xdigits, and
11131                                  * accumulating the significant bits,
11132                                  * but do not shift the uquad
11133                                  * (which would catastrophically drop
11134                                  * high-order bits) or accumulate the
11135                                  * xdigits anymore. */
11136                             }
11137 #else /* HEXFP_NV */
11138                             if (accumulate) {
11139                                 nv_mult /= nvshift[shift];
11140                                 if (nv_mult > 0.0)
11141                                     hexfp_nv += b * nv_mult;
11142                                 else
11143                                     accumulate = FALSE;
11144                             }
11145 #endif
11146                         }
11147                         if (significant_bits >= NV_MANT_DIG)
11148                             accumulate = FALSE;
11149                     }
11150                 }
11151
11152                 if ((total_bits > 0 || significant_bits > 0) &&
11153                     isALPHA_FOLD_EQ(*h, 'p')) {
11154                     bool negexp = FALSE;
11155                     h++;
11156                     if (*h == '+')
11157                         h++;
11158                     else if (*h == '-') {
11159                         negexp = TRUE;
11160                         h++;
11161                     }
11162                     if (isDIGIT(*h)) {
11163                         I32 hexfp_exp = 0;
11164                         while (isDIGIT(*h) || *h == '_') {
11165                             if (isDIGIT(*h)) {
11166                                 hexfp_exp *= 10;
11167                                 hexfp_exp += *h - '0';
11168 #ifdef NV_MIN_EXP
11169                                 if (negexp
11170                                     && -hexfp_exp < NV_MIN_EXP - 1) {
11171                                     /* NOTE: this means that the exponent
11172                                      * underflow warning happens for
11173                                      * the IEEE 754 subnormals (denormals),
11174                                      * because DBL_MIN_EXP etc are the lowest
11175                                      * possible binary (or, rather, DBL_RADIX-base)
11176                                      * exponent for normals, not subnormals.
11177                                      *
11178                                      * This may or may not be a good thing. */
11179                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11180                                                    "Hexadecimal float: exponent underflow");
11181                                     break;
11182                                 }
11183 #endif
11184 #ifdef NV_MAX_EXP
11185                                 if (!negexp
11186                                     && hexfp_exp > NV_MAX_EXP - 1) {
11187                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11188                                                    "Hexadecimal float: exponent overflow");
11189                                     break;
11190                                 }
11191 #endif
11192                             }
11193                             h++;
11194                         }
11195                         if (negexp)
11196                             hexfp_exp = -hexfp_exp;
11197 #ifdef HEXFP_UQUAD
11198                         hexfp_exp -= hexfp_frac_bits;
11199 #endif
11200                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
11201                         hexfp = TRUE;
11202                         goto decimal;
11203                     }
11204                 }
11205             }
11206
11207             if (overflowed) {
11208                 if (n > 4294967295.0)
11209                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11210                                    "%s number > %s non-portable",
11211                                    Base, max);
11212                 sv = newSVnv(n);
11213             }
11214             else {
11215 #if UVSIZE > 4
11216                 if (u > 0xffffffff)
11217                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11218                                    "%s number > %s non-portable",
11219                                    Base, max);
11220 #endif
11221                 sv = newSVuv(u);
11222             }
11223             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11224                 sv = new_constant(start, s - start, "integer",
11225                                   sv, NULL, NULL, 0);
11226             else if (PL_hints & HINT_NEW_BINARY)
11227                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11228         }
11229         break;
11230
11231     /*
11232       handle decimal numbers.
11233       we're also sent here when we read a 0 as the first digit
11234     */
11235     case '1': case '2': case '3': case '4': case '5':
11236     case '6': case '7': case '8': case '9': case '.':
11237       decimal:
11238         d = PL_tokenbuf;
11239         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11240         floatit = FALSE;
11241         if (hexfp) {
11242             floatit = TRUE;
11243             *d++ = '0';
11244             *d++ = 'x';
11245             s = start + 2;
11246         }
11247
11248         /* read next group of digits and _ and copy into d */
11249         while (isDIGIT(*s)
11250                || *s == '_'
11251                || UNLIKELY(hexfp && isXDIGIT(*s)))
11252         {
11253             /* skip underscores, checking for misplaced ones
11254                if -w is on
11255             */
11256             if (*s == '_') {
11257                 if (lastub && s == lastub + 1)
11258                     WARN_ABOUT_UNDERSCORE();
11259                 lastub = s++;
11260             }
11261             else {
11262                 /* check for end of fixed-length buffer */
11263                 if (d >= e)
11264                     Perl_croak(aTHX_ "%s", number_too_long);
11265                 /* if we're ok, copy the character */
11266                 *d++ = *s++;
11267             }
11268         }
11269
11270         /* final misplaced underbar check */
11271         if (lastub && s == lastub + 1)
11272             WARN_ABOUT_UNDERSCORE();
11273
11274         /* read a decimal portion if there is one.  avoid
11275            3..5 being interpreted as the number 3. followed
11276            by .5
11277         */
11278         if (*s == '.' && s[1] != '.') {
11279             floatit = TRUE;
11280             *d++ = *s++;
11281
11282             if (*s == '_') {
11283                 WARN_ABOUT_UNDERSCORE();
11284                 lastub = s;
11285             }
11286
11287             /* copy, ignoring underbars, until we run out of digits.
11288             */
11289             for (; isDIGIT(*s)
11290                    || *s == '_'
11291                    || UNLIKELY(hexfp && isXDIGIT(*s));
11292                  s++)
11293             {
11294                 /* fixed length buffer check */
11295                 if (d >= e)
11296                     Perl_croak(aTHX_ "%s", number_too_long);
11297                 if (*s == '_') {
11298                    if (lastub && s == lastub + 1)
11299                         WARN_ABOUT_UNDERSCORE();
11300                    lastub = s;
11301                 }
11302                 else
11303                     *d++ = *s;
11304             }
11305             /* fractional part ending in underbar? */
11306             if (s[-1] == '_')
11307                 WARN_ABOUT_UNDERSCORE();
11308             if (*s == '.' && isDIGIT(s[1])) {
11309                 /* oops, it's really a v-string, but without the "v" */
11310                 s = start;
11311                 goto vstring;
11312             }
11313         }
11314
11315         /* read exponent part, if present */
11316         if ((isALPHA_FOLD_EQ(*s, 'e')
11317               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11318             && strchr("+-0123456789_", s[1]))
11319         {
11320             int exp_digits = 0;
11321             const char *save_s = s;
11322             char * save_d = d;
11323
11324             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11325                ditto for p (hexfloats) */
11326             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11327                 /* At least some Mach atof()s don't grok 'E' */
11328                 *d++ = 'e';
11329             }
11330             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11331                 *d++ = 'p';
11332             }
11333
11334             s++;
11335
11336
11337             /* stray preinitial _ */
11338             if (*s == '_') {
11339                 WARN_ABOUT_UNDERSCORE();
11340                 lastub = s++;
11341             }
11342
11343             /* allow positive or negative exponent */
11344             if (*s == '+' || *s == '-')
11345                 *d++ = *s++;
11346
11347             /* stray initial _ */
11348             if (*s == '_') {
11349                 WARN_ABOUT_UNDERSCORE();
11350                 lastub = s++;
11351             }
11352
11353             /* read digits of exponent */
11354             while (isDIGIT(*s) || *s == '_') {
11355                 if (isDIGIT(*s)) {
11356                     ++exp_digits;
11357                     if (d >= e)
11358                         Perl_croak(aTHX_ "%s", number_too_long);
11359                     *d++ = *s++;
11360                 }
11361                 else {
11362                    if (((lastub && s == lastub + 1)
11363                         || (!isDIGIT(s[1]) && s[1] != '_')))
11364                         WARN_ABOUT_UNDERSCORE();
11365                    lastub = s++;
11366                 }
11367             }
11368
11369             if (!exp_digits) {
11370                 /* no exponent digits, the [eEpP] could be for something else,
11371                  * though in practice we don't get here for p since that's preparsed
11372                  * earlier, and results in only the 0xX being consumed, so behave similarly
11373                  * for decimal floats and consume only the D.DD, leaving the [eE] to the
11374                  * next token.
11375                  */
11376                 s = save_s;
11377                 d = save_d;
11378             }
11379             else {
11380                 floatit = TRUE;
11381             }
11382         }
11383
11384
11385         /*
11386            We try to do an integer conversion first if no characters
11387            indicating "float" have been found.
11388          */
11389
11390         if (!floatit) {
11391             UV uv;
11392             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11393
11394             if (flags == IS_NUMBER_IN_UV) {
11395               if (uv <= IV_MAX)
11396                 sv = newSViv(uv); /* Prefer IVs over UVs. */
11397               else
11398                 sv = newSVuv(uv);
11399             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11400               if (uv <= (UV) IV_MIN)
11401                 sv = newSViv(-(IV)uv);
11402               else
11403                 floatit = TRUE;
11404             } else
11405               floatit = TRUE;
11406         }
11407         if (floatit) {
11408             STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
11409             /* terminate the string */
11410             *d = '\0';
11411             if (UNLIKELY(hexfp)) {
11412 #  ifdef NV_MANT_DIG
11413                 if (significant_bits > NV_MANT_DIG)
11414                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11415                                    "Hexadecimal float: mantissa overflow");
11416 #  endif
11417 #ifdef HEXFP_UQUAD
11418                 nv = hexfp_uquad * hexfp_mult;
11419 #else /* HEXFP_NV */
11420                 nv = hexfp_nv * hexfp_mult;
11421 #endif
11422             } else {
11423                 nv = Atof(PL_tokenbuf);
11424             }
11425             RESTORE_LC_NUMERIC_UNDERLYING();
11426             sv = newSVnv(nv);
11427         }
11428
11429         if ( floatit
11430              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11431             const char *const key = floatit ? "float" : "integer";
11432             const STRLEN keylen = floatit ? 5 : 7;
11433             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11434                                 key, keylen, sv, NULL, NULL, 0);
11435         }
11436         break;
11437
11438     /* if it starts with a v, it could be a v-string */
11439     case 'v':
11440     vstring:
11441                 sv = newSV(5); /* preallocate storage space */
11442                 ENTER_with_name("scan_vstring");
11443                 SAVEFREESV(sv);
11444                 s = scan_vstring(s, PL_bufend, sv);
11445                 SvREFCNT_inc_simple_void_NN(sv);
11446                 LEAVE_with_name("scan_vstring");
11447         break;
11448     }
11449
11450     /* make the op for the constant and return */
11451
11452     if (sv)
11453         lvalp->opval = newSVOP(OP_CONST, 0, sv);
11454     else
11455         lvalp->opval = NULL;
11456
11457     return (char *)s;
11458 }
11459
11460 STATIC char *
11461 S_scan_formline(pTHX_ char *s)
11462 {
11463     SV * const stuff = newSVpvs("");
11464     bool needargs = FALSE;
11465     bool eofmt = FALSE;
11466
11467     PERL_ARGS_ASSERT_SCAN_FORMLINE;
11468
11469     while (!needargs) {
11470         char *eol;
11471         if (*s == '.') {
11472             char *t = s+1;
11473 #ifdef PERL_STRICT_CR
11474             while (SPACE_OR_TAB(*t))
11475                 t++;
11476 #else
11477             while (SPACE_OR_TAB(*t) || *t == '\r')
11478                 t++;
11479 #endif
11480             if (*t == '\n' || t == PL_bufend) {
11481                 eofmt = TRUE;
11482                 break;
11483             }
11484         }
11485         eol = (char *) memchr(s,'\n',PL_bufend-s);
11486         if (!eol++)
11487                 eol = PL_bufend;
11488         if (*s != '#') {
11489             char *t;
11490             for (t = s; t < eol; t++) {
11491                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11492                     needargs = FALSE;
11493                     goto enough;        /* ~~ must be first line in formline */
11494                 }
11495                 if (*t == '@' || *t == '^')
11496                     needargs = TRUE;
11497             }
11498             if (eol > s) {
11499                 sv_catpvn(stuff, s, eol-s);
11500 #ifndef PERL_STRICT_CR
11501                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11502                     char *end = SvPVX(stuff) + SvCUR(stuff);
11503                     end[-2] = '\n';
11504                     end[-1] = '\0';
11505                     SvCUR_set(stuff, SvCUR(stuff) - 1);
11506                 }
11507 #endif
11508             }
11509             else
11510               break;
11511         }
11512         s = (char*)eol;
11513         if ((PL_rsfp || PL_parser->filtered)
11514          && PL_parser->form_lex_state == LEX_NORMAL) {
11515             bool got_some;
11516             PL_bufptr = PL_bufend;
11517             COPLINE_INC_WITH_HERELINES;
11518             got_some = lex_next_chunk(0);
11519             CopLINE_dec(PL_curcop);
11520             s = PL_bufptr;
11521             if (!got_some)
11522                 break;
11523         }
11524         incline(s, PL_bufend);
11525     }
11526   enough:
11527     if (!SvCUR(stuff) || needargs)
11528         PL_lex_state = PL_parser->form_lex_state;
11529     if (SvCUR(stuff)) {
11530         PL_expect = XSTATE;
11531         if (needargs) {
11532             const char *s2 = s;
11533             while (isSPACE(*s2) && *s2 != '\n')
11534                 s2++;
11535             if (*s2 == '{') {
11536                 PL_expect = XTERMBLOCK;
11537                 NEXTVAL_NEXTTOKE.ival = 0;
11538                 force_next(DO);
11539             }
11540             NEXTVAL_NEXTTOKE.ival = 0;
11541             force_next(FORMLBRACK);
11542         }
11543         if (!IN_BYTES) {
11544             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11545                 SvUTF8_on(stuff);
11546         }
11547         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
11548         force_next(THING);
11549     }
11550     else {
11551         SvREFCNT_dec(stuff);
11552         if (eofmt)
11553             PL_lex_formbrack = 0;
11554     }
11555     return s;
11556 }
11557
11558 I32
11559 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11560 {
11561     const I32 oldsavestack_ix = PL_savestack_ix;
11562     CV* const outsidecv = PL_compcv;
11563
11564     SAVEI32(PL_subline);
11565     save_item(PL_subname);
11566     SAVESPTR(PL_compcv);
11567
11568     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11569     CvFLAGS(PL_compcv) |= flags;
11570
11571     PL_subline = CopLINE(PL_curcop);
11572     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11573     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11574     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11575     if (outsidecv && CvPADLIST(outsidecv))
11576         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
11577
11578     return oldsavestack_ix;
11579 }
11580
11581 static int
11582 S_yywarn(pTHX_ const char *const s, U32 flags)
11583 {
11584     PERL_ARGS_ASSERT_YYWARN;
11585
11586     PL_in_eval |= EVAL_WARNONLY;
11587     yyerror_pv(s, flags);
11588     return 0;
11589 }
11590
11591 void
11592 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
11593 {
11594     PERL_ARGS_ASSERT_ABORT_EXECUTION;
11595
11596     if (PL_minus_c)
11597         Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
11598     else {
11599         Perl_croak(aTHX_
11600                 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
11601     }
11602     NOT_REACHED; /* NOTREACHED */
11603 }
11604
11605 void
11606 Perl_yyquit(pTHX)
11607 {
11608     /* Called, after at least one error has been found, to abort the parse now,
11609      * instead of trying to forge ahead */
11610
11611     yyerror_pvn(NULL, 0, 0);
11612 }
11613
11614 int
11615 Perl_yyerror(pTHX_ const char *const s)
11616 {
11617     PERL_ARGS_ASSERT_YYERROR;
11618     return yyerror_pvn(s, strlen(s), 0);
11619 }
11620
11621 int
11622 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11623 {
11624     PERL_ARGS_ASSERT_YYERROR_PV;
11625     return yyerror_pvn(s, strlen(s), flags);
11626 }
11627
11628 int
11629 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11630 {
11631     const char *context = NULL;
11632     int contlen = -1;
11633     SV *msg;
11634     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11635     int yychar  = PL_parser->yychar;
11636
11637     /* Output error message 's' with length 'len'.  'flags' are SV flags that
11638      * apply.  If the number of errors found is large enough, it abandons
11639      * parsing.  If 's' is NULL, there is no message, and it abandons
11640      * processing unconditionally */
11641
11642     if (s != NULL) {
11643         if (!yychar || (yychar == ';' && !PL_rsfp))
11644             sv_catpvs(where_sv, "at EOF");
11645         else if (   PL_oldoldbufptr
11646                  && PL_bufptr > PL_oldoldbufptr
11647                  && PL_bufptr - PL_oldoldbufptr < 200
11648                  && PL_oldoldbufptr != PL_oldbufptr
11649                  && PL_oldbufptr != PL_bufptr)
11650         {
11651             /*
11652                     Only for NetWare:
11653                     The code below is removed for NetWare because it
11654                     abends/crashes on NetWare when the script has error such as
11655                     not having the closing quotes like:
11656                         if ($var eq "value)
11657                     Checking of white spaces is anyway done in NetWare code.
11658             */
11659 #ifndef NETWARE
11660             while (isSPACE(*PL_oldoldbufptr))
11661                 PL_oldoldbufptr++;
11662 #endif
11663             context = PL_oldoldbufptr;
11664             contlen = PL_bufptr - PL_oldoldbufptr;
11665         }
11666         else if (  PL_oldbufptr
11667                 && PL_bufptr > PL_oldbufptr
11668                 && PL_bufptr - PL_oldbufptr < 200
11669                 && PL_oldbufptr != PL_bufptr) {
11670             /*
11671                     Only for NetWare:
11672                     The code below is removed for NetWare because it
11673                     abends/crashes on NetWare when the script has error such as
11674                     not having the closing quotes like:
11675                         if ($var eq "value)
11676                     Checking of white spaces is anyway done in NetWare code.
11677             */
11678 #ifndef NETWARE
11679             while (isSPACE(*PL_oldbufptr))
11680                 PL_oldbufptr++;
11681 #endif
11682             context = PL_oldbufptr;
11683             contlen = PL_bufptr - PL_oldbufptr;
11684         }
11685         else if (yychar > 255)
11686             sv_catpvs(where_sv, "next token ???");
11687         else if (yychar == YYEMPTY) {
11688             if (PL_lex_state == LEX_NORMAL)
11689                 sv_catpvs(where_sv, "at end of line");
11690             else if (PL_lex_inpat)
11691                 sv_catpvs(where_sv, "within pattern");
11692             else
11693                 sv_catpvs(where_sv, "within string");
11694         }
11695         else {
11696             sv_catpvs(where_sv, "next char ");
11697             if (yychar < 32)
11698                 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11699             else if (isPRINT_LC(yychar)) {
11700                 const char string = yychar;
11701                 sv_catpvn(where_sv, &string, 1);
11702             }
11703             else
11704                 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11705         }
11706         msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11707         Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
11708             OutCopFILE(PL_curcop),
11709             (IV)(PL_parser->preambling == NOLINE
11710                    ? CopLINE(PL_curcop)
11711                    : PL_parser->preambling));
11712         if (context)
11713             Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
11714                                  UTF8fARG(UTF, contlen, context));
11715         else
11716             Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
11717         if (   PL_multi_start < PL_multi_end
11718             && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
11719         {
11720             Perl_sv_catpvf(aTHX_ msg,
11721             "  (Might be a runaway multi-line %c%c string starting on"
11722             " line %" IVdf ")\n",
11723                     (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11724             PL_multi_end = 0;
11725         }
11726         if (PL_in_eval & EVAL_WARNONLY) {
11727             PL_in_eval &= ~EVAL_WARNONLY;
11728             Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
11729         }
11730         else {
11731             qerror(msg);
11732         }
11733     }
11734     if (s == NULL || PL_error_count >= 10) {
11735         const char * msg = "";
11736         const char * const name = OutCopFILE(PL_curcop);
11737
11738         if (PL_in_eval) {
11739             SV * errsv = ERRSV;
11740             if (SvCUR(errsv)) {
11741                 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
11742             }
11743         }
11744
11745         if (s == NULL) {
11746             abort_execution(msg, name);
11747         }
11748         else {
11749             Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
11750         }
11751     }
11752     PL_in_my = 0;
11753     PL_in_my_stash = NULL;
11754     return 0;
11755 }
11756
11757 STATIC char*
11758 S_swallow_bom(pTHX_ U8 *s)
11759 {
11760     const STRLEN slen = SvCUR(PL_linestr);
11761
11762     PERL_ARGS_ASSERT_SWALLOW_BOM;
11763
11764     switch (s[0]) {
11765     case 0xFF:
11766         if (s[1] == 0xFE) {
11767             /* UTF-16 little-endian? (or UTF-32LE?) */
11768             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11769                 /* diag_listed_as: Unsupported script encoding %s */
11770                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11771 #ifndef PERL_NO_UTF16_FILTER
11772 #ifdef DEBUGGING
11773             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11774 #endif
11775             s += 2;
11776             if (PL_bufend > (char*)s) {
11777                 s = add_utf16_textfilter(s, TRUE);
11778             }
11779 #else
11780             /* diag_listed_as: Unsupported script encoding %s */
11781             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11782 #endif
11783         }
11784         break;
11785     case 0xFE:
11786         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11787 #ifndef PERL_NO_UTF16_FILTER
11788 #ifdef DEBUGGING
11789             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11790 #endif
11791             s += 2;
11792             if (PL_bufend > (char *)s) {
11793                 s = add_utf16_textfilter(s, FALSE);
11794             }
11795 #else
11796             /* diag_listed_as: Unsupported script encoding %s */
11797             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11798 #endif
11799         }
11800         break;
11801     case BOM_UTF8_FIRST_BYTE: {
11802         if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
11803 #ifdef DEBUGGING
11804             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11805 #endif
11806             s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
11807         }
11808         break;
11809     }
11810     case 0:
11811         if (slen > 3) {
11812              if (s[1] == 0) {
11813                   if (s[2] == 0xFE && s[3] == 0xFF) {
11814                        /* UTF-32 big-endian */
11815                        /* diag_listed_as: Unsupported script encoding %s */
11816                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11817                   }
11818              }
11819              else if (s[2] == 0 && s[3] != 0) {
11820                   /* Leading bytes
11821                    * 00 xx 00 xx
11822                    * are a good indicator of UTF-16BE. */
11823 #ifndef PERL_NO_UTF16_FILTER
11824 #ifdef DEBUGGING
11825                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11826 #endif
11827                   s = add_utf16_textfilter(s, FALSE);
11828 #else
11829                   /* diag_listed_as: Unsupported script encoding %s */
11830                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11831 #endif
11832              }
11833         }
11834         break;
11835
11836     default:
11837          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11838                   /* Leading bytes
11839                    * xx 00 xx 00
11840                    * are a good indicator of UTF-16LE. */
11841 #ifndef PERL_NO_UTF16_FILTER
11842 #ifdef DEBUGGING
11843               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11844 #endif
11845               s = add_utf16_textfilter(s, TRUE);
11846 #else
11847               /* diag_listed_as: Unsupported script encoding %s */
11848               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11849 #endif
11850          }
11851     }
11852     return (char*)s;
11853 }
11854
11855
11856 #ifndef PERL_NO_UTF16_FILTER
11857 static I32
11858 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11859 {
11860     SV *const filter = FILTER_DATA(idx);
11861     /* We re-use this each time round, throwing the contents away before we
11862        return.  */
11863     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11864     SV *const utf8_buffer = filter;
11865     IV status = IoPAGE(filter);
11866     const bool reverse = cBOOL(IoLINES(filter));
11867     I32 retval;
11868
11869     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11870
11871     /* As we're automatically added, at the lowest level, and hence only called
11872        from this file, we can be sure that we're not called in block mode. Hence
11873        don't bother writing code to deal with block mode.  */
11874     if (maxlen) {
11875         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11876     }
11877     if (status < 0) {
11878         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
11879     }
11880     DEBUG_P(PerlIO_printf(Perl_debug_log,
11881                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
11882                           FPTR2DPTR(void *, S_utf16_textfilter),
11883                           reverse ? 'l' : 'b', idx, maxlen, status,
11884                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11885
11886     while (1) {
11887         STRLEN chars;
11888         STRLEN have;
11889         I32 newlen;
11890         U8 *end;
11891         /* First, look in our buffer of existing UTF-8 data:  */
11892         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11893
11894         if (nl) {
11895             ++nl;
11896         } else if (status == 0) {
11897             /* EOF */
11898             IoPAGE(filter) = 0;
11899             nl = SvEND(utf8_buffer);
11900         }
11901         if (nl) {
11902             STRLEN got = nl - SvPVX(utf8_buffer);
11903             /* Did we have anything to append?  */
11904             retval = got != 0;
11905             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11906             /* Everything else in this code works just fine if SVp_POK isn't
11907                set.  This, however, needs it, and we need it to work, else
11908                we loop infinitely because the buffer is never consumed.  */
11909             sv_chop(utf8_buffer, nl);
11910             break;
11911         }
11912
11913         /* OK, not a complete line there, so need to read some more UTF-16.
11914            Read an extra octect if the buffer currently has an odd number. */
11915         while (1) {
11916             if (status <= 0)
11917                 break;
11918             if (SvCUR(utf16_buffer) >= 2) {
11919                 /* Location of the high octet of the last complete code point.
11920                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11921                    *coupled* with all the benefits of partial reads and
11922                    endianness.  */
11923                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11924                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11925
11926                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11927                     break;
11928                 }
11929
11930                 /* We have the first half of a surrogate. Read more.  */
11931                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11932             }
11933
11934             status = FILTER_READ(idx + 1, utf16_buffer,
11935                                  160 + (SvCUR(utf16_buffer) & 1));
11936             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
11937             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11938             if (status < 0) {
11939                 /* Error */
11940                 IoPAGE(filter) = status;
11941                 return status;
11942             }
11943         }
11944
11945         /* 'chars' isn't quite the right name, as code points above 0xFFFF
11946          * require 4 bytes per char */
11947         chars = SvCUR(utf16_buffer) >> 1;
11948         have = SvCUR(utf8_buffer);
11949
11950         /* Assume the worst case size as noted by the functions: twice the
11951          * number of input bytes */
11952         SvGROW(utf8_buffer, have + chars * 4 + 1);
11953
11954         if (reverse) {
11955             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11956                                          (U8*)SvPVX_const(utf8_buffer) + have,
11957                                          chars * 2, &newlen);
11958         } else {
11959             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11960                                 (U8*)SvPVX_const(utf8_buffer) + have,
11961                                 chars * 2, &newlen);
11962         }
11963         SvCUR_set(utf8_buffer, have + newlen);
11964         *end = '\0';
11965
11966         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11967            it's private to us, and utf16_to_utf8{,reversed} take a
11968            (pointer,length) pair, rather than a NUL-terminated string.  */
11969         if(SvCUR(utf16_buffer) & 1) {
11970             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11971             SvCUR_set(utf16_buffer, 1);
11972         } else {
11973             SvCUR_set(utf16_buffer, 0);
11974         }
11975     }
11976     DEBUG_P(PerlIO_printf(Perl_debug_log,
11977                           "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
11978                           status,
11979                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11980     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11981     return retval;
11982 }
11983
11984 static U8 *
11985 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11986 {
11987     SV *filter = filter_add(S_utf16_textfilter, NULL);
11988
11989     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11990
11991     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11992     SvPVCLEAR(filter);
11993     IoLINES(filter) = reversed;
11994     IoPAGE(filter) = 1; /* Not EOF */
11995
11996     /* Sadly, we have to return a valid pointer, come what may, so we have to
11997        ignore any error return from this.  */
11998     SvCUR_set(PL_linestr, 0);
11999     if (FILTER_READ(0, PL_linestr, 0)) {
12000         SvUTF8_on(PL_linestr);
12001     } else {
12002         SvUTF8_on(PL_linestr);
12003     }
12004     PL_bufend = SvEND(PL_linestr);
12005     return (U8*)SvPVX(PL_linestr);
12006 }
12007 #endif
12008
12009 /*
12010 Returns a pointer to the next character after the parsed
12011 vstring, as well as updating the passed in sv.
12012
12013 Function must be called like
12014
12015         sv = sv_2mortal(newSV(5));
12016         s = scan_vstring(s,e,sv);
12017
12018 where s and e are the start and end of the string.
12019 The sv should already be large enough to store the vstring
12020 passed in, for performance reasons.
12021
12022 This function may croak if fatal warnings are enabled in the
12023 calling scope, hence the sv_2mortal in the example (to prevent
12024 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
12025 sv_2mortal.
12026
12027 */
12028
12029 char *
12030 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12031 {
12032     const char *pos = s;
12033     const char *start = s;
12034
12035     PERL_ARGS_ASSERT_SCAN_VSTRING;
12036
12037     if (*pos == 'v') pos++;  /* get past 'v' */
12038     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12039         pos++;
12040     if ( *pos != '.') {
12041         /* this may not be a v-string if followed by => */
12042         const char *next = pos;
12043         while (next < e && isSPACE(*next))
12044             ++next;
12045         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12046             /* return string not v-string */
12047             sv_setpvn(sv,(char *)s,pos-s);
12048             return (char *)pos;
12049         }
12050     }
12051
12052     if (!isALPHA(*pos)) {
12053         U8 tmpbuf[UTF8_MAXBYTES+1];
12054
12055         if (*s == 'v')
12056             s++;  /* get past 'v' */
12057
12058         SvPVCLEAR(sv);
12059
12060         for (;;) {
12061             /* this is atoi() that tolerates underscores */
12062             U8 *tmpend;
12063             UV rev = 0;
12064             const char *end = pos;
12065             UV mult = 1;
12066             while (--end >= s) {
12067                 if (*end != '_') {
12068                     const UV orev = rev;
12069                     rev += (*end - '0') * mult;
12070                     mult *= 10;
12071                     if (orev > rev)
12072                         /* diag_listed_as: Integer overflow in %s number */
12073                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12074                                          "Integer overflow in decimal number");
12075                 }
12076             }
12077
12078             /* Append native character for the rev point */
12079             tmpend = uvchr_to_utf8(tmpbuf, rev);
12080             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12081             if (!UVCHR_IS_INVARIANT(rev))
12082                  SvUTF8_on(sv);
12083             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12084                  s = ++pos;
12085             else {
12086                  s = pos;
12087                  break;
12088             }
12089             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12090                  pos++;
12091         }
12092         SvPOK_on(sv);
12093         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12094         SvRMAGICAL_on(sv);
12095     }
12096     return (char *)s;
12097 }
12098
12099 int
12100 Perl_keyword_plugin_standard(pTHX_
12101         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12102 {
12103     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12104     PERL_UNUSED_CONTEXT;
12105     PERL_UNUSED_ARG(keyword_ptr);
12106     PERL_UNUSED_ARG(keyword_len);
12107     PERL_UNUSED_ARG(op_ptr);
12108     return KEYWORD_PLUGIN_DECLINE;
12109 }
12110
12111 /*
12112 =for apidoc Amx|void|wrap_keyword_plugin|Perl_keyword_plugin_t new_plugin|Perl_keyword_plugin_t *old_plugin_p
12113
12114 Puts a C function into the chain of keyword plugins.  This is the
12115 preferred way to manipulate the L</PL_keyword_plugin> variable.
12116 C<new_plugin> is a pointer to the C function that is to be added to the
12117 keyword plugin chain, and C<old_plugin_p> points to the storage location
12118 where a pointer to the next function in the chain will be stored.  The
12119 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12120 while the value previously stored there is written to C<*old_plugin_p>.
12121
12122 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12123 to hook keyword parsing may find itself invoked more than once per
12124 process, typically in different threads.  To handle that situation, this
12125 function is idempotent.  The location C<*old_plugin_p> must initially
12126 (once per process) contain a null pointer.  A C variable of static
12127 duration (declared at file scope, typically also marked C<static> to give
12128 it internal linkage) will be implicitly initialised appropriately, if it
12129 does not have an explicit initialiser.  This function will only actually
12130 modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
12131 function is also thread safe on the small scale.  It uses appropriate
12132 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12133
12134 When this function is called, the function referenced by C<new_plugin>
12135 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12136 In a threading situation, C<new_plugin> may be called immediately, even
12137 before this function has returned.  C<*old_plugin_p> will always be
12138 appropriately set before C<new_plugin> is called.  If C<new_plugin>
12139 decides not to do anything special with the identifier that it is given
12140 (which is the usual case for most calls to a keyword plugin), it must
12141 chain the plugin function referenced by C<*old_plugin_p>.
12142
12143 Taken all together, XS code to install a keyword plugin should typically
12144 look something like this:
12145
12146     static Perl_keyword_plugin_t next_keyword_plugin;
12147     static OP *my_keyword_plugin(pTHX_
12148         char *keyword_plugin, STRLEN keyword_len, OP **op_ptr)
12149     {
12150         if (memEQs(keyword_ptr, keyword_len,
12151                    "my_new_keyword")) {
12152             ...
12153         } else {
12154             return next_keyword_plugin(aTHX_
12155                 keyword_ptr, keyword_len, op_ptr);
12156         }
12157     }
12158     BOOT:
12159         wrap_keyword_plugin(my_keyword_plugin,
12160                             &next_keyword_plugin);
12161
12162 Direct access to L</PL_keyword_plugin> should be avoided.
12163
12164 =cut
12165 */
12166
12167 void
12168 Perl_wrap_keyword_plugin(pTHX_
12169     Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12170 {
12171     dVAR;
12172
12173     PERL_UNUSED_CONTEXT;
12174     PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12175     if (*old_plugin_p) return;
12176     KEYWORD_PLUGIN_MUTEX_LOCK;
12177     if (!*old_plugin_p) {
12178         *old_plugin_p = PL_keyword_plugin;
12179         PL_keyword_plugin = new_plugin;
12180     }
12181     KEYWORD_PLUGIN_MUTEX_UNLOCK;
12182 }
12183
12184 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12185 static void
12186 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12187 {
12188     SAVEI32(PL_lex_brackets);
12189     if (PL_lex_brackets > 100)
12190         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12191     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12192     SAVEI32(PL_lex_allbrackets);
12193     PL_lex_allbrackets = 0;
12194     SAVEI8(PL_lex_fakeeof);
12195     PL_lex_fakeeof = (U8)fakeeof;
12196     if(yyparse(gramtype) && !PL_parser->error_count)
12197         qerror(Perl_mess(aTHX_ "Parse error"));
12198 }
12199
12200 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12201 static OP *
12202 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12203 {
12204     OP *o;
12205     ENTER;
12206     SAVEVPTR(PL_eval_root);
12207     PL_eval_root = NULL;
12208     parse_recdescent(gramtype, fakeeof);
12209     o = PL_eval_root;
12210     LEAVE;
12211     return o;
12212 }
12213
12214 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12215 static OP *
12216 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12217 {
12218     OP *exprop;
12219     if (flags & ~PARSE_OPTIONAL)
12220         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12221     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12222     if (!exprop && !(flags & PARSE_OPTIONAL)) {
12223         if (!PL_parser->error_count)
12224             qerror(Perl_mess(aTHX_ "Parse error"));
12225         exprop = newOP(OP_NULL, 0);
12226     }
12227     return exprop;
12228 }
12229
12230 /*
12231 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
12232
12233 Parse a Perl arithmetic expression.  This may contain operators of precedence
12234 down to the bit shift operators.  The expression must be followed (and thus
12235 terminated) either by a comparison or lower-precedence operator or by
12236 something that would normally terminate an expression such as semicolon.
12237 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12238 otherwise it is mandatory.  It is up to the caller to ensure that the
12239 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12240 the source of the code to be parsed and the lexical context for the
12241 expression.
12242
12243 The op tree representing the expression is returned.  If an optional
12244 expression is absent, a null pointer is returned, otherwise the pointer
12245 will be non-null.
12246
12247 If an error occurs in parsing or compilation, in most cases a valid op
12248 tree is returned anyway.  The error is reflected in the parser state,
12249 normally resulting in a single exception at the top level of parsing
12250 which covers all the compilation errors that occurred.  Some compilation
12251 errors, however, will throw an exception immediately.
12252
12253 =cut
12254 */
12255
12256 OP *
12257 Perl_parse_arithexpr(pTHX_ U32 flags)
12258 {
12259     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12260 }
12261
12262 /*
12263 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12264
12265 Parse a Perl term expression.  This may contain operators of precedence
12266 down to the assignment operators.  The expression must be followed (and thus
12267 terminated) either by a comma or lower-precedence operator or by
12268 something that would normally terminate an expression such as semicolon.
12269 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12270 otherwise it is mandatory.  It is up to the caller to ensure that the
12271 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12272 the source of the code to be parsed and the lexical context for the
12273 expression.
12274
12275 The op tree representing the expression is returned.  If an optional
12276 expression is absent, a null pointer is returned, otherwise the pointer
12277 will be non-null.
12278
12279 If an error occurs in parsing or compilation, in most cases a valid op
12280 tree is returned anyway.  The error is reflected in the parser state,
12281 normally resulting in a single exception at the top level of parsing
12282 which covers all the compilation errors that occurred.  Some compilation
12283 errors, however, will throw an exception immediately.
12284
12285 =cut
12286 */
12287
12288 OP *
12289 Perl_parse_termexpr(pTHX_ U32 flags)
12290 {
12291     return parse_expr(LEX_FAKEEOF_COMMA, flags);
12292 }
12293
12294 /*
12295 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12296
12297 Parse a Perl list expression.  This may contain operators of precedence
12298 down to the comma operator.  The expression must be followed (and thus
12299 terminated) either by a low-precedence logic operator such as C<or> or by
12300 something that would normally terminate an expression such as semicolon.
12301 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12302 otherwise it is mandatory.  It is up to the caller to ensure that the
12303 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12304 the source of the code to be parsed and the lexical context for the
12305 expression.
12306
12307 The op tree representing the expression is returned.  If an optional
12308 expression is absent, a null pointer is returned, otherwise the pointer
12309 will be non-null.
12310
12311 If an error occurs in parsing or compilation, in most cases a valid op
12312 tree is returned anyway.  The error is reflected in the parser state,
12313 normally resulting in a single exception at the top level of parsing
12314 which covers all the compilation errors that occurred.  Some compilation
12315 errors, however, will throw an exception immediately.
12316
12317 =cut
12318 */
12319
12320 OP *
12321 Perl_parse_listexpr(pTHX_ U32 flags)
12322 {
12323     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12324 }
12325
12326 /*
12327 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12328
12329 Parse a single complete Perl expression.  This allows the full
12330 expression grammar, including the lowest-precedence operators such
12331 as C<or>.  The expression must be followed (and thus terminated) by a
12332 token that an expression would normally be terminated by: end-of-file,
12333 closing bracketing punctuation, semicolon, or one of the keywords that
12334 signals a postfix expression-statement modifier.  If C<flags> has the
12335 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
12336 mandatory.  It is up to the caller to ensure that the dynamic parser
12337 state (L</PL_parser> et al) is correctly set to reflect the source of
12338 the code to be parsed and the lexical context for the expression.
12339
12340 The op tree representing the expression is returned.  If an optional
12341 expression is absent, a null pointer is returned, otherwise the pointer
12342 will be non-null.
12343
12344 If an error occurs in parsing or compilation, in most cases a valid op
12345 tree is returned anyway.  The error is reflected in the parser state,
12346 normally resulting in a single exception at the top level of parsing
12347 which covers all the compilation errors that occurred.  Some compilation
12348 errors, however, will throw an exception immediately.
12349
12350 =cut
12351 */
12352
12353 OP *
12354 Perl_parse_fullexpr(pTHX_ U32 flags)
12355 {
12356     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12357 }
12358
12359 /*
12360 =for apidoc Amx|OP *|parse_block|U32 flags
12361
12362 Parse a single complete Perl code block.  This consists of an opening
12363 brace, a sequence of statements, and a closing brace.  The block
12364 constitutes a lexical scope, so C<my> variables and various compile-time
12365 effects can be contained within it.  It is up to the caller to ensure
12366 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12367 reflect the source of the code to be parsed and the lexical context for
12368 the statement.
12369
12370 The op tree representing the code block is returned.  This is always a
12371 real op, never a null pointer.  It will normally be a C<lineseq> list,
12372 including C<nextstate> or equivalent ops.  No ops to construct any kind
12373 of runtime scope are included by virtue of it being a block.
12374
12375 If an error occurs in parsing or compilation, in most cases a valid op
12376 tree (most likely null) is returned anyway.  The error is reflected in
12377 the parser state, normally resulting in a single exception at the top
12378 level of parsing which covers all the compilation errors that occurred.
12379 Some compilation errors, however, will throw an exception immediately.
12380
12381 The C<flags> parameter is reserved for future use, and must always
12382 be zero.
12383
12384 =cut
12385 */
12386
12387 OP *
12388 Perl_parse_block(pTHX_ U32 flags)
12389 {
12390     if (flags)
12391         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12392     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12393 }
12394
12395 /*
12396 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12397
12398 Parse a single unadorned Perl statement.  This may be a normal imperative
12399 statement or a declaration that has compile-time effect.  It does not
12400 include any label or other affixture.  It is up to the caller to ensure
12401 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12402 reflect the source of the code to be parsed and the lexical context for
12403 the statement.
12404
12405 The op tree representing the statement is returned.  This may be a
12406 null pointer if the statement is null, for example if it was actually
12407 a subroutine definition (which has compile-time side effects).  If not
12408 null, it will be ops directly implementing the statement, suitable to
12409 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
12410 equivalent op (except for those embedded in a scope contained entirely
12411 within the statement).
12412
12413 If an error occurs in parsing or compilation, in most cases a valid op
12414 tree (most likely null) is returned anyway.  The error is reflected in
12415 the parser state, normally resulting in a single exception at the top
12416 level of parsing which covers all the compilation errors that occurred.
12417 Some compilation errors, however, will throw an exception immediately.
12418
12419 The C<flags> parameter is reserved for future use, and must always
12420 be zero.
12421
12422 =cut
12423 */
12424
12425 OP *
12426 Perl_parse_barestmt(pTHX_ U32 flags)
12427 {
12428     if (flags)
12429         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12430     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12431 }
12432
12433 /*
12434 =for apidoc Amx|SV *|parse_label|U32 flags
12435
12436 Parse a single label, possibly optional, of the type that may prefix a
12437 Perl statement.  It is up to the caller to ensure that the dynamic parser
12438 state (L</PL_parser> et al) is correctly set to reflect the source of
12439 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
12440 label is optional, otherwise it is mandatory.
12441
12442 The name of the label is returned in the form of a fresh scalar.  If an
12443 optional label is absent, a null pointer is returned.
12444
12445 If an error occurs in parsing, which can only occur if the label is
12446 mandatory, a valid label is returned anyway.  The error is reflected in
12447 the parser state, normally resulting in a single exception at the top
12448 level of parsing which covers all the compilation errors that occurred.
12449
12450 =cut
12451 */
12452
12453 SV *
12454 Perl_parse_label(pTHX_ U32 flags)
12455 {
12456     if (flags & ~PARSE_OPTIONAL)
12457         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12458     if (PL_nexttoke) {
12459         PL_parser->yychar = yylex();
12460         if (PL_parser->yychar == LABEL) {
12461             char * const lpv = pl_yylval.pval;
12462             STRLEN llen = strlen(lpv);
12463             PL_parser->yychar = YYEMPTY;
12464             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12465         } else {
12466             yyunlex();
12467             goto no_label;
12468         }
12469     } else {
12470         char *s, *t;
12471         STRLEN wlen, bufptr_pos;
12472         lex_read_space(0);
12473         t = s = PL_bufptr;
12474         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
12475             goto no_label;
12476         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12477         if (word_takes_any_delimiter(s, wlen))
12478             goto no_label;
12479         bufptr_pos = s - SvPVX(PL_linestr);
12480         PL_bufptr = t;
12481         lex_read_space(LEX_KEEP_PREVIOUS);
12482         t = PL_bufptr;
12483         s = SvPVX(PL_linestr) + bufptr_pos;
12484         if (t[0] == ':' && t[1] != ':') {
12485             PL_oldoldbufptr = PL_oldbufptr;
12486             PL_oldbufptr = s;
12487             PL_bufptr = t+1;
12488             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12489         } else {
12490             PL_bufptr = s;
12491             no_label:
12492             if (flags & PARSE_OPTIONAL) {
12493                 return NULL;
12494             } else {
12495                 qerror(Perl_mess(aTHX_ "Parse error"));
12496                 return newSVpvs("x");
12497             }
12498         }
12499     }
12500 }
12501
12502 /*
12503 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12504
12505 Parse a single complete Perl statement.  This may be a normal imperative
12506 statement or a declaration that has compile-time effect, and may include
12507 optional labels.  It is up to the caller to ensure that the dynamic
12508 parser state (L</PL_parser> et al) is correctly set to reflect the source
12509 of the code to be parsed and the lexical context for the statement.
12510
12511 The op tree representing the statement is returned.  This may be a
12512 null pointer if the statement is null, for example if it was actually
12513 a subroutine definition (which has compile-time side effects).  If not
12514 null, it will be the result of a L</newSTATEOP> call, normally including
12515 a C<nextstate> or equivalent op.
12516
12517 If an error occurs in parsing or compilation, in most cases a valid op
12518 tree (most likely null) is returned anyway.  The error is reflected in
12519 the parser state, normally resulting in a single exception at the top
12520 level of parsing which covers all the compilation errors that occurred.
12521 Some compilation errors, however, will throw an exception immediately.
12522
12523 The C<flags> parameter is reserved for future use, and must always
12524 be zero.
12525
12526 =cut
12527 */
12528
12529 OP *
12530 Perl_parse_fullstmt(pTHX_ U32 flags)
12531 {
12532     if (flags)
12533         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12534     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12535 }
12536
12537 /*
12538 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12539
12540 Parse a sequence of zero or more Perl statements.  These may be normal
12541 imperative statements, including optional labels, or declarations
12542 that have compile-time effect, or any mixture thereof.  The statement
12543 sequence ends when a closing brace or end-of-file is encountered in a
12544 place where a new statement could have validly started.  It is up to
12545 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12546 is correctly set to reflect the source of the code to be parsed and the
12547 lexical context for the statements.
12548
12549 The op tree representing the statement sequence is returned.  This may
12550 be a null pointer if the statements were all null, for example if there
12551 were no statements or if there were only subroutine definitions (which
12552 have compile-time side effects).  If not null, it will be a C<lineseq>
12553 list, normally including C<nextstate> or equivalent ops.
12554
12555 If an error occurs in parsing or compilation, in most cases a valid op
12556 tree is returned anyway.  The error is reflected in the parser state,
12557 normally resulting in a single exception at the top level of parsing
12558 which covers all the compilation errors that occurred.  Some compilation
12559 errors, however, will throw an exception immediately.
12560
12561 The C<flags> parameter is reserved for future use, and must always
12562 be zero.
12563
12564 =cut
12565 */
12566
12567 OP *
12568 Perl_parse_stmtseq(pTHX_ U32 flags)
12569 {
12570     OP *stmtseqop;
12571     I32 c;
12572     if (flags)
12573         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12574     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12575     c = lex_peek_unichar(0);
12576     if (c != -1 && c != /*{*/'}')
12577         qerror(Perl_mess(aTHX_ "Parse error"));
12578     return stmtseqop;
12579 }
12580
12581 /*
12582  * ex: set ts=8 sts=4 sw=4 et:
12583  */