toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "dquote_inline.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* XXX temporary backwards compatibility */
  49 #define PL_lex_brackets         (PL_parser->lex_brackets)
  50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  53 #define PL_lex_casemods         (PL_parser->lex_casemods)
  54 #define PL_lex_casestack        (PL_parser->lex_casestack)
  55 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  56 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  57 #define PL_lex_inpat            (PL_parser->lex_inpat)
  58 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  59 #define PL_lex_op               (PL_parser->lex_op)
  60 #define PL_lex_repl             (PL_parser->lex_repl)
  61 #define PL_lex_starts           (PL_parser->lex_starts)
  62 #define PL_lex_stuff            (PL_parser->lex_stuff)
  63 #define PL_multi_start          (PL_parser->multi_start)
  64 #define PL_multi_open           (PL_parser->multi_open)
  65 #define PL_multi_close          (PL_parser->multi_close)
  66 #define PL_preambled            (PL_parser->preambled)
  67 #define PL_linestr              (PL_parser->linestr)
  68 #define PL_expect               (PL_parser->expect)
  69 #define PL_copline              (PL_parser->copline)
  70 #define PL_bufptr               (PL_parser->bufptr)
  71 #define PL_oldbufptr            (PL_parser->oldbufptr)
  72 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  73 #define PL_linestart            (PL_parser->linestart)
  74 #define PL_bufend               (PL_parser->bufend)
  75 #define PL_last_uni             (PL_parser->last_uni)
  76 #define PL_last_lop             (PL_parser->last_lop)
  77 #define PL_last_lop_op          (PL_parser->last_lop_op)
  78 #define PL_lex_state            (PL_parser->lex_state)
  79 #define PL_rsfp                 (PL_parser->rsfp)
  80 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  81 #define PL_in_my                (PL_parser->in_my)
  82 #define PL_in_my_stash          (PL_parser->in_my_stash)
  83 #define PL_tokenbuf             (PL_parser->tokenbuf)
  84 #define PL_multi_end            (PL_parser->multi_end)
  85 #define PL_error_count          (PL_parser->error_count)
  86
  87 #  define PL_nexttoke           (PL_parser->nexttoke)
  88 #  define PL_nexttype           (PL_parser->nexttype)
  89 #  define PL_nextval            (PL_parser->nextval)
  90
  91
  92 #define SvEVALED(sv) \
  93     (SvTYPE(sv) >= SVt_PVNV \
  94     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
  95
  96 static const char* const ident_too_long = "Identifier too long";
  97
  98 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
  99
 100 #define XENUMMASK  0x3f
 101 #define XFAKEEOF   0x40
 102 #define XFAKEBRACK 0x80
 103
 104 #ifdef USE_UTF8_SCRIPTS
 105 #   define UTF cBOOL(!IN_BYTES)
 106 #else
 107 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 108 #endif
 109
 110 /* The maximum number of characters preceding the unrecognized one to display */
 111 #define UNRECOGNIZED_PRECEDE_COUNT 10
 112
 113 /* In variables named $^X, these are the legal values for X.
 114  * 1999-02-27 mjd-perl-patch@plover.com */
 115 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 116
 117 #define SPACE_OR_TAB(c) isBLANK_A(c)
 118
 119 #define HEXFP_PEEK(s)     \
 120     (((s[0] == '.') && \
 121       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
 122      isALPHA_FOLD_EQ(s[0], 'p'))
 123
 124 /* LEX_* are values for PL_lex_state, the state of the lexer.
 125  * They are arranged oddly so that the guard on the switch statement
 126  * can get by with a single comparison (if the compiler is smart enough).
 127  *
 128  * These values refer to the various states within a sublex parse,
 129  * i.e. within a double quotish string
 130  */
 131
 132 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 133
 134 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 135 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 136 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 137 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 138 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 139
 140                                    /* at end of code, eg "$x" followed by:  */
 141 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 142 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 143
 144 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 145                                         string or after \E, $foo, etc       */
 146 #define LEX_INTERPCONST          2 /* NOT USED */
 147 #define LEX_FORMLINE             1 /* expecting a format line               */
 148
 149
 150 #ifdef DEBUGGING
 151 static const char* const lex_state_names[] = {
 152     "KNOWNEXT",
 153     "FORMLINE",
 154     "INTERPCONST",
 155     "INTERPCONCAT",
 156     "INTERPENDMAYBE",
 157     "INTERPEND",
 158     "INTERPSTART",
 159     "INTERPPUSH",
 160     "INTERPCASEMOD",
 161     "INTERPNORMAL",
 162     "NORMAL"
 163 };
 164 #endif
 165
 166 #include "keywords.h"
 167
 168 /* CLINE is a macro that ensures PL_copline has a sane value */
 169
 170 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 171
 172 /*
 173  * Convenience functions to return different tokens and prime the
 174  * lexer for the next token.  They all take an argument.
 175  *
 176  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 177  * OPERATOR     : generic operator
 178  * AOPERATOR    : assignment operator
 179  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 180  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 181  * PREREF       : *EXPR where EXPR is not a simple identifier
 182  * TERM         : expression term
 183  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 184  * LOOPX        : loop exiting command (goto, last, dump, etc)
 185  * FTST         : file test operator
 186  * FUN0         : zero-argument function
 187  * FUN0OP       : zero-argument function, with its op created in this file
 188  * FUN1         : not used, except for not, which isn't a UNIOP
 189  * BOop         : bitwise or or xor
 190  * BAop         : bitwise and
 191  * BCop         : bitwise complement
 192  * SHop         : shift operator
 193  * PWop         : power operator
 194  * PMop         : pattern-matching operator
 195  * Aop          : addition-level operator
 196  * AopNOASSIGN  : addition-level operator that is never part of .=
 197  * Mop          : multiplication-level operator
 198  * Eop          : equality-testing operator
 199  * Rop          : relational operator <= != gt
 200  *
 201  * Also see LOP and lop() below.
 202  */
 203
 204 #ifdef DEBUGGING /* Serve -DT. */
 205 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 206 #else
 207 #   define REPORT(retval) (retval)
 208 #endif
 209
 210 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 211 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 212 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 213 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 214 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 215 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 216 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 217 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 218 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
 219                          pl_yylval.ival=f, \
 220                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
 221                          REPORT((int)LOOPEX))
 222 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 223 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 224 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 225 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 226 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
 227 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
 228 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
 229                        REPORT('~')
 230 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
 231 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 232 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 233 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 234 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
 235 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 236 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 237 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 238
 239 /* This bit of chicanery makes a unary function followed by
 240  * a parenthesis into a function with one argument, highest precedence.
 241  * The UNIDOR macro is for unary functions that can be followed by the //
 242  * operator (such as C<shift // 0>).
 243  */
 244 #define UNI3(f,x,have_x) { \
 245         pl_yylval.ival = f; \
 246         if (have_x) PL_expect = x; \
 247         PL_bufptr = s; \
 248         PL_last_uni = PL_oldbufptr; \
 249         PL_last_lop_op = (f) < 0 ? -(f) : (f); \
 250         if (*s == '(') \
 251             return REPORT( (int)FUNC1 ); \
 252         s = skipspace(s); \
 253         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 254         }
 255 #define UNI(f)    UNI3(f,XTERM,1)
 256 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 257 #define UNIPROTO(f,optional) { \
 258         if (optional) PL_last_uni = PL_oldbufptr; \
 259         OPERATOR(f); \
 260         }
 261
 262 #define UNIBRACK(f) UNI3(f,0,0)
 263
 264 /* grandfather return to old style */
 265 #define OLDLOP(f) \
 266         do { \
 267             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 268                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 269             pl_yylval.ival = (f); \
 270             PL_expect = XTERM; \
 271             PL_bufptr = s; \
 272             return (int)LSTOP; \
 273         } while(0)
 274
 275 #define COPLINE_INC_WITH_HERELINES                  \
 276     STMT_START {                                     \
 277         CopLINE_inc(PL_curcop);                       \
 278         if (PL_parser->herelines)                      \
 279             CopLINE(PL_curcop) += PL_parser->herelines, \
 280             PL_parser->herelines = 0;                    \
 281     } STMT_END
 282 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 283  * is no sublex_push to follow. */
 284 #define COPLINE_SET_FROM_MULTI_END            \
 285     STMT_START {                               \
 286         CopLINE_set(PL_curcop, PL_multi_end);   \
 287         if (PL_multi_end != PL_multi_start)      \
 288             PL_parser->herelines = 0;             \
 289     } STMT_END
 290
 291
 292 #ifdef DEBUGGING
 293
 294 /* how to interpret the pl_yylval associated with the token */
 295 enum token_type {
 296     TOKENTYPE_NONE,
 297     TOKENTYPE_IVAL,
 298     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 299     TOKENTYPE_PVAL,
 300     TOKENTYPE_OPVAL
 301 };
 302
 303 static struct debug_tokens {
 304     const int token;
 305     enum token_type type;
 306     const char *name;
 307 } const debug_tokens[] =
 308 {
 309     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 310     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 311     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 312     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 313     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 314     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 315     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 316     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 317     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 318     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 319     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 320     { DO,               TOKENTYPE_NONE,         "DO" },
 321     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 322     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 323     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 324     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 325     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 326     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 327     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 328     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 329     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 330     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 331     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 332     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 333     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 334     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 335     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 336     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 337     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 338     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 339     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 340     { IF,               TOKENTYPE_IVAL,         "IF" },
 341     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 342     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 343     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 344     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 345     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 346     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 347     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 348     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 349     { MY,               TOKENTYPE_IVAL,         "MY" },
 350     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 351     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 352     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 353     { OROR,             TOKENTYPE_NONE,         "OROR" },
 354     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 355     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 356     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 357     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 358     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 359     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 360     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 361     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 362     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 363     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 364     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 365     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 366     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 367     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 368     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 369     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 370     { SUB,              TOKENTYPE_NONE,         "SUB" },
 371     { THING,            TOKENTYPE_OPVAL,        "THING" },
 372     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 373     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 374     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 375     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 376     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 377     { USE,              TOKENTYPE_IVAL,         "USE" },
 378     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 379     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 380     { BAREWORD,         TOKENTYPE_OPVAL,        "BAREWORD" },
 381     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 382     { 0,                TOKENTYPE_NONE,         NULL }
 383 };
 384
 385 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 386
 387 STATIC int
 388 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 389 {
 390     PERL_ARGS_ASSERT_TOKEREPORT;
 391
 392     if (DEBUG_T_TEST) {
 393         const char *name = NULL;
 394         enum token_type type = TOKENTYPE_NONE;
 395         const struct debug_tokens *p;
 396         SV* const report = newSVpvs("<== ");
 397
 398         for (p = debug_tokens; p->token; p++) {
 399             if (p->token == (int)rv) {
 400                 name = p->name;
 401                 type = p->type;
 402                 break;
 403             }
 404         }
 405         if (name)
 406             Perl_sv_catpv(aTHX_ report, name);
 407         else if (isGRAPH(rv))
 408         {
 409             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 410             if ((char)rv == 'p')
 411                 sv_catpvs(report, " (pending identifier)");
 412         }
 413         else if (!rv)
 414             sv_catpvs(report, "EOF");
 415         else
 416             Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
 417         switch (type) {
 418         case TOKENTYPE_NONE:
 419             break;
 420         case TOKENTYPE_IVAL:
 421             Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
 422             break;
 423         case TOKENTYPE_OPNUM:
 424             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 425                                     PL_op_name[lvalp->ival]);
 426             break;
 427         case TOKENTYPE_PVAL:
 428             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 429             break;
 430         case TOKENTYPE_OPVAL:
 431             if (lvalp->opval) {
 432                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 433                                     PL_op_name[lvalp->opval->op_type]);
 434                 if (lvalp->opval->op_type == OP_CONST) {
 435                     Perl_sv_catpvf(aTHX_ report, " %s",
 436                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 437                 }
 438
 439             }
 440             else
 441                 sv_catpvs(report, "(opval=null)");
 442             break;
 443         }
 444         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 445     };
 446     return (int)rv;
 447 }
 448
 449
 450 /* print the buffer with suitable escapes */
 451
 452 STATIC void
 453 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 454 {
 455     SV* const tmp = newSVpvs("");
 456
 457     PERL_ARGS_ASSERT_PRINTBUF;
 458
 459     GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
 460     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 461     GCC_DIAG_RESTORE;
 462     SvREFCNT_dec(tmp);
 463 }
 464
 465 #endif
 466
 467 /*
 468  * S_ao
 469  *
 470  * This subroutine looks for an '=' next to the operator that has just been
 471  * parsed and turns it into an ASSIGNOP if it finds one.
 472  */
 473
 474 STATIC int
 475 S_ao(pTHX_ int toketype)
 476 {
 477     if (*PL_bufptr == '=') {
 478         PL_bufptr++;
 479         if (toketype == ANDAND)
 480             pl_yylval.ival = OP_ANDASSIGN;
 481         else if (toketype == OROR)
 482             pl_yylval.ival = OP_ORASSIGN;
 483         else if (toketype == DORDOR)
 484             pl_yylval.ival = OP_DORASSIGN;
 485         toketype = ASSIGNOP;
 486     }
 487     return REPORT(toketype);
 488 }
 489
 490 /*
 491  * S_no_op
 492  * When Perl expects an operator and finds something else, no_op
 493  * prints the warning.  It always prints "<something> found where
 494  * operator expected.  It prints "Missing semicolon on previous line?"
 495  * if the surprise occurs at the start of the line.  "do you need to
 496  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 497  * where the compiler doesn't know if foo is a method call or a function.
 498  * It prints "Missing operator before end of line" if there's nothing
 499  * after the missing operator, or "... before <...>" if there is something
 500  * after the missing operator.
 501  *
 502  * PL_bufptr is expected to point to the start of the thing that was found,
 503  * and s after the next token or partial token.
 504  */
 505
 506 STATIC void
 507 S_no_op(pTHX_ const char *const what, char *s)
 508 {
 509     char * const oldbp = PL_bufptr;
 510     const bool is_first = (PL_oldbufptr == PL_linestart);
 511
 512     PERL_ARGS_ASSERT_NO_OP;
 513
 514     if (!s)
 515         s = oldbp;
 516     else
 517         PL_bufptr = s;
 518     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 519     if (ckWARN_d(WARN_SYNTAX)) {
 520         if (is_first)
 521             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 522                     "\t(Missing semicolon on previous line?)\n");
 523         else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
 524                                                            PL_bufend,
 525                                                            UTF))
 526         {
 527             const char *t;
 528             for (t = PL_oldoldbufptr;
 529                  (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
 530                  t += UTF ? UTF8SKIP(t) : 1)
 531             {
 532                 NOOP;
 533             }
 534             if (t < PL_bufptr && isSPACE(*t))
 535                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 536                         "\t(Do you need to predeclare %" UTF8f "?)\n",
 537                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 538         }
 539         else {
 540             assert(s >= oldbp);
 541             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 542                     "\t(Missing operator before %" UTF8f "?)\n",
 543                      UTF8fARG(UTF, s - oldbp, oldbp));
 544         }
 545     }
 546     PL_bufptr = oldbp;
 547 }
 548
 549 /*
 550  * S_missingterm
 551  * Complain about missing quote/regexp/heredoc terminator.
 552  * If it's called with NULL then it cauterizes the line buffer.
 553  * If we're in a delimited string and the delimiter is a control
 554  * character, it's reformatted into a two-char sequence like ^C.
 555  * This is fatal.
 556  */
 557
 558 STATIC void
 559 S_missingterm(pTHX_ char *s, STRLEN len)
 560 {
 561     char tmpbuf[UTF8_MAXBYTES + 1];
 562     char q;
 563     bool uni = FALSE;
 564     SV *sv;
 565     if (s) {
 566         char * const nl = (char *) my_memrchr(s, '\n', len);
 567         if (nl) {
 568             *nl = '\0';
 569             len = nl - s;
 570         }
 571         uni = UTF;
 572     }
 573     else if (PL_multi_close < 32) {
 574         *tmpbuf = '^';
 575         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 576         tmpbuf[2] = '\0';
 577         s = tmpbuf;
 578         len = 2;
 579     }
 580     else {
 581         if (LIKELY(PL_multi_close < 256)) {
 582             *tmpbuf = (char)PL_multi_close;
 583             tmpbuf[1] = '\0';
 584             len = 1;
 585         }
 586         else {
 587             char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
 588             *end = '\0';
 589             len = end - tmpbuf;
 590             uni = TRUE;
 591         }
 592         s = tmpbuf;
 593     }
 594     q = memchr(s, '"', len) ? '\'' : '"';
 595     sv = sv_2mortal(newSVpvn(s, len));
 596     if (uni)
 597         SvUTF8_on(sv);
 598     Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
 599                      " anywhere before EOF", q, SVfARG(sv), q);
 600 }
 601
 602 #include "feature.h"
 603
 604 /*
 605  * Check whether the named feature is enabled.
 606  */
 607 bool
 608 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 609 {
 610     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 611
 612     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 613
 614     assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
 615
 616     if (namelen > MAX_FEATURE_LEN)
 617         return FALSE;
 618     memcpy(&he_name[8], name, namelen);
 619
 620     return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
 621                                      REFCOUNTED_HE_EXISTS));
 622 }
 623
 624 /*
 625  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 626  * utf16-to-utf8-reversed.
 627  */
 628
 629 #ifdef PERL_CR_FILTER
 630 static void
 631 strip_return(SV *sv)
 632 {
 633     const char *s = SvPVX_const(sv);
 634     const char * const e = s + SvCUR(sv);
 635
 636     PERL_ARGS_ASSERT_STRIP_RETURN;
 637
 638     /* outer loop optimized to do nothing if there are no CR-LFs */
 639     while (s < e) {
 640         if (*s++ == '\r' && *s == '\n') {
 641             /* hit a CR-LF, need to copy the rest */
 642             char *d = s - 1;
 643             *d++ = *s++;
 644             while (s < e) {
 645                 if (*s == '\r' && s[1] == '\n')
 646                     s++;
 647                 *d++ = *s++;
 648             }
 649             SvCUR(sv) -= s - d;
 650             return;
 651         }
 652     }
 653 }
 654
 655 STATIC I32
 656 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 657 {
 658     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 659     if (count > 0 && !maxlen)
 660         strip_return(sv);
 661     return count;
 662 }
 663 #endif
 664
 665 /*
 666 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
 667
 668 Creates and initialises a new lexer/parser state object, supplying
 669 a context in which to lex and parse from a new source of Perl code.
 670 A pointer to the new state object is placed in L</PL_parser>.  An entry
 671 is made on the save stack so that upon unwinding, the new state object
 672 will be destroyed and the former value of L</PL_parser> will be restored.
 673 Nothing else need be done to clean up the parsing context.
 674
 675 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
 676 non-null, provides a string (in SV form) containing code to be parsed.
 677 A copy of the string is made, so subsequent modification of C<line>
 678 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
 679 from which code will be read to be parsed.  If both are non-null, the
 680 code in C<line> comes first and must consist of complete lines of input,
 681 and C<rsfp> supplies the remainder of the source.
 682
 683 The C<flags> parameter is reserved for future use.  Currently it is only
 684 used by perl internally, so extensions should always pass zero.
 685
 686 =cut
 687 */
 688
 689 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 690    can share filters with the current parser.
 691    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 692    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 693    destruction. This is used to handle the case of defaulting to reading the
 694    script from the standard input because no filename was given on the command
 695    line (without getting confused by situation where STDIN has been closed, so
 696    the script handle is opened on fd 0)  */
 697
 698 void
 699 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 700 {
 701     const char *s = NULL;
 702     yy_parser *parser, *oparser;
 703
 704     if (flags && flags & ~LEX_START_FLAGS)
 705         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 706
 707     /* create and initialise a parser */
 708
 709     Newxz(parser, 1, yy_parser);
 710     parser->old_parser = oparser = PL_parser;
 711     PL_parser = parser;
 712
 713     parser->stack = NULL;
 714     parser->stack_max1 = NULL;
 715     parser->ps = NULL;
 716
 717     /* on scope exit, free this parser and restore any outer one */
 718     SAVEPARSER(parser);
 719     parser->saved_curcop = PL_curcop;
 720
 721     /* initialise lexer state */
 722
 723     parser->nexttoke = 0;
 724     parser->error_count = oparser ? oparser->error_count : 0;
 725     parser->copline = parser->preambling = NOLINE;
 726     parser->lex_state = LEX_NORMAL;
 727     parser->expect = XSTATE;
 728     parser->rsfp = rsfp;
 729     parser->recheck_utf8_validity = FALSE;
 730     parser->rsfp_filters =
 731       !(flags & LEX_START_SAME_FILTER) || !oparser
 732         ? NULL
 733         : MUTABLE_AV(SvREFCNT_inc(
 734             oparser->rsfp_filters
 735              ? oparser->rsfp_filters
 736              : (oparser->rsfp_filters = newAV())
 737           ));
 738
 739     Newx(parser->lex_brackstack, 120, char);
 740     Newx(parser->lex_casestack, 12, char);
 741     *parser->lex_casestack = '\0';
 742     Newxz(parser->lex_shared, 1, LEXSHARED);
 743
 744     if (line) {
 745         STRLEN len;
 746         const U8* first_bad_char_loc;
 747
 748         s = SvPV_const(line, len);
 749
 750         if (   SvUTF8(line)
 751             && UNLIKELY(! is_utf8_string_loc((U8 *) s,
 752                                              SvCUR(line),
 753                                              &first_bad_char_loc)))
 754         {
 755             _force_out_malformed_utf8_message(first_bad_char_loc,
 756                                               (U8 *) s + SvCUR(line),
 757                                               0,
 758                                               1 /* 1 means die */ );
 759             NOT_REACHED; /* NOTREACHED */
 760         }
 761
 762         parser->linestr = flags & LEX_START_COPIED
 763                             ? SvREFCNT_inc_simple_NN(line)
 764                             : newSVpvn_flags(s, len, SvUTF8(line));
 765         if (!rsfp)
 766             sv_catpvs(parser->linestr, "\n;");
 767     } else {
 768         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 769     }
 770
 771     parser->oldoldbufptr =
 772         parser->oldbufptr =
 773         parser->bufptr =
 774         parser->linestart = SvPVX(parser->linestr);
 775     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 776     parser->last_lop = parser->last_uni = NULL;
 777
 778     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 779                                                         |LEX_DONT_CLOSE_RSFP));
 780     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 781                                                         |LEX_DONT_CLOSE_RSFP));
 782
 783     parser->in_pod = parser->filtered = 0;
 784 }
 785
 786
 787 /* delete a parser object */
 788
 789 void
 790 Perl_parser_free(pTHX_  const yy_parser *parser)
 791 {
 792     PERL_ARGS_ASSERT_PARSER_FREE;
 793
 794     PL_curcop = parser->saved_curcop;
 795     SvREFCNT_dec(parser->linestr);
 796
 797     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 798         PerlIO_clearerr(parser->rsfp);
 799     else if (parser->rsfp && (!parser->old_parser
 800           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 801         PerlIO_close(parser->rsfp);
 802     SvREFCNT_dec(parser->rsfp_filters);
 803     SvREFCNT_dec(parser->lex_stuff);
 804     SvREFCNT_dec(parser->lex_sub_repl);
 805
 806     Safefree(parser->lex_brackstack);
 807     Safefree(parser->lex_casestack);
 808     Safefree(parser->lex_shared);
 809     PL_parser = parser->old_parser;
 810     Safefree(parser);
 811 }
 812
 813 void
 814 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 815 {
 816     I32 nexttoke = parser->nexttoke;
 817     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 818     while (nexttoke--) {
 819         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 820          && parser->nextval[nexttoke].opval
 821          && parser->nextval[nexttoke].opval->op_slabbed
 822          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 823             op_free(parser->nextval[nexttoke].opval);
 824             parser->nextval[nexttoke].opval = NULL;
 825         }
 826     }
 827 }
 828
 829
 830 /*
 831 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 832
 833 Buffer scalar containing the chunk currently under consideration of the
 834 text currently being lexed.  This is always a plain string scalar (for
 835 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 836 normal scalar means; instead refer to the buffer directly by the pointer
 837 variables described below.
 838
 839 The lexer maintains various C<char*> pointers to things in the
 840 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 841 reallocated, all of these pointers must be updated.  Don't attempt to
 842 do this manually, but rather use L</lex_grow_linestr> if you need to
 843 reallocate the buffer.
 844
 845 The content of the text chunk in the buffer is commonly exactly one
 846 complete line of input, up to and including a newline terminator,
 847 but there are situations where it is otherwise.  The octets of the
 848 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 849 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 850 flag on this scalar, which may disagree with it.
 851
 852 For direct examination of the buffer, the variable
 853 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 854 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 855 of these pointers is usually preferable to examination of the scalar
 856 through normal scalar means.
 857
 858 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 859
 860 Direct pointer to the end of the chunk of text currently being lexed, the
 861 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 862 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 863 always located at the end of the buffer, and does not count as part of
 864 the buffer's contents.
 865
 866 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 867
 868 Points to the current position of lexing inside the lexer buffer.
 869 Characters around this point may be freely examined, within
 870 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 871 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 872 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 873
 874 Lexing code (whether in the Perl core or not) moves this pointer past
 875 the characters that it consumes.  It is also expected to perform some
 876 bookkeeping whenever a newline character is consumed.  This movement
 877 can be more conveniently performed by the function L</lex_read_to>,
 878 which handles newlines appropriately.
 879
 880 Interpretation of the buffer's octets can be abstracted out by
 881 using the slightly higher-level functions L</lex_peek_unichar> and
 882 L</lex_read_unichar>.
 883
 884 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 885
 886 Points to the start of the current line inside the lexer buffer.
 887 This is useful for indicating at which column an error occurred, and
 888 not much else.  This must be updated by any lexing code that consumes
 889 a newline; the function L</lex_read_to> handles this detail.
 890
 891 =cut
 892 */
 893
 894 /*
 895 =for apidoc Amx|bool|lex_bufutf8
 896
 897 Indicates whether the octets in the lexer buffer
 898 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 899 of Unicode characters.  If not, they should be interpreted as Latin-1
 900 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 901
 902 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 903 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 904 encoding.
 905
 906 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 907 is significant, but not the whole story regarding the input character
 908 encoding.  Normally, when a file is being read, the scalar contains octets
 909 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 910 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 911 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 912 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 913 is in effect.  This logic may change in the future; use this function
 914 instead of implementing the logic yourself.
 915
 916 =cut
 917 */
 918
 919 bool
 920 Perl_lex_bufutf8(pTHX)
 921 {
 922     return UTF;
 923 }
 924
 925 /*
 926 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 927
 928 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 929 at least C<len> octets (including terminating C<NUL>).  Returns a
 930 pointer to the reallocated buffer.  This is necessary before making
 931 any direct modification of the buffer that would increase its length.
 932 L</lex_stuff_pvn> provides a more convenient way to insert text into
 933 the buffer.
 934
 935 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 936 this function updates all of the lexer's variables that point directly
 937 into the buffer.
 938
 939 =cut
 940 */
 941
 942 char *
 943 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 944 {
 945     SV *linestr;
 946     char *buf;
 947     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 948     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 949     bool current;
 950
 951     linestr = PL_parser->linestr;
 952     buf = SvPVX(linestr);
 953     if (len <= SvLEN(linestr))
 954         return buf;
 955
 956     /* Is the lex_shared linestr SV the same as the current linestr SV?
 957      * Only in this case does re_eval_start need adjusting, since it
 958      * points within lex_shared->ls_linestr's buffer */
 959     current = (   !PL_parser->lex_shared->ls_linestr
 960                || linestr == PL_parser->lex_shared->ls_linestr);
 961
 962     bufend_pos = PL_parser->bufend - buf;
 963     bufptr_pos = PL_parser->bufptr - buf;
 964     oldbufptr_pos = PL_parser->oldbufptr - buf;
 965     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 966     linestart_pos = PL_parser->linestart - buf;
 967     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 968     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 969     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
 970                             PL_parser->lex_shared->re_eval_start - buf : 0;
 971
 972     buf = sv_grow(linestr, len);
 973
 974     PL_parser->bufend = buf + bufend_pos;
 975     PL_parser->bufptr = buf + bufptr_pos;
 976     PL_parser->oldbufptr = buf + oldbufptr_pos;
 977     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 978     PL_parser->linestart = buf + linestart_pos;
 979     if (PL_parser->last_uni)
 980         PL_parser->last_uni = buf + last_uni_pos;
 981     if (PL_parser->last_lop)
 982         PL_parser->last_lop = buf + last_lop_pos;
 983     if (current && PL_parser->lex_shared->re_eval_start)
 984         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 985     return buf;
 986 }
 987
 988 /*
 989 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 990
 991 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 992 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 993 reallocating the buffer if necessary.  This means that lexing code that
 994 runs later will see the characters as if they had appeared in the input.
 995 It is not recommended to do this as part of normal parsing, and most
 996 uses of this facility run the risk of the inserted characters being
 997 interpreted in an unintended manner.
 998
 999 The string to be inserted is represented by C<len> octets starting
1000 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1001 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1002 The characters are recoded for the lexer buffer, according to how the
1003 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1004 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1005 function is more convenient.
1006
1007 =cut
1008 */
1009
1010 void
1011 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1012 {
1013     dVAR;
1014     char *bufptr;
1015     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1016     if (flags & ~(LEX_STUFF_UTF8))
1017         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1018     if (UTF) {
1019         if (flags & LEX_STUFF_UTF8) {
1020             goto plain_copy;
1021         } else {
1022             STRLEN highhalf = 0;    /* Count of variants */
1023             const char *p, *e = pv+len;
1024             for (p = pv; p != e; p++) {
1025                 if (! UTF8_IS_INVARIANT(*p)) {
1026                     highhalf++;
1027                 }
1028             }
1029             if (!highhalf)
1030                 goto plain_copy;
1031             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1032             bufptr = PL_parser->bufptr;
1033             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1034             SvCUR_set(PL_parser->linestr,
1035                 SvCUR(PL_parser->linestr) + len+highhalf);
1036             PL_parser->bufend += len+highhalf;
1037             for (p = pv; p != e; p++) {
1038                 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1039             }
1040         }
1041     } else {
1042         if (flags & LEX_STUFF_UTF8) {
1043             STRLEN highhalf = 0;
1044             const char *p, *e = pv+len;
1045             for (p = pv; p != e; p++) {
1046                 U8 c = (U8)*p;
1047                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1048                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1049                                 "non-Latin-1 character into Latin-1 input");
1050                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1051                     p++;
1052                     highhalf++;
1053                 } else assert(UTF8_IS_INVARIANT(c));
1054             }
1055             if (!highhalf)
1056                 goto plain_copy;
1057             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1058             bufptr = PL_parser->bufptr;
1059             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1060             SvCUR_set(PL_parser->linestr,
1061                 SvCUR(PL_parser->linestr) + len-highhalf);
1062             PL_parser->bufend += len-highhalf;
1063             p = pv;
1064             while (p < e) {
1065                 if (UTF8_IS_INVARIANT(*p)) {
1066                     *bufptr++ = *p;
1067                     p++;
1068                 }
1069                 else {
1070                     assert(p < e -1 );
1071                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1072                     p += 2;
1073                 }
1074             }
1075         } else {
1076           plain_copy:
1077             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1078             bufptr = PL_parser->bufptr;
1079             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1080             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1081             PL_parser->bufend += len;
1082             Copy(pv, bufptr, len, char);
1083         }
1084     }
1085 }
1086
1087 /*
1088 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1089
1090 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1091 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1092 reallocating the buffer if necessary.  This means that lexing code that
1093 runs later will see the characters as if they had appeared in the input.
1094 It is not recommended to do this as part of normal parsing, and most
1095 uses of this facility run the risk of the inserted characters being
1096 interpreted in an unintended manner.
1097
1098 The string to be inserted is represented by octets starting at C<pv>
1099 and continuing to the first nul.  These octets are interpreted as either
1100 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1101 in C<flags>.  The characters are recoded for the lexer buffer, according
1102 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1103 If it is not convenient to nul-terminate a string to be inserted, the
1104 L</lex_stuff_pvn> function is more appropriate.
1105
1106 =cut
1107 */
1108
1109 void
1110 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1111 {
1112     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1113     lex_stuff_pvn(pv, strlen(pv), flags);
1114 }
1115
1116 /*
1117 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1118
1119 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1120 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1121 reallocating the buffer if necessary.  This means that lexing code that
1122 runs later will see the characters as if they had appeared in the input.
1123 It is not recommended to do this as part of normal parsing, and most
1124 uses of this facility run the risk of the inserted characters being
1125 interpreted in an unintended manner.
1126
1127 The string to be inserted is the string value of C<sv>.  The characters
1128 are recoded for the lexer buffer, according to how the buffer is currently
1129 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1130 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1131 need to construct a scalar.
1132
1133 =cut
1134 */
1135
1136 void
1137 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1138 {
1139     char *pv;
1140     STRLEN len;
1141     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1142     if (flags)
1143         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1144     pv = SvPV(sv, len);
1145     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1146 }
1147
1148 /*
1149 =for apidoc Amx|void|lex_unstuff|char *ptr
1150
1151 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1152 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1153 This hides the discarded text from any lexing code that runs later,
1154 as if the text had never appeared.
1155
1156 This is not the normal way to consume lexed text.  For that, use
1157 L</lex_read_to>.
1158
1159 =cut
1160 */
1161
1162 void
1163 Perl_lex_unstuff(pTHX_ char *ptr)
1164 {
1165     char *buf, *bufend;
1166     STRLEN unstuff_len;
1167     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1168     buf = PL_parser->bufptr;
1169     if (ptr < buf)
1170         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1171     if (ptr == buf)
1172         return;
1173     bufend = PL_parser->bufend;
1174     if (ptr > bufend)
1175         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1176     unstuff_len = ptr - buf;
1177     Move(ptr, buf, bufend+1-ptr, char);
1178     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1179     PL_parser->bufend = bufend - unstuff_len;
1180 }
1181
1182 /*
1183 =for apidoc Amx|void|lex_read_to|char *ptr
1184
1185 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1186 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1187 performing the correct bookkeeping whenever a newline character is passed.
1188 This is the normal way to consume lexed text.
1189
1190 Interpretation of the buffer's octets can be abstracted out by
1191 using the slightly higher-level functions L</lex_peek_unichar> and
1192 L</lex_read_unichar>.
1193
1194 =cut
1195 */
1196
1197 void
1198 Perl_lex_read_to(pTHX_ char *ptr)
1199 {
1200     char *s;
1201     PERL_ARGS_ASSERT_LEX_READ_TO;
1202     s = PL_parser->bufptr;
1203     if (ptr < s || ptr > PL_parser->bufend)
1204         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1205     for (; s != ptr; s++)
1206         if (*s == '\n') {
1207             COPLINE_INC_WITH_HERELINES;
1208             PL_parser->linestart = s+1;
1209         }
1210     PL_parser->bufptr = ptr;
1211 }
1212
1213 /*
1214 =for apidoc Amx|void|lex_discard_to|char *ptr
1215
1216 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1217 up to C<ptr>.  The remaining content of the buffer will be moved, and
1218 all pointers into the buffer updated appropriately.  C<ptr> must not
1219 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1220 it is not permitted to discard text that has yet to be lexed.
1221
1222 Normally it is not necessarily to do this directly, because it suffices to
1223 use the implicit discarding behaviour of L</lex_next_chunk> and things
1224 based on it.  However, if a token stretches across multiple lines,
1225 and the lexing code has kept multiple lines of text in the buffer for
1226 that purpose, then after completion of the token it would be wise to
1227 explicitly discard the now-unneeded earlier lines, to avoid future
1228 multi-line tokens growing the buffer without bound.
1229
1230 =cut
1231 */
1232
1233 void
1234 Perl_lex_discard_to(pTHX_ char *ptr)
1235 {
1236     char *buf;
1237     STRLEN discard_len;
1238     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1239     buf = SvPVX(PL_parser->linestr);
1240     if (ptr < buf)
1241         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1242     if (ptr == buf)
1243         return;
1244     if (ptr > PL_parser->bufptr)
1245         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1246     discard_len = ptr - buf;
1247     if (PL_parser->oldbufptr < ptr)
1248         PL_parser->oldbufptr = ptr;
1249     if (PL_parser->oldoldbufptr < ptr)
1250         PL_parser->oldoldbufptr = ptr;
1251     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1252         PL_parser->last_uni = NULL;
1253     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1254         PL_parser->last_lop = NULL;
1255     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1256     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1257     PL_parser->bufend -= discard_len;
1258     PL_parser->bufptr -= discard_len;
1259     PL_parser->oldbufptr -= discard_len;
1260     PL_parser->oldoldbufptr -= discard_len;
1261     if (PL_parser->last_uni)
1262         PL_parser->last_uni -= discard_len;
1263     if (PL_parser->last_lop)
1264         PL_parser->last_lop -= discard_len;
1265 }
1266
1267 void
1268 Perl_notify_parser_that_changed_to_utf8(pTHX)
1269 {
1270     /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1271      * off to on.  At compile time, this has the effect of entering a 'use
1272      * utf8' section.  This means that any input was not previously checked for
1273      * UTF-8 (because it was off), but now we do need to check it, or our
1274      * assumptions about the input being sane could be wrong, and we could
1275      * segfault.  This routine just sets a flag so that the next time we look
1276      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1277      * proper phase, there may not be a parser object, but if there is, setting
1278      * the flag is harmless */
1279
1280     if (PL_parser) {
1281         PL_parser->recheck_utf8_validity = TRUE;
1282     }
1283 }
1284
1285 /*
1286 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1287
1288 Reads in the next chunk of text to be lexed, appending it to
1289 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1290 looked to the end of the current chunk and wants to know more.  It is
1291 usual, but not necessary, for lexing to have consumed the entirety of
1292 the current chunk at this time.
1293
1294 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1295 chunk (i.e., the current chunk has been entirely consumed), normally the
1296 current chunk will be discarded at the same time that the new chunk is
1297 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1298 will not be discarded.  If the current chunk has not been entirely
1299 consumed, then it will not be discarded regardless of the flag.
1300
1301 Returns true if some new text was added to the buffer, or false if the
1302 buffer has reached the end of the input text.
1303
1304 =cut
1305 */
1306
1307 #define LEX_FAKE_EOF 0x80000000
1308 #define LEX_NO_TERM  0x40000000 /* here-doc */
1309
1310 bool
1311 Perl_lex_next_chunk(pTHX_ U32 flags)
1312 {
1313     SV *linestr;
1314     char *buf;
1315     STRLEN old_bufend_pos, new_bufend_pos;
1316     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1317     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1318     bool got_some_for_debugger = 0;
1319     bool got_some;
1320
1321     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1322         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1323     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1324         return FALSE;
1325     linestr = PL_parser->linestr;
1326     buf = SvPVX(linestr);
1327     if (!(flags & LEX_KEEP_PREVIOUS)
1328           && PL_parser->bufptr == PL_parser->bufend)
1329     {
1330         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1331         linestart_pos = 0;
1332         if (PL_parser->last_uni != PL_parser->bufend)
1333             PL_parser->last_uni = NULL;
1334         if (PL_parser->last_lop != PL_parser->bufend)
1335             PL_parser->last_lop = NULL;
1336         last_uni_pos = last_lop_pos = 0;
1337         *buf = 0;
1338         SvCUR(linestr) = 0;
1339     } else {
1340         old_bufend_pos = PL_parser->bufend - buf;
1341         bufptr_pos = PL_parser->bufptr - buf;
1342         oldbufptr_pos = PL_parser->oldbufptr - buf;
1343         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1344         linestart_pos = PL_parser->linestart - buf;
1345         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1346         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1347     }
1348     if (flags & LEX_FAKE_EOF) {
1349         goto eof;
1350     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1351         got_some = 0;
1352     } else if (filter_gets(linestr, old_bufend_pos)) {
1353         got_some = 1;
1354         got_some_for_debugger = 1;
1355     } else if (flags & LEX_NO_TERM) {
1356         got_some = 0;
1357     } else {
1358         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1359             SvPVCLEAR(linestr);
1360         eof:
1361         /* End of real input.  Close filehandle (unless it was STDIN),
1362          * then add implicit termination.
1363          */
1364         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1365             PerlIO_clearerr(PL_parser->rsfp);
1366         else if (PL_parser->rsfp)
1367             (void)PerlIO_close(PL_parser->rsfp);
1368         PL_parser->rsfp = NULL;
1369         PL_parser->in_pod = PL_parser->filtered = 0;
1370         if (!PL_in_eval && PL_minus_p) {
1371             sv_catpvs(linestr,
1372                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1373             PL_minus_n = PL_minus_p = 0;
1374         } else if (!PL_in_eval && PL_minus_n) {
1375             sv_catpvs(linestr, /*{*/";}");
1376             PL_minus_n = 0;
1377         } else
1378             sv_catpvs(linestr, ";");
1379         got_some = 1;
1380     }
1381     buf = SvPVX(linestr);
1382     new_bufend_pos = SvCUR(linestr);
1383     PL_parser->bufend = buf + new_bufend_pos;
1384     PL_parser->bufptr = buf + bufptr_pos;
1385
1386     if (UTF) {
1387         const U8* first_bad_char_loc;
1388         if (UNLIKELY(! is_utf8_string_loc(
1389                             (U8 *) PL_parser->bufptr,
1390                                    PL_parser->bufend - PL_parser->bufptr,
1391                                    &first_bad_char_loc)))
1392         {
1393             _force_out_malformed_utf8_message(first_bad_char_loc,
1394                                               (U8 *) PL_parser->bufend,
1395                                               0,
1396                                               1 /* 1 means die */ );
1397             NOT_REACHED; /* NOTREACHED */
1398         }
1399     }
1400
1401     PL_parser->oldbufptr = buf + oldbufptr_pos;
1402     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1403     PL_parser->linestart = buf + linestart_pos;
1404     if (PL_parser->last_uni)
1405         PL_parser->last_uni = buf + last_uni_pos;
1406     if (PL_parser->last_lop)
1407         PL_parser->last_lop = buf + last_lop_pos;
1408     if (PL_parser->preambling != NOLINE) {
1409         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1410         PL_parser->preambling = NOLINE;
1411     }
1412     if (   got_some_for_debugger
1413         && PERLDB_LINE_OR_SAVESRC
1414         && PL_curstash != PL_debstash)
1415     {
1416         /* debugger active and we're not compiling the debugger code,
1417          * so store the line into the debugger's array of lines
1418          */
1419         update_debugger_info(NULL, buf+old_bufend_pos,
1420             new_bufend_pos-old_bufend_pos);
1421     }
1422     return got_some;
1423 }
1424
1425 /*
1426 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1427
1428 Looks ahead one (Unicode) character in the text currently being lexed.
1429 Returns the codepoint (unsigned integer value) of the next character,
1430 or -1 if lexing has reached the end of the input text.  To consume the
1431 peeked character, use L</lex_read_unichar>.
1432
1433 If the next character is in (or extends into) the next chunk of input
1434 text, the next chunk will be read in.  Normally the current chunk will be
1435 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1436 bit set, then the current chunk will not be discarded.
1437
1438 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1439 is encountered, an exception is generated.
1440
1441 =cut
1442 */
1443
1444 I32
1445 Perl_lex_peek_unichar(pTHX_ U32 flags)
1446 {
1447     dVAR;
1448     char *s, *bufend;
1449     if (flags & ~(LEX_KEEP_PREVIOUS))
1450         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1451     s = PL_parser->bufptr;
1452     bufend = PL_parser->bufend;
1453     if (UTF) {
1454         U8 head;
1455         I32 unichar;
1456         STRLEN len, retlen;
1457         if (s == bufend) {
1458             if (!lex_next_chunk(flags))
1459                 return -1;
1460             s = PL_parser->bufptr;
1461             bufend = PL_parser->bufend;
1462         }
1463         head = (U8)*s;
1464         if (UTF8_IS_INVARIANT(head))
1465             return head;
1466         if (UTF8_IS_START(head)) {
1467             len = UTF8SKIP(&head);
1468             while ((STRLEN)(bufend-s) < len) {
1469                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1470                     break;
1471                 s = PL_parser->bufptr;
1472                 bufend = PL_parser->bufend;
1473             }
1474         }
1475         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1476         if (retlen == (STRLEN)-1) {
1477             _force_out_malformed_utf8_message((U8 *) s,
1478                                               (U8 *) bufend,
1479                                               0,
1480                                               1 /* 1 means die */ );
1481             NOT_REACHED; /* NOTREACHED */
1482         }
1483         return unichar;
1484     } else {
1485         if (s == bufend) {
1486             if (!lex_next_chunk(flags))
1487                 return -1;
1488             s = PL_parser->bufptr;
1489         }
1490         return (U8)*s;
1491     }
1492 }
1493
1494 /*
1495 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1496
1497 Reads the next (Unicode) character in the text currently being lexed.
1498 Returns the codepoint (unsigned integer value) of the character read,
1499 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1500 if lexing has reached the end of the input text.  To non-destructively
1501 examine the next character, use L</lex_peek_unichar> instead.
1502
1503 If the next character is in (or extends into) the next chunk of input
1504 text, the next chunk will be read in.  Normally the current chunk will be
1505 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1506 bit set, then the current chunk will not be discarded.
1507
1508 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1509 is encountered, an exception is generated.
1510
1511 =cut
1512 */
1513
1514 I32
1515 Perl_lex_read_unichar(pTHX_ U32 flags)
1516 {
1517     I32 c;
1518     if (flags & ~(LEX_KEEP_PREVIOUS))
1519         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1520     c = lex_peek_unichar(flags);
1521     if (c != -1) {
1522         if (c == '\n')
1523             COPLINE_INC_WITH_HERELINES;
1524         if (UTF)
1525             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1526         else
1527             ++(PL_parser->bufptr);
1528     }
1529     return c;
1530 }
1531
1532 /*
1533 =for apidoc Amx|void|lex_read_space|U32 flags
1534
1535 Reads optional spaces, in Perl style, in the text currently being
1536 lexed.  The spaces may include ordinary whitespace characters and
1537 Perl-style comments.  C<#line> directives are processed if encountered.
1538 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1539 at a non-space character (or the end of the input text).
1540
1541 If spaces extend into the next chunk of input text, the next chunk will
1542 be read in.  Normally the current chunk will be discarded at the same
1543 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1544 chunk will not be discarded.
1545
1546 =cut
1547 */
1548
1549 #define LEX_NO_INCLINE    0x40000000
1550 #define LEX_NO_NEXT_CHUNK 0x80000000
1551
1552 void
1553 Perl_lex_read_space(pTHX_ U32 flags)
1554 {
1555     char *s, *bufend;
1556     const bool can_incline = !(flags & LEX_NO_INCLINE);
1557     bool need_incline = 0;
1558     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1559         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1560     s = PL_parser->bufptr;
1561     bufend = PL_parser->bufend;
1562     while (1) {
1563         char c = *s;
1564         if (c == '#') {
1565             do {
1566                 c = *++s;
1567             } while (!(c == '\n' || (c == 0 && s == bufend)));
1568         } else if (c == '\n') {
1569             s++;
1570             if (can_incline) {
1571                 PL_parser->linestart = s;
1572                 if (s == bufend)
1573                     need_incline = 1;
1574                 else
1575                     incline(s, bufend);
1576             }
1577         } else if (isSPACE(c)) {
1578             s++;
1579         } else if (c == 0 && s == bufend) {
1580             bool got_more;
1581             line_t l;
1582             if (flags & LEX_NO_NEXT_CHUNK)
1583                 break;
1584             PL_parser->bufptr = s;
1585             l = CopLINE(PL_curcop);
1586             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1587             got_more = lex_next_chunk(flags);
1588             CopLINE_set(PL_curcop, l);
1589             s = PL_parser->bufptr;
1590             bufend = PL_parser->bufend;
1591             if (!got_more)
1592                 break;
1593             if (can_incline && need_incline && PL_parser->rsfp) {
1594                 incline(s, bufend);
1595                 need_incline = 0;
1596             }
1597         } else if (!c) {
1598             s++;
1599         } else {
1600             break;
1601         }
1602     }
1603     PL_parser->bufptr = s;
1604 }
1605
1606 /*
1607
1608 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1609
1610 This function performs syntax checking on a prototype, C<proto>.
1611 If C<warn> is true, any illegal characters or mismatched brackets
1612 will trigger illegalproto warnings, declaring that they were
1613 detected in the prototype for C<name>.
1614
1615 The return value is C<true> if this is a valid prototype, and
1616 C<false> if it is not, regardless of whether C<warn> was C<true> or
1617 C<false>.
1618
1619 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1620
1621 =cut
1622
1623  */
1624
1625 bool
1626 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1627 {
1628     STRLEN len, origlen;
1629     char *p;
1630     bool bad_proto = FALSE;
1631     bool in_brackets = FALSE;
1632     bool after_slash = FALSE;
1633     char greedy_proto = ' ';
1634     bool proto_after_greedy_proto = FALSE;
1635     bool must_be_last = FALSE;
1636     bool underscore = FALSE;
1637     bool bad_proto_after_underscore = FALSE;
1638
1639     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1640
1641     if (!proto)
1642         return TRUE;
1643
1644     p = SvPV(proto, len);
1645     origlen = len;
1646     for (; len--; p++) {
1647         if (!isSPACE(*p)) {
1648             if (must_be_last)
1649                 proto_after_greedy_proto = TRUE;
1650             if (underscore) {
1651                 if (!strchr(";@%", *p))
1652                     bad_proto_after_underscore = TRUE;
1653                 underscore = FALSE;
1654             }
1655             if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1656                 bad_proto = TRUE;
1657             }
1658             else {
1659                 if (*p == '[')
1660                     in_brackets = TRUE;
1661                 else if (*p == ']')
1662                     in_brackets = FALSE;
1663                 else if ((*p == '@' || *p == '%')
1664                          && !after_slash
1665                          && !in_brackets )
1666                 {
1667                     must_be_last = TRUE;
1668                     greedy_proto = *p;
1669                 }
1670                 else if (*p == '_')
1671                     underscore = TRUE;
1672             }
1673             if (*p == '\\')
1674                 after_slash = TRUE;
1675             else
1676                 after_slash = FALSE;
1677         }
1678     }
1679
1680     if (warn) {
1681         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1682         p -= origlen;
1683         p = SvUTF8(proto)
1684             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1685                              origlen, UNI_DISPLAY_ISPRINT)
1686             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1687
1688         if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1689             SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1690             sv_catpvs(name2, "::");
1691             sv_catsv(name2, (SV *)name);
1692             name = name2;
1693         }
1694
1695         if (proto_after_greedy_proto)
1696             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1697                         "Prototype after '%c' for %" SVf " : %s",
1698                         greedy_proto, SVfARG(name), p);
1699         if (in_brackets)
1700             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1701                         "Missing ']' in prototype for %" SVf " : %s",
1702                         SVfARG(name), p);
1703         if (bad_proto)
1704             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1705                         "Illegal character in prototype for %" SVf " : %s",
1706                         SVfARG(name), p);
1707         if (bad_proto_after_underscore)
1708             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1709                         "Illegal character after '_' in prototype for %" SVf " : %s",
1710                         SVfARG(name), p);
1711     }
1712
1713     return (! (proto_after_greedy_proto || bad_proto) );
1714 }
1715
1716 /*
1717  * S_incline
1718  * This subroutine has nothing to do with tilting, whether at windmills
1719  * or pinball tables.  Its name is short for "increment line".  It
1720  * increments the current line number in CopLINE(PL_curcop) and checks
1721  * to see whether the line starts with a comment of the form
1722  *    # line 500 "foo.pm"
1723  * If so, it sets the current line number and file to the values in the comment.
1724  */
1725
1726 STATIC void
1727 S_incline(pTHX_ const char *s, const char *end)
1728 {
1729     const char *t;
1730     const char *n;
1731     const char *e;
1732     line_t line_num;
1733     UV uv;
1734
1735     PERL_ARGS_ASSERT_INCLINE;
1736
1737     assert(end >= s);
1738
1739     COPLINE_INC_WITH_HERELINES;
1740     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1741      && s+1 == PL_bufend && *s == ';') {
1742         /* fake newline in string eval */
1743         CopLINE_dec(PL_curcop);
1744         return;
1745     }
1746     if (*s++ != '#')
1747         return;
1748     while (SPACE_OR_TAB(*s))
1749         s++;
1750     if (memBEGINs(s, (STRLEN) (end - s), "line"))
1751         s += sizeof("line") - 1;
1752     else
1753         return;
1754     if (SPACE_OR_TAB(*s))
1755         s++;
1756     else
1757         return;
1758     while (SPACE_OR_TAB(*s))
1759         s++;
1760     if (!isDIGIT(*s))
1761         return;
1762
1763     n = s;
1764     while (isDIGIT(*s))
1765         s++;
1766     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1767         return;
1768     while (SPACE_OR_TAB(*s))
1769         s++;
1770     if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1771         s++;
1772         e = t + 1;
1773     }
1774     else {
1775         t = s;
1776         while (*t && !isSPACE(*t))
1777             t++;
1778         e = t;
1779     }
1780     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1781         e++;
1782     if (*e != '\n' && *e != '\0')
1783         return;         /* false alarm */
1784
1785     if (!grok_atoUV(n, &uv, &e))
1786         return;
1787     line_num = ((line_t)uv) - 1;
1788
1789     if (t - s > 0) {
1790         const STRLEN len = t - s;
1791
1792         if (!PL_rsfp && !PL_parser->filtered) {
1793             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1794              * to *{"::_<newfilename"} */
1795             /* However, the long form of evals is only turned on by the
1796                debugger - usually they're "(eval %lu)" */
1797             GV * const cfgv = CopFILEGV(PL_curcop);
1798             if (cfgv) {
1799                 char smallbuf[128];
1800                 STRLEN tmplen2 = len;
1801                 char *tmpbuf2;
1802                 GV *gv2;
1803
1804                 if (tmplen2 + 2 <= sizeof smallbuf)
1805                     tmpbuf2 = smallbuf;
1806                 else
1807                     Newx(tmpbuf2, tmplen2 + 2, char);
1808
1809                 tmpbuf2[0] = '_';
1810                 tmpbuf2[1] = '<';
1811
1812                 memcpy(tmpbuf2 + 2, s, tmplen2);
1813                 tmplen2 += 2;
1814
1815                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1816                 if (!isGV(gv2)) {
1817                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1818                     /* adjust ${"::_<newfilename"} to store the new file name */
1819                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1820                     /* The line number may differ. If that is the case,
1821                        alias the saved lines that are in the array.
1822                        Otherwise alias the whole array. */
1823                     if (CopLINE(PL_curcop) == line_num) {
1824                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1825                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1826                     }
1827                     else if (GvAV(cfgv)) {
1828                         AV * const av = GvAV(cfgv);
1829                         const I32 start = CopLINE(PL_curcop)+1;
1830                         I32 items = AvFILLp(av) - start;
1831                         if (items > 0) {
1832                             AV * const av2 = GvAVn(gv2);
1833                             SV **svp = AvARRAY(av) + start;
1834                             I32 l = (I32)line_num+1;
1835                             while (items--)
1836                                 av_store(av2, l++, SvREFCNT_inc(*svp++));
1837                         }
1838                     }
1839                 }
1840
1841                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1842             }
1843         }
1844         CopFILE_free(PL_curcop);
1845         CopFILE_setn(PL_curcop, s, len);
1846     }
1847     CopLINE_set(PL_curcop, line_num);
1848 }
1849
1850 STATIC void
1851 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1852 {
1853     AV *av = CopFILEAVx(PL_curcop);
1854     if (av) {
1855         SV * sv;
1856         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1857         else {
1858             sv = *av_fetch(av, 0, 1);
1859             SvUPGRADE(sv, SVt_PVMG);
1860         }
1861         if (!SvPOK(sv)) SvPVCLEAR(sv);
1862         if (orig_sv)
1863             sv_catsv(sv, orig_sv);
1864         else
1865             sv_catpvn(sv, buf, len);
1866         if (!SvIOK(sv)) {
1867             (void)SvIOK_on(sv);
1868             SvIV_set(sv, 0);
1869         }
1870         if (PL_parser->preambling == NOLINE)
1871             av_store(av, CopLINE(PL_curcop), sv);
1872     }
1873 }
1874
1875 /*
1876  * skipspace
1877  * Called to gobble the appropriate amount and type of whitespace.
1878  * Skips comments as well.
1879  * Returns the next character after the whitespace that is skipped.
1880  *
1881  * peekspace
1882  * Same thing, but look ahead without incrementing line numbers or
1883  * adjusting PL_linestart.
1884  */
1885
1886 #define skipspace(s) skipspace_flags(s, 0)
1887 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1888
1889 STATIC char *
1890 S_skipspace_flags(pTHX_ char *s, U32 flags)
1891 {
1892     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1893     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1894         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1895             s++;
1896     } else {
1897         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1898         PL_bufptr = s;
1899         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1900                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1901                     LEX_NO_NEXT_CHUNK : 0));
1902         s = PL_bufptr;
1903         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1904         if (PL_linestart > PL_bufptr)
1905             PL_bufptr = PL_linestart;
1906         return s;
1907     }
1908     return s;
1909 }
1910
1911 /*
1912  * S_check_uni
1913  * Check the unary operators to ensure there's no ambiguity in how they're
1914  * used.  An ambiguous piece of code would be:
1915  *     rand + 5
1916  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1917  * the +5 is its argument.
1918  */
1919
1920 STATIC void
1921 S_check_uni(pTHX)
1922 {
1923     const char *s;
1924
1925     if (PL_oldoldbufptr != PL_last_uni)
1926         return;
1927     while (isSPACE(*PL_last_uni))
1928         PL_last_uni++;
1929     s = PL_last_uni;
1930     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1931         s += UTF ? UTF8SKIP(s) : 1;
1932     if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
1933         return;
1934
1935     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1936                      "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1937                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1938 }
1939
1940 /*
1941  * LOP : macro to build a list operator.  Its behaviour has been replaced
1942  * with a subroutine, S_lop() for which LOP is just another name.
1943  */
1944
1945 #define LOP(f,x) return lop(f,x,s)
1946
1947 /*
1948  * S_lop
1949  * Build a list operator (or something that might be one).  The rules:
1950  *  - if we have a next token, then it's a list operator (no parens) for
1951  *    which the next token has already been parsed; e.g.,
1952  *       sort foo @args
1953  *       sort foo (@args)
1954  *  - if the next thing is an opening paren, then it's a function
1955  *  - else it's a list operator
1956  */
1957
1958 STATIC I32
1959 S_lop(pTHX_ I32 f, U8 x, char *s)
1960 {
1961     PERL_ARGS_ASSERT_LOP;
1962
1963     pl_yylval.ival = f;
1964     CLINE;
1965     PL_bufptr = s;
1966     PL_last_lop = PL_oldbufptr;
1967     PL_last_lop_op = (OPCODE)f;
1968     if (PL_nexttoke)
1969         goto lstop;
1970     PL_expect = x;
1971     if (*s == '(')
1972         return REPORT(FUNC);
1973     s = skipspace(s);
1974     if (*s == '(')
1975         return REPORT(FUNC);
1976     else {
1977         lstop:
1978         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1979             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1980         return REPORT(LSTOP);
1981     }
1982 }
1983
1984 /*
1985  * S_force_next
1986  * When the lexer realizes it knows the next token (for instance,
1987  * it is reordering tokens for the parser) then it can call S_force_next
1988  * to know what token to return the next time the lexer is called.  Caller
1989  * will need to set PL_nextval[] and possibly PL_expect to ensure
1990  * the lexer handles the token correctly.
1991  */
1992
1993 STATIC void
1994 S_force_next(pTHX_ I32 type)
1995 {
1996 #ifdef DEBUGGING
1997     if (DEBUG_T_TEST) {
1998         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1999         tokereport(type, &NEXTVAL_NEXTTOKE);
2000     }
2001 #endif
2002     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2003     PL_nexttype[PL_nexttoke] = type;
2004     PL_nexttoke++;
2005 }
2006
2007 /*
2008  * S_postderef
2009  *
2010  * This subroutine handles postfix deref syntax after the arrow has already
2011  * been emitted.  @* $* etc. are emitted as two separate tokens right here.
2012  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2013  * only the first, leaving yylex to find the next.
2014  */
2015
2016 static int
2017 S_postderef(pTHX_ int const funny, char const next)
2018 {
2019     assert(funny == DOLSHARP || strchr("$@%&*", funny));
2020     if (next == '*') {
2021         PL_expect = XOPERATOR;
2022         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2023             assert('@' == funny || '$' == funny || DOLSHARP == funny);
2024             PL_lex_state = LEX_INTERPEND;
2025             if ('@' == funny)
2026                 force_next(POSTJOIN);
2027         }
2028         force_next(next);
2029         PL_bufptr+=2;
2030     }
2031     else {
2032         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2033          && !PL_lex_brackets)
2034             PL_lex_dojoin = 2;
2035         PL_expect = XOPERATOR;
2036         PL_bufptr++;
2037     }
2038     return funny;
2039 }
2040
2041 void
2042 Perl_yyunlex(pTHX)
2043 {
2044     int yyc = PL_parser->yychar;
2045     if (yyc != YYEMPTY) {
2046         if (yyc) {
2047             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2048             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2049                 PL_lex_allbrackets--;
2050                 PL_lex_brackets--;
2051                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2052             } else if (yyc == '('/*)*/) {
2053                 PL_lex_allbrackets--;
2054                 yyc |= (2<<24);
2055             }
2056             force_next(yyc);
2057         }
2058         PL_parser->yychar = YYEMPTY;
2059     }
2060 }
2061
2062 STATIC SV *
2063 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2064 {
2065     SV * const sv = newSVpvn_utf8(start, len,
2066                     ! IN_BYTES
2067                   &&  UTF
2068                   &&  is_utf8_non_invariant_string((const U8*)start, len));
2069     return sv;
2070 }
2071
2072 /*
2073  * S_force_word
2074  * When the lexer knows the next thing is a word (for instance, it has
2075  * just seen -> and it knows that the next char is a word char, then
2076  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2077  * lookahead.
2078  *
2079  * Arguments:
2080  *   char *start : buffer position (must be within PL_linestr)
2081  *   int token   : PL_next* will be this type of bare word
2082  *                 (e.g., METHOD,BAREWORD)
2083  *   int check_keyword : if true, Perl checks to make sure the word isn't
2084  *       a keyword (do this if the word is a label, e.g. goto FOO)
2085  *   int allow_pack : if true, : characters will also be allowed (require,
2086  *       use, etc. do this)
2087  */
2088
2089 STATIC char *
2090 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2091 {
2092     char *s;
2093     STRLEN len;
2094
2095     PERL_ARGS_ASSERT_FORCE_WORD;
2096
2097     start = skipspace(start);
2098     s = start;
2099     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2100         || (allow_pack && *s == ':' && s[1] == ':') )
2101     {
2102         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2103         if (check_keyword) {
2104           char *s2 = PL_tokenbuf;
2105           STRLEN len2 = len;
2106           if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2107             s2 += sizeof("CORE::") - 1;
2108             len2 -= sizeof("CORE::") - 1;
2109           }
2110           if (keyword(s2, len2, 0))
2111             return start;
2112         }
2113         if (token == METHOD) {
2114             s = skipspace(s);
2115             if (*s == '(')
2116                 PL_expect = XTERM;
2117             else {
2118                 PL_expect = XOPERATOR;
2119             }
2120         }
2121         NEXTVAL_NEXTTOKE.opval
2122             = newSVOP(OP_CONST,0,
2123                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2124         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2125         force_next(token);
2126     }
2127     return s;
2128 }
2129
2130 /*
2131  * S_force_ident
2132  * Called when the lexer wants $foo *foo &foo etc, but the program
2133  * text only contains the "foo" portion.  The first argument is a pointer
2134  * to the "foo", and the second argument is the type symbol to prefix.
2135  * Forces the next token to be a "BAREWORD".
2136  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2137  */
2138
2139 STATIC void
2140 S_force_ident(pTHX_ const char *s, int kind)
2141 {
2142     PERL_ARGS_ASSERT_FORCE_IDENT;
2143
2144     if (s[0]) {
2145         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2146         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2147                                                                 UTF ? SVf_UTF8 : 0));
2148         NEXTVAL_NEXTTOKE.opval = o;
2149         force_next(BAREWORD);
2150         if (kind) {
2151             o->op_private = OPpCONST_ENTERED;
2152             /* XXX see note in pp_entereval() for why we forgo typo
2153                warnings if the symbol must be introduced in an eval.
2154                GSAR 96-10-12 */
2155             gv_fetchpvn_flags(s, len,
2156                               (PL_in_eval ? GV_ADDMULTI
2157                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2158                               kind == '$' ? SVt_PV :
2159                               kind == '@' ? SVt_PVAV :
2160                               kind == '%' ? SVt_PVHV :
2161                               SVt_PVGV
2162                               );
2163         }
2164     }
2165 }
2166
2167 static void
2168 S_force_ident_maybe_lex(pTHX_ char pit)
2169 {
2170     NEXTVAL_NEXTTOKE.ival = pit;
2171     force_next('p');
2172 }
2173
2174 NV
2175 Perl_str_to_version(pTHX_ SV *sv)
2176 {
2177     NV retval = 0.0;
2178     NV nshift = 1.0;
2179     STRLEN len;
2180     const char *start = SvPV_const(sv,len);
2181     const char * const end = start + len;
2182     const bool utf = cBOOL(SvUTF8(sv));
2183
2184     PERL_ARGS_ASSERT_STR_TO_VERSION;
2185
2186     while (start < end) {
2187         STRLEN skip;
2188         UV n;
2189         if (utf)
2190             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2191         else {
2192             n = *(U8*)start;
2193             skip = 1;
2194         }
2195         retval += ((NV)n)/nshift;
2196         start += skip;
2197         nshift *= 1000;
2198     }
2199     return retval;
2200 }
2201
2202 /*
2203  * S_force_version
2204  * Forces the next token to be a version number.
2205  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2206  * and if "guessing" is TRUE, then no new token is created (and the caller
2207  * must use an alternative parsing method).
2208  */
2209
2210 STATIC char *
2211 S_force_version(pTHX_ char *s, int guessing)
2212 {
2213     OP *version = NULL;
2214     char *d;
2215
2216     PERL_ARGS_ASSERT_FORCE_VERSION;
2217
2218     s = skipspace(s);
2219
2220     d = s;
2221     if (*d == 'v')
2222         d++;
2223     if (isDIGIT(*d)) {
2224         while (isDIGIT(*d) || *d == '_' || *d == '.')
2225             d++;
2226         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2227             SV *ver;
2228             s = scan_num(s, &pl_yylval);
2229             version = pl_yylval.opval;
2230             ver = cSVOPx(version)->op_sv;
2231             if (SvPOK(ver) && !SvNIOK(ver)) {
2232                 SvUPGRADE(ver, SVt_PVNV);
2233                 SvNV_set(ver, str_to_version(ver));
2234                 SvNOK_on(ver);          /* hint that it is a version */
2235             }
2236         }
2237         else if (guessing) {
2238             return s;
2239         }
2240     }
2241
2242     /* NOTE: The parser sees the package name and the VERSION swapped */
2243     NEXTVAL_NEXTTOKE.opval = version;
2244     force_next(BAREWORD);
2245
2246     return s;
2247 }
2248
2249 /*
2250  * S_force_strict_version
2251  * Forces the next token to be a version number using strict syntax rules.
2252  */
2253
2254 STATIC char *
2255 S_force_strict_version(pTHX_ char *s)
2256 {
2257     OP *version = NULL;
2258     const char *errstr = NULL;
2259
2260     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2261
2262     while (isSPACE(*s)) /* leading whitespace */
2263         s++;
2264
2265     if (is_STRICT_VERSION(s,&errstr)) {
2266         SV *ver = newSV(0);
2267         s = (char *)scan_version(s, ver, 0);
2268         version = newSVOP(OP_CONST, 0, ver);
2269     }
2270     else if ((*s != ';' && *s != '{' && *s != '}' )
2271              && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2272     {
2273         PL_bufptr = s;
2274         if (errstr)
2275             yyerror(errstr); /* version required */
2276         return s;
2277     }
2278
2279     /* NOTE: The parser sees the package name and the VERSION swapped */
2280     NEXTVAL_NEXTTOKE.opval = version;
2281     force_next(BAREWORD);
2282
2283     return s;
2284 }
2285
2286 /*
2287  * S_tokeq
2288  * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2289  * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2290  * unchanged, and a new SV containing the modified input is returned.
2291  */
2292
2293 STATIC SV *
2294 S_tokeq(pTHX_ SV *sv)
2295 {
2296     char *s;
2297     char *send;
2298     char *d;
2299     SV *pv = sv;
2300
2301     PERL_ARGS_ASSERT_TOKEQ;
2302
2303     assert (SvPOK(sv));
2304     assert (SvLEN(sv));
2305     assert (!SvIsCOW(sv));
2306     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2307         goto finish;
2308     s = SvPVX(sv);
2309     send = SvEND(sv);
2310     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2311     while (s < send && !(*s == '\\' && s[1] == '\\'))
2312         s++;
2313     if (s == send)
2314         goto finish;
2315     d = s;
2316     if ( PL_hints & HINT_NEW_STRING ) {
2317         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2318                             SVs_TEMP | SvUTF8(sv));
2319     }
2320     while (s < send) {
2321         if (*s == '\\') {
2322             if (s + 1 < send && (s[1] == '\\'))
2323                 s++;            /* all that, just for this */
2324         }
2325         *d++ = *s++;
2326     }
2327     *d = '\0';
2328     SvCUR_set(sv, d - SvPVX_const(sv));
2329   finish:
2330     if ( PL_hints & HINT_NEW_STRING )
2331        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2332     return sv;
2333 }
2334
2335 /*
2336  * Now come three functions related to double-quote context,
2337  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2338  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2339  * interact with PL_lex_state, and create fake ( ... ) argument lists
2340  * to handle functions and concatenation.
2341  * For example,
2342  *   "foo\lbar"
2343  * is tokenised as
2344  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2345  */
2346
2347 /*
2348  * S_sublex_start
2349  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2350  *
2351  * Pattern matching will set PL_lex_op to the pattern-matching op to
2352  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2353  *
2354  * OP_CONST is easy--just make the new op and return.
2355  *
2356  * Everything else becomes a FUNC.
2357  *
2358  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2359  * had an OP_CONST.  This just sets us up for a
2360  * call to S_sublex_push().
2361  */
2362
2363 STATIC I32
2364 S_sublex_start(pTHX)
2365 {
2366     const I32 op_type = pl_yylval.ival;
2367
2368     if (op_type == OP_NULL) {
2369         pl_yylval.opval = PL_lex_op;
2370         PL_lex_op = NULL;
2371         return THING;
2372     }
2373     if (op_type == OP_CONST) {
2374         SV *sv = PL_lex_stuff;
2375         PL_lex_stuff = NULL;
2376         sv = tokeq(sv);
2377
2378         if (SvTYPE(sv) == SVt_PVIV) {
2379             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2380             STRLEN len;
2381             const char * const p = SvPV_const(sv, len);
2382             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2383             SvREFCNT_dec(sv);
2384             sv = nsv;
2385         }
2386         pl_yylval.opval = newSVOP(op_type, 0, sv);
2387         return THING;
2388     }
2389
2390     PL_parser->lex_super_state = PL_lex_state;
2391     PL_parser->lex_sub_inwhat = (U16)op_type;
2392     PL_parser->lex_sub_op = PL_lex_op;
2393     PL_lex_state = LEX_INTERPPUSH;
2394
2395     PL_expect = XTERM;
2396     if (PL_lex_op) {
2397         pl_yylval.opval = PL_lex_op;
2398         PL_lex_op = NULL;
2399         return PMFUNC;
2400     }
2401     else
2402         return FUNC;
2403 }
2404
2405 /*
2406  * S_sublex_push
2407  * Create a new scope to save the lexing state.  The scope will be
2408  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2409  * to the uc, lc, etc. found before.
2410  * Sets PL_lex_state to LEX_INTERPCONCAT.
2411  */
2412
2413 STATIC I32
2414 S_sublex_push(pTHX)
2415 {
2416     LEXSHARED *shared;
2417     const bool is_heredoc = PL_multi_close == '<';
2418     ENTER;
2419
2420     PL_lex_state = PL_parser->lex_super_state;
2421     SAVEI8(PL_lex_dojoin);
2422     SAVEI32(PL_lex_brackets);
2423     SAVEI32(PL_lex_allbrackets);
2424     SAVEI32(PL_lex_formbrack);
2425     SAVEI8(PL_lex_fakeeof);
2426     SAVEI32(PL_lex_casemods);
2427     SAVEI32(PL_lex_starts);
2428     SAVEI8(PL_lex_state);
2429     SAVESPTR(PL_lex_repl);
2430     SAVEVPTR(PL_lex_inpat);
2431     SAVEI16(PL_lex_inwhat);
2432     if (is_heredoc)
2433     {
2434         SAVECOPLINE(PL_curcop);
2435         SAVEI32(PL_multi_end);
2436         SAVEI32(PL_parser->herelines);
2437         PL_parser->herelines = 0;
2438     }
2439     SAVEIV(PL_multi_close);
2440     SAVEPPTR(PL_bufptr);
2441     SAVEPPTR(PL_bufend);
2442     SAVEPPTR(PL_oldbufptr);
2443     SAVEPPTR(PL_oldoldbufptr);
2444     SAVEPPTR(PL_last_lop);
2445     SAVEPPTR(PL_last_uni);
2446     SAVEPPTR(PL_linestart);
2447     SAVESPTR(PL_linestr);
2448     SAVEGENERICPV(PL_lex_brackstack);
2449     SAVEGENERICPV(PL_lex_casestack);
2450     SAVEGENERICPV(PL_parser->lex_shared);
2451     SAVEBOOL(PL_parser->lex_re_reparsing);
2452     SAVEI32(PL_copline);
2453
2454     /* The here-doc parser needs to be able to peek into outer lexing
2455        scopes to find the body of the here-doc.  So we put PL_linestr and
2456        PL_bufptr into lex_shared, to ‘share’ those values.
2457      */
2458     PL_parser->lex_shared->ls_linestr = PL_linestr;
2459     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2460
2461     PL_linestr = PL_lex_stuff;
2462     PL_lex_repl = PL_parser->lex_sub_repl;
2463     PL_lex_stuff = NULL;
2464     PL_parser->lex_sub_repl = NULL;
2465
2466     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2467        set for an inner quote-like operator and then an error causes scope-
2468        popping.  We must not have a PL_lex_stuff value left dangling, as
2469        that breaks assumptions elsewhere.  See bug #123617.  */
2470     SAVEGENERICSV(PL_lex_stuff);
2471     SAVEGENERICSV(PL_parser->lex_sub_repl);
2472
2473     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2474         = SvPVX(PL_linestr);
2475     PL_bufend += SvCUR(PL_linestr);
2476     PL_last_lop = PL_last_uni = NULL;
2477     SAVEFREESV(PL_linestr);
2478     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2479
2480     PL_lex_dojoin = FALSE;
2481     PL_lex_brackets = PL_lex_formbrack = 0;
2482     PL_lex_allbrackets = 0;
2483     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2484     Newx(PL_lex_brackstack, 120, char);
2485     Newx(PL_lex_casestack, 12, char);
2486     PL_lex_casemods = 0;
2487     *PL_lex_casestack = '\0';
2488     PL_lex_starts = 0;
2489     PL_lex_state = LEX_INTERPCONCAT;
2490     if (is_heredoc)
2491         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2492     PL_copline = NOLINE;
2493
2494     Newxz(shared, 1, LEXSHARED);
2495     shared->ls_prev = PL_parser->lex_shared;
2496     PL_parser->lex_shared = shared;
2497
2498     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2499     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2500     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2501         PL_lex_inpat = PL_parser->lex_sub_op;
2502     else
2503         PL_lex_inpat = NULL;
2504
2505     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2506     PL_in_eval &= ~EVAL_RE_REPARSING;
2507
2508     return '(';
2509 }
2510
2511 /*
2512  * S_sublex_done
2513  * Restores lexer state after a S_sublex_push.
2514  */
2515
2516 STATIC I32
2517 S_sublex_done(pTHX)
2518 {
2519     if (!PL_lex_starts++) {
2520         SV * const sv = newSVpvs("");
2521         if (SvUTF8(PL_linestr))
2522             SvUTF8_on(sv);
2523         PL_expect = XOPERATOR;
2524         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2525         return THING;
2526     }
2527
2528     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2529         PL_lex_state = LEX_INTERPCASEMOD;
2530         return yylex();
2531     }
2532
2533     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2534     assert(PL_lex_inwhat != OP_TRANSR);
2535     if (PL_lex_repl) {
2536         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2537         PL_linestr = PL_lex_repl;
2538         PL_lex_inpat = 0;
2539         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2540         PL_bufend += SvCUR(PL_linestr);
2541         PL_last_lop = PL_last_uni = NULL;
2542         PL_lex_dojoin = FALSE;
2543         PL_lex_brackets = 0;
2544         PL_lex_allbrackets = 0;
2545         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2546         PL_lex_casemods = 0;
2547         *PL_lex_casestack = '\0';
2548         PL_lex_starts = 0;
2549         if (SvEVALED(PL_lex_repl)) {
2550             PL_lex_state = LEX_INTERPNORMAL;
2551             PL_lex_starts++;
2552             /*  we don't clear PL_lex_repl here, so that we can check later
2553                 whether this is an evalled subst; that means we rely on the
2554                 logic to ensure sublex_done() is called again only via the
2555                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2556         }
2557         else {
2558             PL_lex_state = LEX_INTERPCONCAT;
2559             PL_lex_repl = NULL;
2560         }
2561         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2562             CopLINE(PL_curcop) +=
2563                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2564                  + PL_parser->herelines;
2565             PL_parser->herelines = 0;
2566         }
2567         return '/';
2568     }
2569     else {
2570         const line_t l = CopLINE(PL_curcop);
2571         LEAVE;
2572         if (PL_multi_close == '<')
2573             PL_parser->herelines += l - PL_multi_end;
2574         PL_bufend = SvPVX(PL_linestr);
2575         PL_bufend += SvCUR(PL_linestr);
2576         PL_expect = XOPERATOR;
2577         return ')';
2578     }
2579 }
2580
2581 STATIC SV*
2582 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2583 {
2584     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2585      * interior, hence to the "}".  Finds what the name resolves to, returning
2586      * an SV* containing it; NULL if no valid one found */
2587
2588     SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2589
2590     HV * table;
2591     SV **cvp;
2592     SV *cv;
2593     SV *rv;
2594     HV *stash;
2595     const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2596
2597     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2598
2599     if (!SvCUR(res)) {
2600         SvREFCNT_dec_NN(res);
2601         /* diag_listed_as: Unknown charname '%s' */
2602         yyerror("Unknown charname ''");
2603         return NULL;
2604     }
2605
2606     res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2607                         /* include the <}> */
2608                         e - backslash_ptr + 1);
2609     if (! SvPOK(res)) {
2610         SvREFCNT_dec_NN(res);
2611         return NULL;
2612     }
2613
2614     /* See if the charnames handler is the Perl core's, and if so, we can skip
2615      * the validation needed for a user-supplied one, as Perl's does its own
2616      * validation. */
2617     table = GvHV(PL_hintgv);             /* ^H */
2618     cvp = hv_fetchs(table, "charnames", FALSE);
2619     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2620         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2621     {
2622         const char * const name = HvNAME(stash);
2623          if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2624            return res;
2625        }
2626     }
2627
2628     /* Here, it isn't Perl's charname handler.  We can't rely on a
2629      * user-supplied handler to validate the input name.  For non-ut8 input,
2630      * look to see that the first character is legal.  Then loop through the
2631      * rest checking that each is a continuation */
2632
2633     /* This code makes the reasonable assumption that the only Latin1-range
2634      * characters that begin a character name alias are alphabetic, otherwise
2635      * would have to create a isCHARNAME_BEGIN macro */
2636
2637     if (! UTF) {
2638         if (! isALPHAU(*s)) {
2639             goto bad_charname;
2640         }
2641         s++;
2642         while (s < e) {
2643             if (! isCHARNAME_CONT(*s)) {
2644                 goto bad_charname;
2645             }
2646             if (*s == ' ' && *(s-1) == ' ') {
2647                 goto multi_spaces;
2648             }
2649             s++;
2650         }
2651     }
2652     else {
2653         /* Similarly for utf8.  For invariants can check directly; for other
2654          * Latin1, can calculate their code point and check; otherwise  use a
2655          * swash */
2656         if (UTF8_IS_INVARIANT(*s)) {
2657             if (! isALPHAU(*s)) {
2658                 goto bad_charname;
2659             }
2660             s++;
2661         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2662             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2663                 goto bad_charname;
2664             }
2665             s += 2;
2666         }
2667         else {
2668             if (! PL_utf8_charname_begin) {
2669                 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2670                 PL_utf8_charname_begin = _core_swash_init("utf8",
2671                                                         "_Perl_Charname_Begin",
2672                                                         &PL_sv_undef,
2673                                                         1, 0, NULL, &flags);
2674             }
2675             if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2676                 goto bad_charname;
2677             }
2678             s += UTF8SKIP(s);
2679         }
2680
2681         while (s < e) {
2682             if (UTF8_IS_INVARIANT(*s)) {
2683                 if (! isCHARNAME_CONT(*s)) {
2684                     goto bad_charname;
2685                 }
2686                 if (*s == ' ' && *(s-1) == ' ') {
2687                     goto multi_spaces;
2688                 }
2689                 s++;
2690             }
2691             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2692                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2693                 {
2694                     goto bad_charname;
2695                 }
2696                 s += 2;
2697             }
2698             else {
2699                 if (! PL_utf8_charname_continue) {
2700                     U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2701                     PL_utf8_charname_continue = _core_swash_init("utf8",
2702                                                 "_Perl_Charname_Continue",
2703                                                 &PL_sv_undef,
2704                                                 1, 0, NULL, &flags);
2705                 }
2706                 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2707                     goto bad_charname;
2708                 }
2709                 s += UTF8SKIP(s);
2710             }
2711         }
2712     }
2713     if (*(s-1) == ' ') {
2714         /* diag_listed_as: charnames alias definitions may not contain
2715                            trailing white-space; marked by <-- HERE in %s
2716          */
2717         yyerror_pv(
2718             Perl_form(aTHX_
2719             "charnames alias definitions may not contain trailing "
2720             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2721             (int)(s - backslash_ptr + 1), backslash_ptr,
2722             (int)(e - s + 1), s + 1
2723             ),
2724         UTF ? SVf_UTF8 : 0);
2725         return NULL;
2726     }
2727
2728     if (SvUTF8(res)) { /* Don't accept malformed input */
2729         const U8* first_bad_char_loc;
2730         STRLEN len;
2731         const char* const str = SvPV_const(res, len);
2732         if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2733                                           &first_bad_char_loc)))
2734         {
2735             _force_out_malformed_utf8_message(first_bad_char_loc,
2736                                               (U8 *) PL_parser->bufend,
2737                                               0,
2738                                               0 /* 0 means don't die */ );
2739             /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2740                                immediately after '%s' */
2741             yyerror_pv(
2742               Perl_form(aTHX_
2743                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2744                  (int) (e - backslash_ptr + 1), backslash_ptr,
2745                  (int) ((char *) first_bad_char_loc - str), str
2746               ),
2747               SVf_UTF8);
2748             return NULL;
2749         }
2750     }
2751
2752     return res;
2753
2754   bad_charname: {
2755
2756         /* The final %.*s makes sure that should the trailing NUL be missing
2757          * that this print won't run off the end of the string */
2758         /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2759                            in \N{%s} */
2760         yyerror_pv(
2761           Perl_form(aTHX_
2762             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2763             (int)(s - backslash_ptr + 1), backslash_ptr,
2764             (int)(e - s + 1), s + 1
2765           ),
2766           UTF ? SVf_UTF8 : 0);
2767         return NULL;
2768     }
2769
2770   multi_spaces:
2771         /* diag_listed_as: charnames alias definitions may not contain a
2772                            sequence of multiple spaces; marked by <-- HERE
2773                            in %s */
2774         yyerror_pv(
2775           Perl_form(aTHX_
2776             "charnames alias definitions may not contain a sequence of "
2777             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2778             (int)(s - backslash_ptr + 1), backslash_ptr,
2779             (int)(e - s + 1), s + 1
2780           ),
2781           UTF ? SVf_UTF8 : 0);
2782         return NULL;
2783 }
2784
2785 /*
2786   scan_const
2787
2788   Extracts the next constant part of a pattern, double-quoted string,
2789   or transliteration.  This is terrifying code.
2790
2791   For example, in parsing the double-quoted string "ab\x63$d", it would
2792   stop at the '$' and return an OP_CONST containing 'abc'.
2793
2794   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2795   processing a pattern (PL_lex_inpat is true), a transliteration
2796   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2797
2798   Returns a pointer to the character scanned up to. If this is
2799   advanced from the start pointer supplied (i.e. if anything was
2800   successfully parsed), will leave an OP_CONST for the substring scanned
2801   in pl_yylval. Caller must intuit reason for not parsing further
2802   by looking at the next characters herself.
2803
2804   In patterns:
2805     expand:
2806       \N{FOO}  => \N{U+hex_for_character_FOO}
2807       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2808
2809     pass through:
2810         all other \-char, including \N and \N{ apart from \N{ABC}
2811
2812     stops on:
2813         @ and $ where it appears to be a var, but not for $ as tail anchor
2814         \l \L \u \U \Q \E
2815         (?{  or  (??{
2816
2817   In transliterations:
2818     characters are VERY literal, except for - not at the start or end
2819     of the string, which indicates a range.  However some backslash sequences
2820     are recognized: \r, \n, and the like
2821                     \007 \o{}, \x{}, \N{}
2822     If all elements in the transliteration are below 256,
2823     scan_const expands the range to the full set of intermediate
2824     characters. If the range is in utf8, the hyphen is replaced with
2825     a certain range mark which will be handled by pmtrans() in op.c.
2826
2827   In double-quoted strings:
2828     backslashes:
2829       all those recognized in transliterations
2830       deprecated backrefs: \1 (in substitution replacements)
2831       case and quoting: \U \Q \E
2832     stops on @ and $
2833
2834   scan_const does *not* construct ops to handle interpolated strings.
2835   It stops processing as soon as it finds an embedded $ or @ variable
2836   and leaves it to the caller to work out what's going on.
2837
2838   embedded arrays (whether in pattern or not) could be:
2839       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2840
2841   $ in double-quoted strings must be the symbol of an embedded scalar.
2842
2843   $ in pattern could be $foo or could be tail anchor.  Assumption:
2844   it's a tail anchor if $ is the last thing in the string, or if it's
2845   followed by one of "()| \r\n\t"
2846
2847   \1 (backreferences) are turned into $1 in substitutions
2848
2849   The structure of the code is
2850       while (there's a character to process) {
2851           handle transliteration ranges
2852           skip regexp comments /(?#comment)/ and codes /(?{code})/
2853           skip #-initiated comments in //x patterns
2854           check for embedded arrays
2855           check for embedded scalars
2856           if (backslash) {
2857               deprecate \1 in substitution replacements
2858               handle string-changing backslashes \l \U \Q \E, etc.
2859               switch (what was escaped) {
2860                   handle \- in a transliteration (becomes a literal -)
2861                   if a pattern and not \N{, go treat as regular character
2862                   handle \132 (octal characters)
2863                   handle \x15 and \x{1234} (hex characters)
2864                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2865                   handle \cV (control characters)
2866                   handle printf-style backslashes (\f, \r, \n, etc)
2867               } (end switch)
2868               continue
2869           } (end if backslash)
2870           handle regular character
2871     } (end while character to read)
2872
2873 */
2874
2875 STATIC char *
2876 S_scan_const(pTHX_ char *start)
2877 {
2878     char *send = PL_bufend;             /* end of the constant */
2879     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2880                                            on sizing. */
2881     char *s = start;                    /* start of the constant */
2882     char *d = SvPVX(sv);                /* destination for copies */
2883     bool dorange = FALSE;               /* are we in a translit range? */
2884     bool didrange = FALSE;              /* did we just finish a range? */
2885     bool in_charclass = FALSE;          /* within /[...]/ */
2886     bool has_utf8 = FALSE;              /* Output constant is UTF8 */
2887     bool  this_utf8 = cBOOL(UTF);       /* Is the source string assumed to be
2888                                            UTF8?  But, this can show as true
2889                                            when the source isn't utf8, as for
2890                                            example when it is entirely composed
2891                                            of hex constants */
2892     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
2893                                            number of characters found so far
2894                                            that will expand (into 2 bytes)
2895                                            should we have to convert to
2896                                            UTF-8) */
2897     SV *res;                            /* result from charnames */
2898     STRLEN offset_to_max = 0;   /* The offset in the output to where the range
2899                                    high-end character is temporarily placed */
2900
2901     /* Does something require special handling in tr/// ?  This avoids extra
2902      * work in a less likely case.  As such, khw didn't feel it was worth
2903      * adding any branches to the more mainline code to handle this, which
2904      * means that this doesn't get set in some circumstances when things like
2905      * \x{100} get expanded out.  As a result there needs to be extra testing
2906      * done in the tr code */
2907     bool has_above_latin1 = FALSE;
2908
2909     /* Note on sizing:  The scanned constant is placed into sv, which is
2910      * initialized by newSV() assuming one byte of output for every byte of
2911      * input.  This routine expects newSV() to allocate an extra byte for a
2912      * trailing NUL, which this routine will append if it gets to the end of
2913      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2914      * CAPITAL LETTER A}), or more output than input if the constant ends up
2915      * recoded to utf8, but each time a construct is found that might increase
2916      * the needed size, SvGROW() is called.  Its size parameter each time is
2917      * based on the best guess estimate at the time, namely the length used so
2918      * far, plus the length the current construct will occupy, plus room for
2919      * the trailing NUL, plus one byte for every input byte still unscanned */
2920
2921     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2922                        before set */
2923 #ifdef EBCDIC
2924     int backslash_N = 0;            /* ? was the character from \N{} */
2925     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
2926                                        platform-specific like \x65 */
2927 #endif
2928
2929     PERL_ARGS_ASSERT_SCAN_CONST;
2930
2931     assert(PL_lex_inwhat != OP_TRANSR);
2932     if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
2933         /* If we are doing a trans and we know we want UTF8 set expectation */
2934         has_utf8   = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2935         this_utf8  = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2936     }
2937
2938     /* Protect sv from errors and fatal warnings. */
2939     ENTER_with_name("scan_const");
2940     SAVEFREESV(sv);
2941
2942     while (s < send
2943            || dorange   /* Handle tr/// range at right edge of input */
2944     ) {
2945
2946         /* get transliterations out of the way (they're most literal) */
2947         if (PL_lex_inwhat == OP_TRANS) {
2948
2949             /* But there isn't any special handling necessary unless there is a
2950              * range, so for most cases we just drop down and handle the value
2951              * as any other.  There are two exceptions.
2952              *
2953              * 1.  A hyphen indicates that we are actually going to have a
2954              *     range.  In this case, skip the '-', set a flag, then drop
2955              *     down to handle what should be the end range value.
2956              * 2.  After we've handled that value, the next time through, that
2957              *     flag is set and we fix up the range.
2958              *
2959              * Ranges entirely within Latin1 are expanded out entirely, in
2960              * order to make the transliteration a simple table look-up.
2961              * Ranges that extend above Latin1 have to be done differently, so
2962              * there is no advantage to expanding them here, so they are
2963              * stored here as Min, ILLEGAL_UTF8_BYTE, Max.  The illegal byte
2964              * signifies a hyphen without any possible ambiguity.  On EBCDIC
2965              * machines, if the range is expressed as Unicode, the Latin1
2966              * portion is expanded out even if the range extends above
2967              * Latin1.  This is because each code point in it has to be
2968              * processed here individually to get its native translation */
2969
2970             if (! dorange) {
2971
2972                 /* Here, we don't think we're in a range.  If the new character
2973                  * is not a hyphen; or if it is a hyphen, but it's too close to
2974                  * either edge to indicate a range, or if we haven't output any
2975                  * characters yet then it's a regular character. */
2976                 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv)) {
2977
2978                     /* A regular character.  Process like any other, but first
2979                      * clear any flags */
2980                     didrange = FALSE;
2981                     dorange = FALSE;
2982 #ifdef EBCDIC
2983                     non_portable_endpoint = 0;
2984                     backslash_N = 0;
2985 #endif
2986                     /* The tests here for being above Latin1 and similar ones
2987                      * in the following 'else' suffice to find all such
2988                      * occurences in the constant, except those added by a
2989                      * backslash escape sequence, like \x{100}.  Mostly, those
2990                      * set 'has_above_latin1' as appropriate */
2991                     if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
2992                         has_above_latin1 = TRUE;
2993                     }
2994
2995                     /* Drops down to generic code to process current byte */
2996                 }
2997                 else {  /* Is a '-' in the context where it means a range */
2998                     if (didrange) { /* Something like y/A-C-Z// */
2999                         Perl_croak(aTHX_ "Ambiguous range in transliteration"
3000                                          " operator");
3001                     }
3002
3003                     dorange = TRUE;
3004
3005                     s++;    /* Skip past the hyphen */
3006
3007                     /* d now points to where the end-range character will be
3008                      * placed.  Save it so won't have to go finding it later,
3009                      * and drop down to get that character.  (Actually we
3010                      * instead save the offset, to handle the case where a
3011                      * realloc in the meantime could change the actual
3012                      * pointer).  We'll finish processing the range the next
3013                      * time through the loop */
3014                     offset_to_max = d - SvPVX_const(sv);
3015
3016                     if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3017                         has_above_latin1 = TRUE;
3018                     }
3019
3020                     /* Drops down to generic code to process current byte */
3021                 }
3022             }  /* End of not a range */
3023             else {
3024                 /* Here we have parsed a range.  Now must handle it.  At this
3025                  * point:
3026                  * 'sv' is a SV* that contains the output string we are
3027                  *      constructing.  The final two characters in that string
3028                  *      are the range start and range end, in order.
3029                  * 'd'  points to just beyond the range end in the 'sv' string,
3030                  *      where we would next place something
3031                  * 'offset_to_max' is the offset in 'sv' at which the character
3032                  *      (the range's maximum end point) before 'd'  begins.
3033                  */
3034                 char * max_ptr = SvPVX(sv) + offset_to_max;
3035                 char * min_ptr;
3036                 IV range_min;
3037                 IV range_max;   /* last character in range */
3038                 STRLEN grow;
3039                 Size_t offset_to_min = 0;
3040                 Size_t extras = 0;
3041 #ifdef EBCDIC
3042                 bool convert_unicode;
3043                 IV real_range_max = 0;
3044 #endif
3045                 /* Get the code point values of the range ends. */
3046                 if (has_utf8) {
3047                     /* We know the utf8 is valid, because we just constructed
3048                      * it ourselves in previous loop iterations */
3049                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3050                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3051                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3052
3053                     /* This compensates for not all code setting
3054                      * 'has_above_latin1', so that we don't skip stuff that
3055                      * should be executed */
3056                     if (range_max > 255) {
3057                         has_above_latin1 = TRUE;
3058                     }
3059                 }
3060                 else {
3061                     min_ptr = max_ptr - 1;
3062                     range_min = * (U8*) min_ptr;
3063                     range_max = * (U8*) max_ptr;
3064                 }
3065
3066                 /* If the range is just a single code point, like tr/a-a/.../,
3067                  * that code point is already in the output, twice.  We can
3068                  * just back up over the second instance and avoid all the rest
3069                  * of the work.  But if it is a variant character, it's been
3070                  * counted twice, so decrement.  (This unlikely scenario is
3071                  * special cased, like the one for a range of 2 code points
3072                  * below, only because the main-line code below needs a range
3073                  * of 3 or more to work without special casing.  Might as well
3074                  * get it out of the way now.) */
3075                 if (UNLIKELY(range_max == range_min)) {
3076                     d = max_ptr;
3077                     if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3078                         utf8_variant_count--;
3079                     }
3080                     goto range_done;
3081                 }
3082
3083 #ifdef EBCDIC
3084                 /* On EBCDIC platforms, we may have to deal with portable
3085                  * ranges.  These happen if at least one range endpoint is a
3086                  * Unicode value (\N{...}), or if the range is a subset of
3087                  * [A-Z] or [a-z], and both ends are literal characters,
3088                  * like 'A', and not like \x{C1} */
3089                 convert_unicode =
3090                                cBOOL(backslash_N)   /* \N{} forces Unicode,
3091                                                        hence portable range */
3092                     || (     ! non_portable_endpoint
3093                         && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3094                            || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3095                 if (convert_unicode) {
3096
3097                     /* Special handling is needed for these portable ranges.
3098                      * They are defined to be in Unicode terms, which includes
3099                      * all the Unicode code points between the end points.
3100                      * Convert to Unicode to get the Unicode range.  Later we
3101                      * will convert each code point in the range back to
3102                      * native.  */
3103                     range_min = NATIVE_TO_UNI(range_min);
3104                     range_max = NATIVE_TO_UNI(range_max);
3105                 }
3106 #endif
3107
3108                 if (range_min > range_max) {
3109 #ifdef EBCDIC
3110                     if (convert_unicode) {
3111                         /* Need to convert back to native for meaningful
3112                          * messages for this platform */
3113                         range_min = UNI_TO_NATIVE(range_min);
3114                         range_max = UNI_TO_NATIVE(range_max);
3115                     }
3116 #endif
3117                     /* Use the characters themselves for the error message if
3118                      * ASCII printables; otherwise some visible representation
3119                      * of them */
3120                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3121                         Perl_croak(aTHX_
3122                          "Invalid range \"%c-%c\" in transliteration operator",
3123                          (char)range_min, (char)range_max);
3124                     }
3125 #ifdef EBCDIC
3126                     else if (convert_unicode) {
3127         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3128                         Perl_croak(aTHX_
3129                            "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3130                            UVXf "}\" in transliteration operator",
3131                            range_min, range_max);
3132                     }
3133 #endif
3134                     else {
3135         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3136                         Perl_croak(aTHX_
3137                            "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3138                            " in transliteration operator",
3139                            range_min, range_max);
3140                     }
3141                 }
3142
3143                 /* If the range is exactly two code points long, they are
3144                  * already both in the output */
3145                 if (UNLIKELY(range_min + 1 == range_max)) {
3146                     goto range_done;
3147                 }
3148
3149                 /* Here the range contains at least 3 code points */
3150
3151                 if (has_utf8) {
3152
3153                     /* If everything in the transliteration is below 256, we
3154                      * can avoid special handling later.  A translation table
3155                      * for each of those bytes is created by op.c.  So we
3156                      * expand out all ranges to their constituent code points.
3157                      * But if we've encountered something above 255, the
3158                      * expanding won't help, so skip doing that.  But if it's
3159                      * EBCDIC, we may have to look at each character below 256
3160                      * if we have to convert to/from Unicode values */
3161                     if (   has_above_latin1
3162 #ifdef EBCDIC
3163                         && (range_min > 255 || ! convert_unicode)
3164 #endif
3165                     ) {
3166                         /* Move the high character one byte to the right; then
3167                          * insert between it and the range begin, an illegal
3168                          * byte which serves to indicate this is a range (using
3169                          * a '-' would be ambiguous). */
3170                         char *e = d++;
3171                         while (e-- > max_ptr) {
3172                             *(e + 1) = *e;
3173                         }
3174                         *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
3175                         goto range_done;
3176                     }
3177
3178                     /* Here, we're going to expand out the range.  For EBCDIC
3179                      * the range can extend above 255 (not so in ASCII), so
3180                      * for EBCDIC, split it into the parts above and below
3181                      * 255/256 */
3182 #ifdef EBCDIC
3183                     if (range_max > 255) {
3184                         real_range_max = range_max;
3185                         range_max = 255;
3186                     }
3187 #endif
3188                 }
3189
3190                 /* Here we need to expand out the string to contain each
3191                  * character in the range.  Grow the output to handle this.
3192                  * For non-UTF8, we need a byte for each code point in the
3193                  * range, minus the three that we've already allocated for: the
3194                  * hyphen, the min, and the max.  For UTF-8, we need this
3195                  * plus an extra byte for each code point that occupies two
3196                  * bytes (is variant) when in UTF-8 (except we've already
3197                  * allocated for the end points, including if they are
3198                  * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3199                  * platforms, it's easy to calculate a precise number.  To
3200                  * start, we count the variants in the range, which we need
3201                  * elsewhere in this function anyway.  (For the case where it
3202                  * isn't easy to calculate, 'extras' has been initialized to 0,
3203                  * and the calculation is done in a loop further down.) */
3204 #ifdef EBCDIC
3205                 if (convert_unicode)
3206 #endif
3207                 {
3208                     /* This is executed unconditionally on ASCII, and for
3209                      * Unicode ranges on EBCDIC.  Under these conditions, all
3210                      * code points above a certain value are variant; and none
3211                      * under that value are.  We just need to find out how much
3212                      * of the range is above that value.  We don't count the
3213                      * end points here, as they will already have been counted
3214                      * as they were parsed. */
3215                     if (range_min >= UTF_CONTINUATION_MARK) {
3216
3217                         /* The whole range is made up of variants */
3218                         extras = (range_max - 1) - (range_min + 1) + 1;
3219                     }
3220                     else if (range_max >= UTF_CONTINUATION_MARK) {
3221
3222                         /* Only the higher portion of the range is variants */
3223                         extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3224                     }
3225
3226                     utf8_variant_count += extras;
3227                 }
3228
3229                 /* The base growth is the number of code points in the range,
3230                  * not including the endpoints, which have already been sized
3231                  * for (and output).  We don't subtract for the hyphen, as it
3232                  * has been parsed but not output, and the SvGROW below is
3233                  * based only on what's been output plus what's left to parse.
3234                  * */
3235                 grow = (range_max - 1) - (range_min + 1) + 1;
3236
3237                 if (has_utf8) {
3238 #ifdef EBCDIC
3239                     /* In some cases in EBCDIC, we haven't yet calculated a
3240                      * precise amount needed for the UTF-8 variants.  Just
3241                      * assume the worst case, that everything will expand by a
3242                      * byte */
3243                     if (! convert_unicode) {
3244                         grow *= 2;
3245                     }
3246                     else
3247 #endif
3248                     {
3249                         /* Otherwise we know exactly how many variants there
3250                          * are in the range. */
3251                         grow += extras;
3252                     }
3253                 }
3254
3255                 /* Grow, but position the output to overwrite the range min end
3256                  * point, because in some cases we overwrite that */
3257                 SvCUR_set(sv, d - SvPVX_const(sv));
3258                 offset_to_min = min_ptr - SvPVX_const(sv);
3259
3260                 /* See Note on sizing above. */
3261                 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3262                                              + (send - s)
3263                                              + grow
3264                                              + 1 /* Trailing NUL */ );
3265
3266                 /* Now, we can expand out the range. */
3267 #ifdef EBCDIC
3268                 if (convert_unicode) {
3269                     SSize_t i;
3270
3271                     /* Recall that the min and max are now in Unicode terms, so
3272                      * we have to convert each character to its native
3273                      * equivalent */
3274                     if (has_utf8) {
3275                         for (i = range_min; i <= range_max; i++) {
3276                             append_utf8_from_native_byte(
3277                                                     LATIN1_TO_NATIVE((U8) i),
3278                                                     (U8 **) &d);
3279                         }
3280                     }
3281                     else {
3282                         for (i = range_min; i <= range_max; i++) {
3283                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3284                         }
3285                     }
3286                 }
3287                 else
3288 #endif
3289                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3290                 {
3291                     /* Here, no conversions are necessary, which means that the
3292                      * first character in the range is already in 'd' and
3293                      * valid, so we can skip overwriting it */
3294                     if (has_utf8) {
3295                         SSize_t i;
3296                         d += UTF8SKIP(d);
3297                         for (i = range_min + 1; i <= range_max; i++) {
3298                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3299                         }
3300                     }
3301                     else {
3302                         SSize_t i;
3303                         d++;
3304                         assert(range_min + 1 <= range_max);
3305                         for (i = range_min + 1; i < range_max; i++) {
3306 #ifdef EBCDIC
3307                             /* In this case on EBCDIC, we haven't calculated
3308                              * the variants.  Do it here, as we go along */
3309                             if (! UVCHR_IS_INVARIANT(i)) {
3310                                 utf8_variant_count++;
3311                             }
3312 #endif
3313                             *d++ = (char)i;
3314                         }
3315
3316                         /* The range_max is done outside the loop so as to
3317                          * avoid having to special case not incrementing
3318                          * 'utf8_variant_count' on EBCDIC (it's already been
3319                          * counted when originally parsed) */
3320                         *d++ = (char) range_max;
3321                     }
3322                 }
3323
3324 #ifdef EBCDIC
3325                 /* If the original range extended above 255, add in that
3326                  * portion. */
3327                 if (real_range_max) {
3328                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3329                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3330                     if (real_range_max > 0x100) {
3331                         if (real_range_max > 0x101) {
3332                             *d++ = (char) ILLEGAL_UTF8_BYTE;
3333                         }
3334                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3335                     }
3336                 }
3337 #endif
3338
3339               range_done:
3340                 /* mark the range as done, and continue */
3341                 didrange = TRUE;
3342                 dorange = FALSE;
3343 #ifdef EBCDIC
3344                 non_portable_endpoint = 0;
3345                 backslash_N = 0;
3346 #endif
3347                 continue;
3348             } /* End of is a range */
3349         } /* End of transliteration.  Joins main code after these else's */
3350         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3351             char *s1 = s-1;
3352             int esc = 0;
3353             while (s1 >= start && *s1-- == '\\')
3354                 esc = !esc;
3355             if (!esc)
3356                 in_charclass = TRUE;
3357         }
3358         else if (*s == ']' && PL_lex_inpat && in_charclass) {
3359             char *s1 = s-1;
3360             int esc = 0;
3361             while (s1 >= start && *s1-- == '\\')
3362                 esc = !esc;
3363             if (!esc)
3364                 in_charclass = FALSE;
3365         }
3366             /* skip for regexp comments /(?#comment)/, except for the last
3367              * char, which will be done separately.  Stop on (?{..}) and
3368              * friends */
3369         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3370             if (s[2] == '#') {
3371                 while (s+1 < send && *s != ')')
3372                     *d++ = *s++;
3373             }
3374             else if (!PL_lex_casemods
3375                      && (    s[2] == '{' /* This should match regcomp.c */
3376                          || (s[2] == '?' && s[3] == '{')))
3377             {
3378                 break;
3379             }
3380         }
3381             /* likewise skip #-initiated comments in //x patterns */
3382         else if (*s == '#'
3383                  && PL_lex_inpat
3384                  && !in_charclass
3385                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3386         {
3387             while (s < send && *s != '\n')
3388                 *d++ = *s++;
3389         }
3390             /* no further processing of single-quoted regex */
3391         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3392             goto default_action;
3393
3394             /* check for embedded arrays
3395              * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3396              */
3397         else if (*s == '@' && s[1]) {
3398             if (UTF
3399                ? isIDFIRST_utf8_safe(s+1, send)
3400                : isWORDCHAR_A(s[1]))
3401             {
3402                 break;
3403             }
3404             if (strchr(":'{$", s[1]))
3405                 break;
3406             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3407                 break; /* in regexp, neither @+ nor @- are interpolated */
3408         }
3409             /* check for embedded scalars.  only stop if we're sure it's a
3410              * variable.  */
3411         else if (*s == '$') {
3412             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3413                 break;
3414             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3415                 if (s[1] == '\\') {
3416                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3417                                    "Possible unintended interpolation of $\\ in regex");
3418                 }
3419                 break;          /* in regexp, $ might be tail anchor */
3420             }
3421         }
3422
3423         /* End of else if chain - OP_TRANS rejoin rest */
3424
3425         if (UNLIKELY(s >= send)) {
3426             assert(s == send);
3427             break;
3428         }
3429
3430         /* backslashes */
3431         if (*s == '\\' && s+1 < send) {
3432             char* e;    /* Can be used for ending '}', etc. */
3433
3434             s++;
3435
3436             /* warn on \1 - \9 in substitution replacements, but note that \11
3437              * is an octal; and \19 is \1 followed by '9' */
3438             if (PL_lex_inwhat == OP_SUBST
3439                 && !PL_lex_inpat
3440                 && isDIGIT(*s)
3441                 && *s != '0'
3442                 && !isDIGIT(s[1]))
3443             {
3444                 /* diag_listed_as: \%d better written as $%d */
3445                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3446                 *--s = '$';
3447                 break;
3448             }
3449
3450             /* string-change backslash escapes */
3451             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3452                 --s;
3453                 break;
3454             }
3455             /* In a pattern, process \N, but skip any other backslash escapes.
3456              * This is because we don't want to translate an escape sequence
3457              * into a meta symbol and have the regex compiler use the meta
3458              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3459              * in spite of this, we do have to process \N here while the proper
3460              * charnames handler is in scope.  See bugs #56444 and #62056.
3461              *
3462              * There is a complication because \N in a pattern may also stand
3463              * for 'match a non-nl', and not mean a charname, in which case its
3464              * processing should be deferred to the regex compiler.  To be a
3465              * charname it must be followed immediately by a '{', and not look
3466              * like \N followed by a curly quantifier, i.e., not something like
3467              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3468              * quantifier */
3469             else if (PL_lex_inpat
3470                     && (*s != 'N'
3471                         || s[1] != '{'
3472                         || regcurly(s + 1)))
3473             {
3474                 *d++ = '\\';
3475                 goto default_action;
3476             }
3477
3478             switch (*s) {
3479             default:
3480                 {
3481                     if ((isALPHANUMERIC(*s)))
3482                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3483                                        "Unrecognized escape \\%c passed through",
3484                                        *s);
3485                     /* default action is to copy the quoted character */
3486                     goto default_action;
3487                 }
3488
3489             /* eg. \132 indicates the octal constant 0132 */
3490             case '0': case '1': case '2': case '3':
3491             case '4': case '5': case '6': case '7':
3492                 {
3493                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3494                     STRLEN len = 3;
3495                     uv = grok_oct(s, &len, &flags, NULL);
3496                     s += len;
3497                     if (len < 3 && s < send && isDIGIT(*s)
3498                         && ckWARN(WARN_MISC))
3499                     {
3500                         Perl_warner(aTHX_ packWARN(WARN_MISC),
3501                                     "%s", form_short_octal_warning(s, len));
3502                     }
3503                 }
3504                 goto NUM_ESCAPE_INSERT;
3505
3506             /* eg. \o{24} indicates the octal constant \024 */
3507             case 'o':
3508                 {
3509                     const char* error;
3510
3511                     bool valid = grok_bslash_o(&s, PL_bufend,
3512                                                &uv, &error,
3513                                                TRUE, /* Output warning */
3514                                                FALSE, /* Not strict */
3515                                                TRUE, /* Output warnings for
3516                                                          non-portables */
3517                                                UTF);
3518                     if (! valid) {
3519                         yyerror(error);
3520                         uv = 0; /* drop through to ensure range ends are set */
3521                     }
3522                     goto NUM_ESCAPE_INSERT;
3523                 }
3524
3525             /* eg. \x24 indicates the hex constant 0x24 */
3526             case 'x':
3527                 {
3528                     const char* error;
3529
3530                     bool valid = grok_bslash_x(&s, PL_bufend,
3531                                                &uv, &error,
3532                                                TRUE, /* Output warning */
3533                                                FALSE, /* Not strict */
3534                                                TRUE,  /* Output warnings for
3535                                                          non-portables */
3536                                                UTF);
3537                     if (! valid) {
3538                         yyerror(error);
3539                         uv = 0; /* drop through to ensure range ends are set */
3540                     }
3541                 }
3542
3543               NUM_ESCAPE_INSERT:
3544                 /* Insert oct or hex escaped character. */
3545
3546                 /* Here uv is the ordinal of the next character being added */
3547                 if (UVCHR_IS_INVARIANT(uv)) {
3548                     *d++ = (char) uv;
3549                 }
3550                 else {
3551                     if (!has_utf8 && uv > 255) {
3552
3553                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3554                          * If we've only seen invariants so far, all we have to
3555                          * do is turn on the flag */
3556                         if (utf8_variant_count == 0) {
3557                             SvUTF8_on(sv);
3558                         }
3559                         else {
3560                             SvCUR_set(sv, d - SvPVX_const(sv));
3561                             SvPOK_on(sv);
3562                             *d = '\0';
3563
3564                             sv_utf8_upgrade_flags_grow(
3565                                            sv,
3566                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3567
3568                                            /* Since we're having to grow here,
3569                                             * make sure we have enough room for
3570                                             * this escape and a NUL, so the
3571                                             * code immediately below won't have
3572                                             * to actually grow again */
3573                                           UVCHR_SKIP(uv)
3574                                         + (STRLEN)(send - s) + 1);
3575                             d = SvPVX(sv) + SvCUR(sv);
3576                         }
3577
3578                         has_above_latin1 = TRUE;
3579                         has_utf8 = TRUE;
3580                     }
3581
3582                     if (! has_utf8) {
3583                         *d++ = (char)uv;
3584                         utf8_variant_count++;
3585                     }
3586                     else {
3587                        /* Usually, there will already be enough room in 'sv'
3588                         * since such escapes are likely longer than any UTF-8
3589                         * sequence they can end up as.  This isn't the case on
3590                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3591                         * UTF-8 for it contains 14.  And, we have to allow for
3592                         * a trailing NUL.  It probably can't happen on ASCII
3593                         * platforms, but be safe.  See Note on sizing above. */
3594                         const STRLEN needed = d - SvPVX(sv)
3595                                             + UVCHR_SKIP(uv)
3596                                             + (send - s)
3597                                             + 1;
3598                         if (UNLIKELY(needed > SvLEN(sv))) {
3599                             SvCUR_set(sv, d - SvPVX_const(sv));
3600                             d = SvCUR(sv) + SvGROW(sv, needed);
3601                         }
3602
3603                         d = (char*)uvchr_to_utf8((U8*)d, uv);
3604                         if (PL_lex_inwhat == OP_TRANS
3605                             && PL_parser->lex_sub_op)
3606                         {
3607                             PL_parser->lex_sub_op->op_private |=
3608                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
3609                                              : OPpTRANS_TO_UTF);
3610                         }
3611                     }
3612                 }
3613 #ifdef EBCDIC
3614                 non_portable_endpoint++;
3615 #endif
3616                 continue;
3617
3618             case 'N':
3619                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3620                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3621                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3622                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3623                  * convenience all three forms are referred to as "named
3624                  * characters" below.
3625                  *
3626                  * For patterns, \N also can mean to match a non-newline.  Code
3627                  * before this 'switch' statement should already have handled
3628                  * this situation, and hence this code only has to deal with
3629                  * the named character cases.
3630                  *
3631                  * For non-patterns, the named characters are converted to
3632                  * their string equivalents.  In patterns, named characters are
3633                  * not converted to their ultimate forms for the same reasons
3634                  * that other escapes aren't (mainly that the ultimate
3635                  * character could be considered a meta-symbol by the regex
3636                  * compiler).  Instead, they are converted to the \N{U+...}
3637                  * form to get the value from the charnames that is in effect
3638                  * right now, while preserving the fact that it was a named
3639                  * character, so that the regex compiler knows this.
3640                  *
3641                  * The structure of this section of code (besides checking for
3642                  * errors and upgrading to utf8) is:
3643                  *    If the named character is of the form \N{U+...}, pass it
3644                  *      through if a pattern; otherwise convert the code point
3645                  *      to utf8
3646                  *    Otherwise must be some \N{NAME}: convert to
3647                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3648                  *
3649                  * Transliteration is an exception.  The conversion to utf8 is
3650                  * only done if the code point requires it to be representable.
3651                  *
3652                  * Here, 's' points to the 'N'; the test below is guaranteed to
3653                  * succeed if we are being called on a pattern, as we already
3654                  * know from a test above that the next character is a '{'.  A
3655                  * non-pattern \N must mean 'named character', which requires
3656                  * braces */
3657                 s++;
3658                 if (*s != '{') {
3659                     yyerror("Missing braces on \\N{}");
3660                     *d++ = '\0';
3661                     continue;
3662                 }
3663                 s++;
3664
3665                 /* If there is no matching '}', it is an error. */
3666                 if (! (e = (char *) memchr(s, '}', send - s))) {
3667                     if (! PL_lex_inpat) {
3668                         yyerror("Missing right brace on \\N{}");
3669                     } else {
3670                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3671                     }
3672                     yyquit(); /* Have exhausted the input. */
3673                 }
3674
3675                 /* Here it looks like a named character */
3676
3677                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3678                     s += 2;         /* Skip to next char after the 'U+' */
3679                     if (PL_lex_inpat) {
3680
3681                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3682                         /* Check the syntax.  */
3683                         const char *orig_s;
3684                         orig_s = s - 5;
3685                         if (!isXDIGIT(*s)) {
3686                           bad_NU:
3687                             yyerror(
3688                                 "Invalid hexadecimal number in \\N{U+...}"
3689                             );
3690                             s = e + 1;
3691                             *d++ = '\0';
3692                             continue;
3693                         }
3694                         while (++s < e) {
3695                             if (isXDIGIT(*s))
3696                                 continue;
3697                             else if ((*s == '.' || *s == '_')
3698                                   && isXDIGIT(s[1]))
3699                                 continue;
3700                             goto bad_NU;
3701                         }
3702
3703                         /* Pass everything through unchanged.
3704                          * +1 is for the '}' */
3705                         Copy(orig_s, d, e - orig_s + 1, char);
3706                         d += e - orig_s + 1;
3707                     }
3708                     else {  /* Not a pattern: convert the hex to string */
3709                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3710                                 | PERL_SCAN_SILENT_ILLDIGIT
3711                                 | PERL_SCAN_DISALLOW_PREFIX;
3712                         STRLEN len = e - s;
3713                         uv = grok_hex(s, &len, &flags, NULL);
3714                         if (len == 0 || (len != (STRLEN)(e - s)))
3715                             goto bad_NU;
3716
3717                          /* For non-tr///, if the destination is not in utf8,
3718                           * unconditionally recode it to be so.  This is
3719                           * because \N{} implies Unicode semantics, and scalars
3720                           * have to be in utf8 to guarantee those semantics.
3721                           * tr/// doesn't care about Unicode rules, so no need
3722                           * there to upgrade to UTF-8 for small enough code
3723                           * points */
3724                         if (! has_utf8 && (   uv > 0xFF
3725                                            || PL_lex_inwhat != OP_TRANS))
3726                         {
3727                             /* See Note on sizing above.  */
3728                             const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3729
3730                             SvCUR_set(sv, d - SvPVX_const(sv));
3731                             SvPOK_on(sv);
3732                             *d = '\0';
3733
3734                             if (utf8_variant_count == 0) {
3735                                 SvUTF8_on(sv);
3736                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3737                             }
3738                             else {
3739                                 sv_utf8_upgrade_flags_grow(
3740                                                sv,
3741                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3742                                                extra);
3743                                 d = SvPVX(sv) + SvCUR(sv);
3744                             }
3745
3746                             has_utf8 = TRUE;
3747                             has_above_latin1 = TRUE;
3748                         }
3749
3750                         /* Add the (Unicode) code point to the output. */
3751                         if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3752                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3753                         }
3754                         else {
3755                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3756                         }
3757                     }
3758                 }
3759                 else /* Here is \N{NAME} but not \N{U+...}. */
3760                      if ((res = get_and_check_backslash_N_name(s, e)))
3761                 {
3762                     STRLEN len;
3763                     const char *str = SvPV_const(res, len);
3764                     if (PL_lex_inpat) {
3765
3766                         if (! len) { /* The name resolved to an empty string */
3767                             Copy("\\N{}", d, 4, char);
3768                             d += 4;
3769                         }
3770                         else {
3771                             /* In order to not lose information for the regex
3772                             * compiler, pass the result in the specially made
3773                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3774                             * the code points in hex of each character
3775                             * returned by charnames */
3776
3777                             const char *str_end = str + len;
3778                             const STRLEN off = d - SvPVX_const(sv);
3779
3780                             if (! SvUTF8(res)) {
3781                                 /* For the non-UTF-8 case, we can determine the
3782                                  * exact length needed without having to parse
3783                                  * through the string.  Each character takes up
3784                                  * 2 hex digits plus either a trailing dot or
3785                                  * the "}" */
3786                                 const char initial_text[] = "\\N{U+";
3787                                 const STRLEN initial_len = sizeof(initial_text)
3788                                                            - 1;
3789                                 d = off + SvGROW(sv, off
3790                                                     + 3 * len
3791
3792                                                     /* +1 for trailing NUL */
3793                                                     + initial_len + 1
3794
3795                                                     + (STRLEN)(send - e));
3796                                 Copy(initial_text, d, initial_len, char);
3797                                 d += initial_len;
3798                                 while (str < str_end) {
3799                                     char hex_string[4];
3800                                     int len =
3801                                         my_snprintf(hex_string,
3802                                                   sizeof(hex_string),
3803                                                   "%02X.",
3804
3805                                                   /* The regex compiler is
3806                                                    * expecting Unicode, not
3807                                                    * native */
3808                                                   NATIVE_TO_LATIN1(*str));
3809                                     PERL_MY_SNPRINTF_POST_GUARD(len,
3810                                                            sizeof(hex_string));
3811                                     Copy(hex_string, d, 3, char);
3812                                     d += 3;
3813                                     str++;
3814                                 }
3815                                 d--;    /* Below, we will overwrite the final
3816                                            dot with a right brace */
3817                             }
3818                             else {
3819                                 STRLEN char_length; /* cur char's byte length */
3820
3821                                 /* and the number of bytes after this is
3822                                  * translated into hex digits */
3823                                 STRLEN output_length;
3824
3825                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3826                                  * for max('U+', '.'); and 1 for NUL */
3827                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3828
3829                                 /* Get the first character of the result. */
3830                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3831                                                         len,
3832                                                         &char_length,
3833                                                         UTF8_ALLOW_ANYUV);
3834                                 /* Convert first code point to Unicode hex,
3835                                  * including the boiler plate before it. */
3836                                 output_length =
3837                                     my_snprintf(hex_string, sizeof(hex_string),
3838                                              "\\N{U+%X",
3839                                              (unsigned int) NATIVE_TO_UNI(uv));
3840
3841                                 /* Make sure there is enough space to hold it */
3842                                 d = off + SvGROW(sv, off
3843                                                     + output_length
3844                                                     + (STRLEN)(send - e)
3845                                                     + 2);       /* '}' + NUL */
3846                                 /* And output it */
3847                                 Copy(hex_string, d, output_length, char);
3848                                 d += output_length;
3849
3850                                 /* For each subsequent character, append dot and
3851                                 * its Unicode code point in hex */
3852                                 while ((str += char_length) < str_end) {
3853                                     const STRLEN off = d - SvPVX_const(sv);
3854                                     U32 uv = utf8n_to_uvchr((U8 *) str,
3855                                                             str_end - str,
3856                                                             &char_length,
3857                                                             UTF8_ALLOW_ANYUV);
3858                                     output_length =
3859                                         my_snprintf(hex_string,
3860                                              sizeof(hex_string),
3861                                              ".%X",
3862                                              (unsigned int) NATIVE_TO_UNI(uv));
3863
3864                                     d = off + SvGROW(sv, off
3865                                                         + output_length
3866                                                         + (STRLEN)(send - e)
3867                                                         + 2);   /* '}' +  NUL */
3868                                     Copy(hex_string, d, output_length, char);
3869                                     d += output_length;
3870                                 }
3871                             }
3872
3873                             *d++ = '}'; /* Done.  Add the trailing brace */
3874                         }
3875                     }
3876                     else { /* Here, not in a pattern.  Convert the name to a
3877                             * string. */
3878
3879                         if (PL_lex_inwhat == OP_TRANS) {
3880                             str = SvPV_const(res, len);
3881                             if (len > ((SvUTF8(res))
3882                                        ? UTF8SKIP(str)
3883                                        : 1U))
3884                             {
3885                                 yyerror(Perl_form(aTHX_
3886                                     "%.*s must not be a named sequence"
3887                                     " in transliteration operator",
3888                                         /*  +1 to include the "}" */
3889                                     (int) (e + 1 - start), start));
3890                                 *d++ = '\0';
3891                                 goto end_backslash_N;
3892                             }
3893
3894                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
3895                                 has_above_latin1 = TRUE;
3896                             }
3897
3898                         }
3899                         else if (! SvUTF8(res)) {
3900                             /* Make sure \N{} return is UTF-8.  This is because
3901                              * \N{} implies Unicode semantics, and scalars have
3902                              * to be in utf8 to guarantee those semantics; but
3903                              * not needed in tr/// */
3904                             sv_utf8_upgrade_flags(res, 0);
3905                             str = SvPV_const(res, len);
3906                         }
3907
3908                          /* Upgrade destination to be utf8 if this new
3909                           * component is */
3910                         if (! has_utf8 && SvUTF8(res)) {
3911                             /* See Note on sizing above.  */
3912                             const STRLEN extra = len + (send - s) + 1;
3913
3914                             SvCUR_set(sv, d - SvPVX_const(sv));
3915                             SvPOK_on(sv);
3916                             *d = '\0';
3917
3918                             if (utf8_variant_count == 0) {
3919                                 SvUTF8_on(sv);
3920                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3921                             }
3922                             else {
3923                                 sv_utf8_upgrade_flags_grow(sv,
3924                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3925                                                 extra);
3926                                 d = SvPVX(sv) + SvCUR(sv);
3927                             }
3928                             has_utf8 = TRUE;
3929                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3930
3931                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3932                              * set correctly here). */
3933                             const STRLEN extra = len + (send - e) + 1;
3934                             const STRLEN off = d - SvPVX_const(sv);
3935                             d = off + SvGROW(sv, off + extra);
3936                         }
3937                         Copy(str, d, len, char);
3938                         d += len;
3939                     }
3940
3941                     SvREFCNT_dec(res);
3942
3943                 } /* End \N{NAME} */
3944
3945               end_backslash_N:
3946 #ifdef EBCDIC
3947                 backslash_N++; /* \N{} is defined to be Unicode */
3948 #endif
3949                 s = e + 1;  /* Point to just after the '}' */
3950                 continue;
3951
3952             /* \c is a control character */
3953             case 'c':
3954                 s++;
3955                 if (s < send) {
3956                     *d++ = grok_bslash_c(*s, 1);
3957                 }
3958                 else {
3959                     yyerror("Missing control char name in \\c");
3960                     yyquit();   /* Are at end of input, no sense continuing */
3961                 }
3962 #ifdef EBCDIC
3963                 non_portable_endpoint++;
3964 #endif
3965                 break;
3966
3967             /* printf-style backslashes, formfeeds, newlines, etc */
3968             case 'b':
3969                 *d++ = '\b';
3970                 break;
3971             case 'n':
3972                 *d++ = '\n';
3973                 break;
3974             case 'r':
3975                 *d++ = '\r';
3976                 break;
3977             case 'f':
3978                 *d++ = '\f';
3979                 break;
3980             case 't':
3981                 *d++ = '\t';
3982                 break;
3983             case 'e':
3984                 *d++ = ESC_NATIVE;
3985                 break;
3986             case 'a':
3987                 *d++ = '\a';
3988                 break;
3989             } /* end switch */
3990
3991             s++;
3992             continue;
3993         } /* end if (backslash) */
3994
3995     default_action:
3996         /* Just copy the input to the output, though we may have to convert
3997          * to/from UTF-8.
3998          *
3999          * If the input has the same representation in UTF-8 as not, it will be
4000          * a single byte, and we don't care about UTF8ness; just copy the byte */
4001         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4002             *d++ = *s++;
4003         }
4004         else if (! this_utf8 && ! has_utf8) {
4005             /* If neither source nor output is UTF-8, is also a single byte,
4006              * just copy it; but this byte counts should we later have to
4007              * convert to UTF-8 */
4008             *d++ = *s++;
4009             utf8_variant_count++;
4010         }
4011         else if (this_utf8 && has_utf8) {   /* Both UTF-8, can just copy */
4012             const STRLEN len = UTF8SKIP(s);
4013
4014             /* We expect the source to have already been checked for
4015              * malformedness */
4016             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4017
4018             Copy(s, d, len, U8);
4019             d += len;
4020             s += len;
4021         }
4022         else { /* UTF8ness matters and doesn't match, need to convert */
4023             STRLEN len = 1;
4024             const UV nextuv   = (this_utf8)
4025                                 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
4026                                 : (UV) ((U8) *s);
4027             STRLEN need = UVCHR_SKIP(nextuv);
4028
4029             if (!has_utf8) {
4030                 SvCUR_set(sv, d - SvPVX_const(sv));
4031                 SvPOK_on(sv);
4032                 *d = '\0';
4033
4034                 /* See Note on sizing above. */
4035                 need += (STRLEN)(send - s) + 1;
4036
4037                 if (utf8_variant_count == 0) {
4038                     SvUTF8_on(sv);
4039                     d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4040                 }
4041                 else {
4042                     sv_utf8_upgrade_flags_grow(sv,
4043                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4044                                                need);
4045                     d = SvPVX(sv) + SvCUR(sv);
4046                 }
4047                 has_utf8 = TRUE;
4048             } else if (need > len) {
4049                 /* encoded value larger than old, may need extra space (NOTE:
4050                  * SvCUR() is not set correctly here).   See Note on sizing
4051                  * above.  */
4052                 const STRLEN extra = need + (send - s) + 1;
4053                 const STRLEN off = d - SvPVX_const(sv);
4054                 d = off + SvGROW(sv, off + extra);
4055             }
4056             s += len;
4057
4058             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
4059         }
4060     } /* while loop to process each character */
4061
4062     /* terminate the string and set up the sv */
4063     *d = '\0';
4064     SvCUR_set(sv, d - SvPVX_const(sv));
4065     if (SvCUR(sv) >= SvLEN(sv))
4066         Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
4067                    " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
4068
4069     SvPOK_on(sv);
4070     if (has_utf8) {
4071         SvUTF8_on(sv);
4072         if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
4073             PL_parser->lex_sub_op->op_private |=
4074                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
4075         }
4076     }
4077
4078     /* shrink the sv if we allocated more than we used */
4079     if (SvCUR(sv) + 5 < SvLEN(sv)) {
4080         SvPV_shrink_to_cur(sv);
4081     }
4082
4083     /* return the substring (via pl_yylval) only if we parsed anything */
4084     if (s > start) {
4085         char *s2 = start;
4086         for (; s2 < s; s2++) {
4087             if (*s2 == '\n')
4088                 COPLINE_INC_WITH_HERELINES;
4089         }
4090         SvREFCNT_inc_simple_void_NN(sv);
4091         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4092             && ! PL_parser->lex_re_reparsing)
4093         {
4094             const char *const key = PL_lex_inpat ? "qr" : "q";
4095             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4096             const char *type;
4097             STRLEN typelen;
4098
4099             if (PL_lex_inwhat == OP_TRANS) {
4100                 type = "tr";
4101                 typelen = 2;
4102             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4103                 type = "s";
4104                 typelen = 1;
4105             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4106                 type = "q";
4107                 typelen = 1;
4108             } else  {
4109                 type = "qq";
4110                 typelen = 2;
4111             }
4112
4113             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4114                                 type, typelen);
4115         }
4116         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4117     }
4118     LEAVE_with_name("scan_const");
4119     return s;
4120 }
4121
4122 /* S_intuit_more
4123  * Returns TRUE if there's more to the expression (e.g., a subscript),
4124  * FALSE otherwise.
4125  *
4126  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4127  *
4128  * ->[ and ->{ return TRUE
4129  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4130  * { and [ outside a pattern are always subscripts, so return TRUE
4131  * if we're outside a pattern and it's not { or [, then return FALSE
4132  * if we're in a pattern and the first char is a {
4133  *   {4,5} (any digits around the comma) returns FALSE
4134  * if we're in a pattern and the first char is a [
4135  *   [] returns FALSE
4136  *   [SOMETHING] has a funky algorithm to decide whether it's a
4137  *      character class or not.  It has to deal with things like
4138  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4139  * anything else returns TRUE
4140  */
4141
4142 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4143
4144 STATIC int
4145 S_intuit_more(pTHX_ char *s, char *e)
4146 {
4147     PERL_ARGS_ASSERT_INTUIT_MORE;
4148
4149     if (PL_lex_brackets)
4150         return TRUE;
4151     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4152         return TRUE;
4153     if (*s == '-' && s[1] == '>'
4154      && FEATURE_POSTDEREF_QQ_IS_ENABLED
4155      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4156         ||(s[2] == '@' && strchr("*[{",s[3])) ))
4157         return TRUE;
4158     if (*s != '{' && *s != '[')
4159         return FALSE;
4160     if (!PL_lex_inpat)
4161         return TRUE;
4162
4163     /* In a pattern, so maybe we have {n,m}. */
4164     if (*s == '{') {
4165         if (regcurly(s)) {
4166             return FALSE;
4167         }
4168         return TRUE;
4169     }
4170
4171     /* On the other hand, maybe we have a character class */
4172
4173     s++;
4174     if (*s == ']' || *s == '^')
4175         return FALSE;
4176     else {
4177         /* this is terrifying, and it works */
4178         int weight;
4179         char seen[256];
4180         const char * const send = (char *) memchr(s, ']', e - s);
4181         unsigned char un_char, last_un_char;
4182         char tmpbuf[sizeof PL_tokenbuf * 4];
4183
4184         if (!send)              /* has to be an expression */
4185             return TRUE;
4186         weight = 2;             /* let's weigh the evidence */
4187
4188         if (*s == '$')
4189             weight -= 3;
4190         else if (isDIGIT(*s)) {
4191             if (s[1] != ']') {
4192                 if (isDIGIT(s[1]) && s[2] == ']')
4193                     weight -= 10;
4194             }
4195             else
4196                 weight -= 100;
4197         }
4198         Zero(seen,256,char);
4199         un_char = 255;
4200         for (; s < send; s++) {
4201             last_un_char = un_char;
4202             un_char = (unsigned char)*s;
4203             switch (*s) {
4204             case '@':
4205             case '&':
4206             case '$':
4207                 weight -= seen[un_char] * 10;
4208                 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4209                     int len;
4210                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4211                     len = (int)strlen(tmpbuf);
4212                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4213                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4214                         weight -= 100;
4215                     else
4216                         weight -= 10;
4217                 }
4218                 else if (*s == '$'
4219                          && s[1]
4220                          && strchr("[#!%*<>()-=",s[1]))
4221                 {
4222                     if (/*{*/ strchr("])} =",s[2]))
4223                         weight -= 10;
4224                     else
4225                         weight -= 1;
4226                 }
4227                 break;
4228             case '\\':
4229                 un_char = 254;
4230                 if (s[1]) {
4231                     if (strchr("wds]",s[1]))
4232                         weight += 100;
4233                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4234                         weight += 1;
4235                     else if (strchr("rnftbxcav",s[1]))
4236                         weight += 40;
4237                     else if (isDIGIT(s[1])) {
4238                         weight += 40;
4239                         while (s[1] && isDIGIT(s[1]))
4240                             s++;
4241                     }
4242                 }
4243                 else
4244                     weight += 100;
4245                 break;
4246             case '-':
4247                 if (s[1] == '\\')
4248                     weight += 50;
4249                 if (strchr("aA01! ",last_un_char))
4250                     weight += 30;
4251                 if (strchr("zZ79~",s[1]))
4252                     weight += 30;
4253                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4254                     weight -= 5;        /* cope with negative subscript */
4255                 break;
4256             default:
4257                 if (!isWORDCHAR(last_un_char)
4258                     && !(last_un_char == '$' || last_un_char == '@'
4259                          || last_un_char == '&')
4260                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4261                     char *d = s;
4262                     while (isALPHA(*s))
4263                         s++;
4264                     if (keyword(d, s - d, 0))
4265                         weight -= 150;
4266                 }
4267                 if (un_char == last_un_char + 1)
4268                     weight += 5;
4269                 weight -= seen[un_char];
4270                 break;
4271             }
4272             seen[un_char]++;
4273         }
4274         if (weight >= 0)        /* probably a character class */
4275             return FALSE;
4276     }
4277
4278     return TRUE;
4279 }
4280
4281 /*
4282  * S_intuit_method
4283  *
4284  * Does all the checking to disambiguate
4285  *   foo bar
4286  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4287  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4288  *
4289  * First argument is the stuff after the first token, e.g. "bar".
4290  *
4291  * Not a method if foo is a filehandle.
4292  * Not a method if foo is a subroutine prototyped to take a filehandle.
4293  * Not a method if it's really "Foo $bar"
4294  * Method if it's "foo $bar"
4295  * Not a method if it's really "print foo $bar"
4296  * Method if it's really "foo package::" (interpreted as package->foo)
4297  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4298  * Not a method if bar is a filehandle or package, but is quoted with
4299  *   =>
4300  */
4301
4302 STATIC int
4303 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4304 {
4305     char *s = start + (*start == '$');
4306     char tmpbuf[sizeof PL_tokenbuf];
4307     STRLEN len;
4308     GV* indirgv;
4309         /* Mustn't actually add anything to a symbol table.
4310            But also don't want to "initialise" any placeholder
4311            constants that might already be there into full
4312            blown PVGVs with attached PVCV.  */
4313     GV * const gv =
4314         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4315
4316     PERL_ARGS_ASSERT_INTUIT_METHOD;
4317
4318     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4319             return 0;
4320     if (cv && SvPOK(cv)) {
4321         const char *proto = CvPROTO(cv);
4322         if (proto) {
4323             while (*proto && (isSPACE(*proto) || *proto == ';'))
4324                 proto++;
4325             if (*proto == '*')
4326                 return 0;
4327         }
4328     }
4329
4330     if (*start == '$') {
4331         SSize_t start_off = start - SvPVX(PL_linestr);
4332         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4333             || isUPPER(*PL_tokenbuf))
4334             return 0;
4335         /* this could be $# */
4336         if (isSPACE(*s))
4337             s = skipspace(s);
4338         PL_bufptr = SvPVX(PL_linestr) + start_off;
4339         PL_expect = XREF;
4340         return *s == '(' ? FUNCMETH : METHOD;
4341     }
4342
4343     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4344     /* start is the beginning of the possible filehandle/object,
4345      * and s is the end of it
4346      * tmpbuf is a copy of it (but with single quotes as double colons)
4347      */
4348
4349     if (!keyword(tmpbuf, len, 0)) {
4350         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4351             len -= 2;
4352             tmpbuf[len] = '\0';
4353             goto bare_package;
4354         }
4355         indirgv = gv_fetchpvn_flags(tmpbuf, len,
4356                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4357                                     SVt_PVCV);
4358         if (indirgv && SvTYPE(indirgv) != SVt_NULL
4359          && (!isGV(indirgv) || GvCVu(indirgv)))
4360             return 0;
4361         /* filehandle or package name makes it a method */
4362         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4363             s = skipspace(s);
4364             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4365                 return 0;       /* no assumptions -- "=>" quotes bareword */
4366       bare_package:
4367             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4368                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4369             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4370             PL_expect = XTERM;
4371             force_next(BAREWORD);
4372             PL_bufptr = s;
4373             return *s == '(' ? FUNCMETH : METHOD;
4374         }
4375     }
4376     return 0;
4377 }
4378
4379 /* Encoded script support. filter_add() effectively inserts a
4380  * 'pre-processing' function into the current source input stream.
4381  * Note that the filter function only applies to the current source file
4382  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4383  *
4384  * The datasv parameter (which may be NULL) can be used to pass
4385  * private data to this instance of the filter. The filter function
4386  * can recover the SV using the FILTER_DATA macro and use it to
4387  * store private buffers and state information.
4388  *
4389  * The supplied datasv parameter is upgraded to a PVIO type
4390  * and the IoDIRP/IoANY field is used to store the function pointer,
4391  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4392  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4393  * private use must be set using malloc'd pointers.
4394  */
4395
4396 SV *
4397 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4398 {
4399     if (!funcp)
4400         return NULL;
4401
4402     if (!PL_parser)
4403         return NULL;
4404
4405     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4406         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4407
4408     if (!PL_rsfp_filters)
4409         PL_rsfp_filters = newAV();
4410     if (!datasv)
4411         datasv = newSV(0);
4412     SvUPGRADE(datasv, SVt_PVIO);
4413     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4414     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4415     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4416                           FPTR2DPTR(void *, IoANY(datasv)),
4417                           SvPV_nolen(datasv)));
4418     av_unshift(PL_rsfp_filters, 1);
4419     av_store(PL_rsfp_filters, 0, datasv) ;
4420     if (
4421         !PL_parser->filtered
4422      && PL_parser->lex_flags & LEX_EVALBYTES
4423      && PL_bufptr < PL_bufend
4424     ) {
4425         const char *s = PL_bufptr;
4426         while (s < PL_bufend) {
4427             if (*s == '\n') {
4428                 SV *linestr = PL_parser->linestr;
4429                 char *buf = SvPVX(linestr);
4430                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4431                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4432                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4433                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4434                 STRLEN const last_uni_pos =
4435                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4436                 STRLEN const last_lop_pos =
4437                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4438                 av_push(PL_rsfp_filters, linestr);
4439                 PL_parser->linestr =
4440                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4441                 buf = SvPVX(PL_parser->linestr);
4442                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4443                 PL_parser->bufptr = buf + bufptr_pos;
4444                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4445                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4446                 PL_parser->linestart = buf + linestart_pos;
4447                 if (PL_parser->last_uni)
4448                     PL_parser->last_uni = buf + last_uni_pos;
4449                 if (PL_parser->last_lop)
4450                     PL_parser->last_lop = buf + last_lop_pos;
4451                 SvLEN_set(linestr, SvCUR(linestr));
4452                 SvCUR_set(linestr, s - SvPVX(linestr));
4453                 PL_parser->filtered = 1;
4454                 break;
4455             }
4456             s++;
4457         }
4458     }
4459     return(datasv);
4460 }
4461
4462
4463 /* Delete most recently added instance of this filter function. */
4464 void
4465 Perl_filter_del(pTHX_ filter_t funcp)
4466 {
4467     SV *datasv;
4468
4469     PERL_ARGS_ASSERT_FILTER_DEL;
4470
4471 #ifdef DEBUGGING
4472     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4473                           FPTR2DPTR(void*, funcp)));
4474 #endif
4475     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4476         return;
4477     /* if filter is on top of stack (usual case) just pop it off */
4478     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4479     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4480         sv_free(av_pop(PL_rsfp_filters));
4481
4482         return;
4483     }
4484     /* we need to search for the correct entry and clear it     */
4485     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4486 }
4487
4488
4489 /* Invoke the idxth filter function for the current rsfp.        */
4490 /* maxlen 0 = read one text line */
4491 I32
4492 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4493 {
4494     filter_t funcp;
4495     I32 ret;
4496     SV *datasv = NULL;
4497     /* This API is bad. It should have been using unsigned int for maxlen.
4498        Not sure if we want to change the API, but if not we should sanity
4499        check the value here.  */
4500     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4501
4502     PERL_ARGS_ASSERT_FILTER_READ;
4503
4504     if (!PL_parser || !PL_rsfp_filters)
4505         return -1;
4506     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4507         /* Provide a default input filter to make life easy.    */
4508         /* Note that we append to the line. This is handy.      */
4509         DEBUG_P(PerlIO_printf(Perl_debug_log,
4510                               "filter_read %d: from rsfp\n", idx));
4511         if (correct_length) {
4512             /* Want a block */
4513             int len ;
4514             const int old_len = SvCUR(buf_sv);
4515
4516             /* ensure buf_sv is large enough */
4517             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4518             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4519                                    correct_length)) <= 0) {
4520                 if (PerlIO_error(PL_rsfp))
4521                     return -1;          /* error */
4522                 else
4523                     return 0 ;          /* end of file */
4524             }
4525             SvCUR_set(buf_sv, old_len + len) ;
4526             SvPVX(buf_sv)[old_len + len] = '\0';
4527         } else {
4528             /* Want a line */
4529             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4530                 if (PerlIO_error(PL_rsfp))
4531                     return -1;          /* error */
4532                 else
4533                     return 0 ;          /* end of file */
4534             }
4535         }
4536         return SvCUR(buf_sv);
4537     }
4538     /* Skip this filter slot if filter has been deleted */
4539     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4540         DEBUG_P(PerlIO_printf(Perl_debug_log,
4541                               "filter_read %d: skipped (filter deleted)\n",
4542                               idx));
4543         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4544     }
4545     if (SvTYPE(datasv) != SVt_PVIO) {
4546         if (correct_length) {
4547             /* Want a block */
4548             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4549             if (!remainder) return 0; /* eof */
4550             if (correct_length > remainder) correct_length = remainder;
4551             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4552             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4553         } else {
4554             /* Want a line */
4555             const char *s = SvEND(datasv);
4556             const char *send = SvPVX(datasv) + SvLEN(datasv);
4557             while (s < send) {
4558                 if (*s == '\n') {
4559                     s++;
4560                     break;
4561                 }
4562                 s++;
4563             }
4564             if (s == send) return 0; /* eof */
4565             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4566             SvCUR_set(datasv, s-SvPVX(datasv));
4567         }
4568         return SvCUR(buf_sv);
4569     }
4570     /* Get function pointer hidden within datasv        */
4571     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4572     DEBUG_P(PerlIO_printf(Perl_debug_log,
4573                           "filter_read %d: via function %p (%s)\n",
4574                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4575     /* Call function. The function is expected to       */
4576     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4577     /* Return: <0:error, =0:eof, >0:not eof             */
4578     ENTER;
4579     save_scalar(PL_errgv);
4580     ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4581     LEAVE;
4582     return ret;
4583 }
4584
4585 STATIC char *
4586 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4587 {
4588     PERL_ARGS_ASSERT_FILTER_GETS;
4589
4590 #ifdef PERL_CR_FILTER
4591     if (!PL_rsfp_filters) {
4592         filter_add(S_cr_textfilter,NULL);
4593     }
4594 #endif
4595     if (PL_rsfp_filters) {
4596         if (!append)
4597             SvCUR_set(sv, 0);   /* start with empty line        */
4598         if (FILTER_READ(0, sv, 0) > 0)
4599             return ( SvPVX(sv) ) ;
4600         else
4601             return NULL ;
4602     }
4603     else
4604         return (sv_gets(sv, PL_rsfp, append));
4605 }
4606
4607 STATIC HV *
4608 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4609 {
4610     GV *gv;
4611
4612     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4613
4614     if (memEQs(pkgname, len, "__PACKAGE__"))
4615         return PL_curstash;
4616
4617     if (len > 2
4618         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4619         && (gv = gv_fetchpvn_flags(pkgname,
4620                                    len,
4621                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4622     {
4623         return GvHV(gv);                        /* Foo:: */
4624     }
4625
4626     /* use constant CLASS => 'MyClass' */
4627     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4628     if (gv && GvCV(gv)) {
4629         SV * const sv = cv_const_sv(GvCV(gv));
4630         if (sv)
4631             return gv_stashsv(sv, 0);
4632     }
4633
4634     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4635 }
4636
4637
4638 STATIC char *
4639 S_tokenize_use(pTHX_ int is_use, char *s) {
4640     PERL_ARGS_ASSERT_TOKENIZE_USE;
4641
4642     if (PL_expect != XSTATE)
4643         /* diag_listed_as: "use" not allowed in expression */
4644         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4645                     is_use ? "use" : "no"));
4646     PL_expect = XTERM;
4647     s = skipspace(s);
4648     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4649         s = force_version(s, TRUE);
4650         if (*s == ';' || *s == '}'
4651                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4652             NEXTVAL_NEXTTOKE.opval = NULL;
4653             force_next(BAREWORD);
4654         }
4655         else if (*s == 'v') {
4656             s = force_word(s,BAREWORD,FALSE,TRUE);
4657             s = force_version(s, FALSE);
4658         }
4659     }
4660     else {
4661         s = force_word(s,BAREWORD,FALSE,TRUE);
4662         s = force_version(s, FALSE);
4663     }
4664     pl_yylval.ival = is_use;
4665     return s;
4666 }
4667 #ifdef DEBUGGING
4668     static const char* const exp_name[] =
4669         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4670           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4671           "SIGVAR", "TERMORDORDOR"
4672         };
4673 #endif
4674
4675 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4676 STATIC bool
4677 S_word_takes_any_delimiter(char *p, STRLEN len)
4678 {
4679     return (len == 1 && strchr("msyq", p[0]))
4680             || (len == 2
4681                 && ((p[0] == 't' && p[1] == 'r')
4682                     || (p[0] == 'q' && strchr("qwxr", p[1]))));
4683 }
4684
4685 static void
4686 S_check_scalar_slice(pTHX_ char *s)
4687 {
4688     s++;
4689     while (SPACE_OR_TAB(*s)) s++;
4690     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4691                                                              PL_bufend,
4692                                                              UTF))
4693     {
4694         return;
4695     }
4696     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4697            || (*s && strchr(" \t$#+-'\"", *s)))
4698     {
4699         s += UTF ? UTF8SKIP(s) : 1;
4700     }
4701     if (*s == '}' || *s == ']')
4702         pl_yylval.ival = OPpSLICEWARNING;
4703 }
4704
4705 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4706 static void
4707 S_lex_token_boundary(pTHX)
4708 {
4709     PL_oldoldbufptr = PL_oldbufptr;
4710     PL_oldbufptr = PL_bufptr;
4711 }
4712
4713 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4714 static char *
4715 S_vcs_conflict_marker(pTHX_ char *s)
4716 {
4717     lex_token_boundary();
4718     PL_bufptr = s;
4719     yyerror("Version control conflict marker");
4720     while (s < PL_bufend && *s != '\n')
4721         s++;
4722     return s;
4723 }
4724
4725 /*
4726   yylex
4727
4728   Works out what to call the token just pulled out of the input
4729   stream.  The yacc parser takes care of taking the ops we return and
4730   stitching them into a tree.
4731
4732   Returns:
4733     The type of the next token
4734
4735   Structure:
4736       Check if we have already built the token; if so, use it.
4737       Switch based on the current state:
4738           - if we have a case modifier in a string, deal with that
4739           - handle other cases of interpolation inside a string
4740           - scan the next line if we are inside a format
4741       In the normal state, switch on the next character:
4742           - default:
4743             if alphabetic, go to key lookup
4744             unrecognized character - croak
4745           - 0/4/26: handle end-of-line or EOF
4746           - cases for whitespace
4747           - \n and #: handle comments and line numbers
4748           - various operators, brackets and sigils
4749           - numbers
4750           - quotes
4751           - 'v': vstrings (or go to key lookup)
4752           - 'x' repetition operator (or go to key lookup)
4753           - other ASCII alphanumerics (key lookup begins here):
4754               word before => ?
4755               keyword plugin
4756               scan built-in keyword (but do nothing with it yet)
4757               check for statement label
4758               check for lexical subs
4759                   goto just_a_word if there is one
4760               see whether built-in keyword is overridden
4761               switch on keyword number:
4762                   - default: just_a_word:
4763                       not a built-in keyword; handle bareword lookup
4764                       disambiguate between method and sub call
4765                       fall back to bareword
4766                   - cases for built-in keywords
4767 */
4768
4769
4770 int
4771 Perl_yylex(pTHX)
4772 {
4773     dVAR;
4774     char *s = PL_bufptr;
4775     char *d;
4776     STRLEN len;
4777     bool bof = FALSE;
4778     const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4779     U8 formbrack = 0;
4780     U32 fake_eof = 0;
4781
4782     /* orig_keyword, gvp, and gv are initialized here because
4783      * jump to the label just_a_word_zero can bypass their
4784      * initialization later. */
4785     I32 orig_keyword = 0;
4786     GV *gv = NULL;
4787     GV **gvp = NULL;
4788
4789     if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
4790         const U8* first_bad_char_loc;
4791         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
4792                                                         PL_bufend - PL_bufptr,
4793                                                         &first_bad_char_loc)))
4794         {
4795             _force_out_malformed_utf8_message(first_bad_char_loc,
4796                                               (U8 *) PL_bufend,
4797                                               0,
4798                                               1 /* 1 means die */ );
4799             NOT_REACHED; /* NOTREACHED */
4800         }
4801         PL_parser->recheck_utf8_validity = FALSE;
4802     }
4803     DEBUG_T( {
4804         SV* tmp = newSVpvs("");
4805         PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
4806             (IV)CopLINE(PL_curcop),
4807             lex_state_names[PL_lex_state],
4808             exp_name[PL_expect],
4809             pv_display(tmp, s, strlen(s), 0, 60));
4810         SvREFCNT_dec(tmp);
4811     } );
4812
4813     /* when we've already built the next token, just pull it out of the queue */
4814     if (PL_nexttoke) {
4815         PL_nexttoke--;
4816         pl_yylval = PL_nextval[PL_nexttoke];
4817         {
4818             I32 next_type;
4819             next_type = PL_nexttype[PL_nexttoke];
4820             if (next_type & (7<<24)) {
4821                 if (next_type & (1<<24)) {
4822                     if (PL_lex_brackets > 100)
4823                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4824                     PL_lex_brackstack[PL_lex_brackets++] =
4825                         (char) ((next_type >> 16) & 0xff);
4826                 }
4827                 if (next_type & (2<<24))
4828                     PL_lex_allbrackets++;
4829                 if (next_type & (4<<24))
4830                     PL_lex_allbrackets--;
4831                 next_type &= 0xffff;
4832             }
4833             return REPORT(next_type == 'p' ? pending_ident() : next_type);
4834         }
4835     }
4836
4837     switch (PL_lex_state) {
4838     case LEX_NORMAL:
4839     case LEX_INTERPNORMAL:
4840         break;
4841
4842     /* interpolated case modifiers like \L \U, including \Q and \E.
4843        when we get here, PL_bufptr is at the \
4844     */
4845     case LEX_INTERPCASEMOD:
4846 #ifdef DEBUGGING
4847         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4848             Perl_croak(aTHX_
4849                        "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4850                        PL_bufptr, PL_bufend, *PL_bufptr);
4851 #endif
4852         /* handle \E or end of string */
4853         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4854             /* if at a \E */
4855             if (PL_lex_casemods) {
4856                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4857                 PL_lex_casestack[PL_lex_casemods] = '\0';
4858
4859                 if (PL_bufptr != PL_bufend
4860                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4861                         || oldmod == 'F')) {
4862                     PL_bufptr += 2;
4863                     PL_lex_state = LEX_INTERPCONCAT;
4864                 }
4865                 PL_lex_allbrackets--;
4866                 return REPORT(')');
4867             }
4868             else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4869                /* Got an unpaired \E */
4870                Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4871                         "Useless use of \\E");
4872             }
4873             if (PL_bufptr != PL_bufend)
4874                 PL_bufptr += 2;
4875             PL_lex_state = LEX_INTERPCONCAT;
4876             return yylex();
4877         }
4878         else {
4879             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4880               "### Saw case modifier\n"); });
4881             s = PL_bufptr + 1;
4882             if (s[1] == '\\' && s[2] == 'E') {
4883                 PL_bufptr = s + 3;
4884                 PL_lex_state = LEX_INTERPCONCAT;
4885                 return yylex();
4886             }
4887             else {
4888                 I32 tmp;
4889                 if (   memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
4890                     || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
4891                 {
4892                     tmp = *s, *s = s[2], s[2] = (char)tmp;      /* misordered... */
4893                 }
4894                 if ((*s == 'L' || *s == 'U' || *s == 'F')
4895                     && (strpbrk(PL_lex_casestack, "LUF")))
4896                 {
4897                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4898                     PL_lex_allbrackets--;
4899                     return REPORT(')');
4900                 }
4901                 if (PL_lex_casemods > 10)
4902                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4903                 PL_lex_casestack[PL_lex_casemods++] = *s;
4904                 PL_lex_casestack[PL_lex_casemods] = '\0';
4905                 PL_lex_state = LEX_INTERPCONCAT;
4906                 NEXTVAL_NEXTTOKE.ival = 0;
4907                 force_next((2<<24)|'(');
4908                 if (*s == 'l')
4909                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4910                 else if (*s == 'u')
4911                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4912                 else if (*s == 'L')
4913                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4914                 else if (*s == 'U')
4915                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4916                 else if (*s == 'Q')
4917                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4918                 else if (*s == 'F')
4919                     NEXTVAL_NEXTTOKE.ival = OP_FC;
4920                 else
4921                     Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4922                 PL_bufptr = s + 1;
4923             }
4924             force_next(FUNC);
4925             if (PL_lex_starts) {
4926                 s = PL_bufptr;
4927                 PL_lex_starts = 0;
4928                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4929                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4930                     TOKEN(',');
4931                 else
4932                     AopNOASSIGN(OP_CONCAT);
4933             }
4934             else
4935                 return yylex();
4936         }
4937
4938     case LEX_INTERPPUSH:
4939         return REPORT(sublex_push());
4940
4941     case LEX_INTERPSTART:
4942         if (PL_bufptr == PL_bufend)
4943             return REPORT(sublex_done());
4944         DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4945               "### Interpolated variable\n"); });
4946         PL_expect = XTERM;
4947         /* for /@a/, we leave the joining for the regex engine to do
4948          * (unless we're within \Q etc) */
4949         PL_lex_dojoin = (*PL_bufptr == '@'
4950                             && (!PL_lex_inpat || PL_lex_casemods));
4951         PL_lex_state = LEX_INTERPNORMAL;
4952         if (PL_lex_dojoin) {
4953             NEXTVAL_NEXTTOKE.ival = 0;
4954             force_next(',');
4955             force_ident("\"", '$');
4956             NEXTVAL_NEXTTOKE.ival = 0;
4957             force_next('$');
4958             NEXTVAL_NEXTTOKE.ival = 0;
4959             force_next((2<<24)|'(');
4960             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4961             force_next(FUNC);
4962         }
4963         /* Convert (?{...}) and friends to 'do {...}' */
4964         if (PL_lex_inpat && *PL_bufptr == '(') {
4965             PL_parser->lex_shared->re_eval_start = PL_bufptr;
4966             PL_bufptr += 2;
4967             if (*PL_bufptr != '{')
4968                 PL_bufptr++;
4969             PL_expect = XTERMBLOCK;
4970             force_next(DO);
4971         }
4972
4973         if (PL_lex_starts++) {
4974             s = PL_bufptr;
4975             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4976             if (!PL_lex_casemods && PL_lex_inpat)
4977                 TOKEN(',');
4978             else
4979                 AopNOASSIGN(OP_CONCAT);
4980         }
4981         return yylex();
4982
4983     case LEX_INTERPENDMAYBE:
4984         if (intuit_more(PL_bufptr, PL_bufend)) {
4985             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4986             break;
4987         }
4988         /* FALLTHROUGH */
4989
4990     case LEX_INTERPEND:
4991         if (PL_lex_dojoin) {
4992             const U8 dojoin_was = PL_lex_dojoin;
4993             PL_lex_dojoin = FALSE;
4994             PL_lex_state = LEX_INTERPCONCAT;
4995             PL_lex_allbrackets--;
4996             return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
4997         }
4998         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4999             && SvEVALED(PL_lex_repl))
5000         {
5001             if (PL_bufptr != PL_bufend)
5002                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5003             PL_lex_repl = NULL;
5004         }
5005         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
5006            re_eval_str.  If the here-doc body’s length equals the previous
5007            value of re_eval_start, re_eval_start will now be null.  So
5008            check re_eval_str as well. */
5009         if (PL_parser->lex_shared->re_eval_start
5010          || PL_parser->lex_shared->re_eval_str) {
5011             SV *sv;
5012             if (*PL_bufptr != ')')
5013                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5014             PL_bufptr++;
5015             /* having compiled a (?{..}) expression, return the original
5016              * text too, as a const */
5017             if (PL_parser->lex_shared->re_eval_str) {
5018                 sv = PL_parser->lex_shared->re_eval_str;
5019                 PL_parser->lex_shared->re_eval_str = NULL;
5020                 SvCUR_set(sv,
5021                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5022                 SvPV_shrink_to_cur(sv);
5023             }
5024             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5025                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
5026             NEXTVAL_NEXTTOKE.opval =
5027                     newSVOP(OP_CONST, 0,
5028                                  sv);
5029             force_next(THING);
5030             PL_parser->lex_shared->re_eval_start = NULL;
5031             PL_expect = XTERM;
5032             return REPORT(',');
5033         }
5034
5035         /* FALLTHROUGH */
5036     case LEX_INTERPCONCAT:
5037 #ifdef DEBUGGING
5038         if (PL_lex_brackets)
5039             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5040                        (long) PL_lex_brackets);
5041 #endif
5042         if (PL_bufptr == PL_bufend)
5043             return REPORT(sublex_done());
5044
5045         /* m'foo' still needs to be parsed for possible (?{...}) */
5046         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5047             SV *sv = newSVsv(PL_linestr);
5048             sv = tokeq(sv);
5049             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
5050             s = PL_bufend;
5051         }
5052         else {
5053             int save_error_count = PL_error_count;
5054
5055             s = scan_const(PL_bufptr);
5056
5057             /* Set flag if this was a pattern and there were errors.  op.c will
5058              * refuse to compile a pattern with this flag set.  Otherwise, we
5059              * could get segfaults, etc. */
5060             if (PL_lex_inpat && PL_error_count > save_error_count) {
5061                 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
5062             }
5063             if (*s == '\\')
5064                 PL_lex_state = LEX_INTERPCASEMOD;
5065             else
5066                 PL_lex_state = LEX_INTERPSTART;
5067         }
5068
5069         if (s != PL_bufptr) {
5070             NEXTVAL_NEXTTOKE = pl_yylval;
5071             PL_expect = XTERM;
5072             force_next(THING);
5073             if (PL_lex_starts++) {
5074                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5075                 if (!PL_lex_casemods && PL_lex_inpat)
5076                     TOKEN(',');
5077                 else
5078                     AopNOASSIGN(OP_CONCAT);
5079             }
5080             else {
5081                 PL_bufptr = s;
5082                 return yylex();
5083             }
5084         }
5085
5086         return yylex();
5087     case LEX_FORMLINE:
5088         assert(PL_lex_formbrack);
5089         s = scan_formline(PL_bufptr);
5090         if (!PL_lex_formbrack)
5091         {
5092             formbrack = 1;
5093             goto rightbracket;
5094         }
5095         PL_bufptr = s;
5096         return yylex();
5097     }
5098
5099     /* We really do *not* want PL_linestr ever becoming a COW. */
5100     assert (!SvIsCOW(PL_linestr));
5101     s = PL_bufptr;
5102     PL_oldoldbufptr = PL_oldbufptr;
5103     PL_oldbufptr = s;
5104     PL_parser->saw_infix_sigil = 0;
5105
5106     if (PL_in_my == KEY_sigvar) {
5107         /* we expect the sigil and optional var name part of a
5108          * signature element here. Since a '$' is not necessarily
5109          * followed by a var name, handle it specially here; the general
5110          * yylex code would otherwise try to interpret whatever follows
5111          * as a var; e.g. ($, ...) would be seen as the var '$,'
5112          */
5113
5114         U8 sigil;
5115
5116         s = skipspace(s);
5117         sigil = *s++;
5118         PL_bufptr = s; /* for error reporting */
5119         switch (sigil) {
5120         case '$':
5121         case '@':
5122         case '%':
5123             /* spot stuff that looks like an prototype */
5124             if (strchr("$:@%&*;\\[]", *s)) {
5125                 yyerror("Illegal character following sigil in a subroutine signature");
5126                 break;
5127             }
5128             /* '$#' is banned, while '$ # comment' isn't */
5129             if (*s == '#') {
5130                 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5131                 break;
5132             }
5133             s = skipspace(s);
5134             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5135                 char *dest = PL_tokenbuf + 1;
5136                 /* read var name, including sigil, into PL_tokenbuf */
5137                 PL_tokenbuf[0] = sigil;
5138                 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5139                     0, cBOOL(UTF), FALSE, FALSE);
5140                 *dest = '\0';
5141                 assert(PL_tokenbuf[1]); /* we have a variable name */
5142             }
5143             else {
5144                 *PL_tokenbuf = 0;
5145                 PL_in_my = 0;
5146             }
5147
5148             s = skipspace(s);
5149             /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5150              * as the ASSIGNOP, and exclude other tokens that start with =
5151              */
5152             if (*s == '=' && (!s[1] || strchr("=~>", s[1]) == 0)) {
5153                 /* save now to report with the same context as we did when
5154                  * all ASSIGNOPS were accepted */
5155                 PL_oldbufptr = s;
5156
5157                 ++s;
5158                 NEXTVAL_NEXTTOKE.ival = 0;
5159                 force_next(ASSIGNOP);
5160                 PL_expect = XTERM;
5161             }
5162             else if (*s == ',' || *s == ')') {
5163                 PL_expect = XOPERATOR;
5164             }
5165             else {
5166                 /* make sure the context shows the unexpected character and
5167                  * hopefully a bit more */
5168                 if (*s) ++s;
5169                 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5170                     s++;
5171                 PL_bufptr = s; /* for error reporting */
5172                 yyerror("Illegal operator following parameter in a subroutine signature");
5173                 PL_in_my = 0;
5174             }
5175             if (*PL_tokenbuf) {
5176                 NEXTVAL_NEXTTOKE.ival = sigil;
5177                 force_next('p'); /* force a signature pending identifier */
5178             }
5179             break;
5180
5181         case ')':
5182             PL_expect = XBLOCK;
5183             break;
5184         case ',': /* handle ($a,,$b) */
5185             break;
5186
5187         default:
5188             PL_in_my = 0;
5189             yyerror("A signature parameter must start with '$', '@' or '%'");
5190             /* very crude error recovery: skip to likely next signature
5191              * element */
5192             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5193                 s++;
5194             break;
5195         }
5196         TOKEN(sigil);
5197     }
5198
5199   retry:
5200     switch (*s) {
5201     default:
5202         if (UTF) {
5203             if (isIDFIRST_utf8_safe(s, PL_bufend)) {
5204                 goto keylookup;
5205             }
5206         }
5207         else if (isALNUMC(*s)) {
5208             goto keylookup;
5209         }
5210     {
5211         SV *dsv = newSVpvs_flags("", SVs_TEMP);
5212         const char *c;
5213         if (UTF) {
5214             STRLEN skiplen = UTF8SKIP(s);
5215             STRLEN stravail = PL_bufend - s;
5216             c = sv_uni_display(dsv, newSVpvn_flags(s,
5217                                                    skiplen > stravail ? stravail : skiplen,
5218                                                    SVs_TEMP | SVf_UTF8),
5219                                10, UNI_DISPLAY_ISPRINT);
5220         }
5221         else {
5222             c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5223         }
5224
5225         if (s >= PL_linestart) {
5226             d = PL_linestart;
5227         }
5228         else {
5229             /* somehow (probably due to a parse failure), PL_linestart has advanced
5230              * pass PL_bufptr, get a reasonable beginning of line
5231              */
5232             d = s;
5233             while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
5234                 --d;
5235         }
5236         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
5237         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5238             d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
5239         }
5240
5241         Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
5242                           UTF8fARG(UTF, (s - d), d),
5243                          (int) len + 1);
5244     }
5245     case 4:
5246     case 26:
5247         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
5248     case 0:
5249         if ((!PL_rsfp || PL_lex_inwhat)
5250          && (!PL_parser->filtered || s+1 < PL_bufend)) {
5251             PL_last_uni = 0;
5252             PL_last_lop = 0;
5253             if (PL_lex_brackets
5254                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
5255             {
5256                 yyerror((const char *)
5257                         (PL_lex_formbrack
5258                          ? "Format not terminated"
5259                          : "Missing right curly or square bracket"));
5260             }
5261             DEBUG_T( { PerlIO_printf(Perl_debug_log,
5262                         "### Tokener got EOF\n");
5263             } );
5264             TOKEN(0);
5265         }
5266         if (s++ < PL_bufend)
5267             goto retry;                 /* ignore stray nulls */
5268         PL_last_uni = 0;
5269         PL_last_lop = 0;
5270         if (!PL_in_eval && !PL_preambled) {
5271             PL_preambled = TRUE;
5272             if (PL_perldb) {
5273                 /* Generate a string of Perl code to load the debugger.
5274                  * If PERL5DB is set, it will return the contents of that,
5275                  * otherwise a compile-time require of perl5db.pl.  */
5276
5277                 const char * const pdb = PerlEnv_getenv("PERL5DB");
5278
5279                 if (pdb) {
5280                     sv_setpv(PL_linestr, pdb);
5281                     sv_catpvs(PL_linestr,";");
5282                 } else {
5283                     SETERRNO(0,SS_NORMAL);
5284                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5285                 }
5286                 PL_parser->preambling = CopLINE(PL_curcop);
5287             } else
5288                 SvPVCLEAR(PL_linestr);
5289             if (PL_preambleav) {
5290                 SV **svp = AvARRAY(PL_preambleav);
5291                 SV **const end = svp + AvFILLp(PL_preambleav);
5292                 while(svp <= end) {
5293                     sv_catsv(PL_linestr, *svp);
5294                     ++svp;
5295                     sv_catpvs(PL_linestr, ";");
5296                 }
5297                 sv_free(MUTABLE_SV(PL_preambleav));
5298                 PL_preambleav = NULL;
5299             }
5300             if (PL_minus_E)
5301                 sv_catpvs(PL_linestr,
5302                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5303             if (PL_minus_n || PL_minus_p) {
5304                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5305                 if (PL_minus_l)
5306                     sv_catpvs(PL_linestr,"chomp;");
5307                 if (PL_minus_a) {
5308                     if (PL_minus_F) {
5309                         if (   (   *PL_splitstr == '/'
5310                                 || *PL_splitstr == '\''
5311                                 || *PL_splitstr == '"')
5312                             && strchr(PL_splitstr + 1, *PL_splitstr))
5313                         {
5314                             /* strchr is ok, because -F pattern can't contain
5315                              * embeddded NULs */
5316                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5317                         }
5318                         else {
5319                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5320                                bytes can be used as quoting characters.  :-) */
5321                             const char *splits = PL_splitstr;
5322                             sv_catpvs(PL_linestr, "our @F=split(q\0");
5323                             do {
5324                                 /* Need to \ \s  */
5325                                 if (*splits == '\\')
5326                                     sv_catpvn(PL_linestr, splits, 1);
5327                                 sv_catpvn(PL_linestr, splits, 1);
5328                             } while (*splits++);
5329                             /* This loop will embed the trailing NUL of
5330                                PL_linestr as the last thing it does before
5331                                terminating.  */
5332                             sv_catpvs(PL_linestr, ");");
5333                         }
5334                     }
5335                     else
5336                         sv_catpvs(PL_linestr,"our @F=split(' ');");
5337                 }
5338             }
5339             sv_catpvs(PL_linestr, "\n");
5340             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5341             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5342             PL_last_lop = PL_last_uni = NULL;
5343             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
5344                 update_debugger_info(PL_linestr, NULL, 0);
5345             goto retry;
5346         }
5347         do {
5348             fake_eof = 0;
5349             bof = cBOOL(PL_rsfp);
5350             if (0) {
5351               fake_eof:
5352                 fake_eof = LEX_FAKE_EOF;
5353             }
5354             PL_bufptr = PL_bufend;
5355             COPLINE_INC_WITH_HERELINES;
5356             if (!lex_next_chunk(fake_eof)) {
5357                 CopLINE_dec(PL_curcop);
5358                 s = PL_bufptr;
5359                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
5360             }
5361             CopLINE_dec(PL_curcop);
5362             s = PL_bufptr;
5363             /* If it looks like the start of a BOM or raw UTF-16,
5364              * check if it in fact is. */
5365             if (bof && PL_rsfp
5366                 && (   *s == 0
5367                     || *(U8*)s == BOM_UTF8_FIRST_BYTE
5368                     || *(U8*)s >= 0xFE
5369                     || s[1] == 0))
5370             {
5371                 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5372                 bof = (offset == (Off_t)SvCUR(PL_linestr));
5373 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5374                 /* offset may include swallowed CR */
5375                 if (!bof)
5376                     bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5377 #endif
5378                 if (bof) {
5379                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5380                     s = swallow_bom((U8*)s);
5381                 }
5382             }
5383             if (PL_parser->in_pod) {
5384                 /* Incest with pod. */
5385                 if (    memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
5386                     && !isALPHA(s[4]))
5387                 {
5388                     SvPVCLEAR(PL_linestr);
5389                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5390                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5391                     PL_last_lop = PL_last_uni = NULL;
5392                     PL_parser->in_pod = 0;
5393                 }
5394             }
5395             if (PL_rsfp || PL_parser->filtered)
5396                 incline(s, PL_bufend);
5397         } while (PL_parser->in_pod);
5398         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5399         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5400         PL_last_lop = PL_last_uni = NULL;
5401         if (CopLINE(PL_curcop) == 1) {
5402             while (s < PL_bufend && isSPACE(*s))
5403                 s++;
5404             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5405                 s++;
5406             d = NULL;
5407             if (!PL_in_eval) {
5408                 if (*s == '#' && *(s+1) == '!')
5409                     d = s + 2;
5410 #ifdef ALTERNATE_SHEBANG
5411                 else {
5412                     static char const as[] = ALTERNATE_SHEBANG;
5413                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5414                         d = s + (sizeof(as) - 1);
5415                 }
5416 #endif /* ALTERNATE_SHEBANG */
5417             }
5418             if (d) {
5419                 char *ipath;
5420                 char *ipathend;
5421
5422                 while (isSPACE(*d))
5423                     d++;
5424                 ipath = d;
5425                 while (*d && !isSPACE(*d))
5426                     d++;
5427                 ipathend = d;
5428
5429 #ifdef ARG_ZERO_IS_SCRIPT
5430                 if (ipathend > ipath) {
5431                     /*
5432                      * HP-UX (at least) sets argv[0] to the script name,
5433                      * which makes $^X incorrect.  And Digital UNIX and Linux,
5434                      * at least, set argv[0] to the basename of the Perl
5435                      * interpreter. So, having found "#!", we'll set it right.
5436                      */
5437                     SV* copfilesv = CopFILESV(PL_curcop);
5438                     if (copfilesv) {
5439                         SV * const x =
5440                             GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5441                                              SVt_PV)); /* $^X */
5442                         assert(SvPOK(x) || SvGMAGICAL(x));
5443                         if (sv_eq(x, copfilesv)) {
5444                             sv_setpvn(x, ipath, ipathend - ipath);
5445                             SvSETMAGIC(x);
5446                         }
5447                         else {
5448                             STRLEN blen;
5449                             STRLEN llen;
5450                             const char *bstart = SvPV_const(copfilesv, blen);
5451                             const char * const lstart = SvPV_const(x, llen);
5452                             if (llen < blen) {
5453                                 bstart += blen - llen;
5454                                 if (strnEQ(bstart, lstart, llen) &&     bstart[-1] == '/') {
5455                                     sv_setpvn(x, ipath, ipathend - ipath);
5456                                     SvSETMAGIC(x);
5457                                 }
5458                             }
5459                         }
5460                     }
5461                     else {
5462                         /* Anything to do if no copfilesv? */
5463                     }
5464                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
5465                 }
5466 #endif /* ARG_ZERO_IS_SCRIPT */
5467
5468                 /*
5469                  * Look for options.
5470                  */
5471                 d = instr(s,"perl -");
5472                 if (!d) {
5473                     d = instr(s,"perl");
5474 #if defined(DOSISH)
5475                     /* avoid getting into infinite loops when shebang
5476                      * line contains "Perl" rather than "perl" */
5477                     if (!d) {
5478                         for (d = ipathend-4; d >= ipath; --d) {
5479                             if (isALPHA_FOLD_EQ(*d, 'p')
5480                                 && !ibcmp(d, "perl", 4))
5481                             {
5482                                 break;
5483                             }
5484                         }
5485                         if (d < ipath)
5486                             d = NULL;
5487                     }
5488 #endif
5489                 }
5490 #ifdef ALTERNATE_SHEBANG
5491                 /*
5492                  * If the ALTERNATE_SHEBANG on this system starts with a
5493                  * character that can be part of a Perl expression, then if
5494                  * we see it but not "perl", we're probably looking at the
5495                  * start of Perl code, not a request to hand off to some
5496                  * other interpreter.  Similarly, if "perl" is there, but
5497                  * not in the first 'word' of the line, we assume the line
5498                  * contains the start of the Perl program.
5499                  */
5500                 if (d && *s != '#') {
5501                     const char *c = ipath;
5502                     while (*c && !strchr("; \t\r\n\f\v#", *c))
5503                         c++;
5504                     if (c < d)
5505                         d = NULL;       /* "perl" not in first word; ignore */
5506                     else
5507                         *s = '#';       /* Don't try to parse shebang line */
5508                 }
5509 #endif /* ALTERNATE_SHEBANG */
5510                 if (!d
5511                     && *s == '#'
5512                     && ipathend > ipath
5513                     && !PL_minus_c
5514                     && !instr(s,"indir")
5515                     && instr(PL_origargv[0],"perl"))
5516                 {
5517                     dVAR;
5518                     char **newargv;
5519
5520                     *ipathend = '\0';
5521                     s = ipathend + 1;
5522                     while (s < PL_bufend && isSPACE(*s))
5523                         s++;
5524                     if (s < PL_bufend) {
5525                         Newx(newargv,PL_origargc+3,char*);
5526                         newargv[1] = s;
5527                         while (s < PL_bufend && !isSPACE(*s))
5528                             s++;
5529                         *s = '\0';
5530                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5531                     }
5532                     else
5533                         newargv = PL_origargv;
5534                     newargv[0] = ipath;
5535                     PERL_FPU_PRE_EXEC
5536                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5537                     PERL_FPU_POST_EXEC
5538                     Perl_croak(aTHX_ "Can't exec %s", ipath);
5539                 }
5540                 if (d) {
5541                     while (*d && !isSPACE(*d))
5542                         d++;
5543                     while (SPACE_OR_TAB(*d))
5544                         d++;
5545
5546                     if (*d++ == '-') {
5547                         const bool switches_done = PL_doswitches;
5548                         const U32 oldpdb = PL_perldb;
5549                         const bool oldn = PL_minus_n;
5550                         const bool oldp = PL_minus_p;
5551                         const char *d1 = d;
5552
5553                         do {
5554                             bool baduni = FALSE;
5555                             if (*d1 == 'C') {
5556                                 const char *d2 = d1 + 1;
5557                                 if (parse_unicode_opts((const char **)&d2)
5558                                     != PL_unicode)
5559                                     baduni = TRUE;
5560                             }
5561                             if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
5562                                 const char * const m = d1;
5563                                 while (*d1 && !isSPACE(*d1))
5564                                     d1++;
5565                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5566                                       (int)(d1 - m), m);
5567                             }
5568                             d1 = moreswitches(d1);
5569                         } while (d1);
5570                         if (PL_doswitches && !switches_done) {
5571                             int argc = PL_origargc;
5572                             char **argv = PL_origargv;
5573                             do {
5574                                 argc--,argv++;
5575                             } while (argc && argv[0][0] == '-' && argv[0][1]);
5576                             init_argv_symbols(argc,argv);
5577                         }
5578                         if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
5579                             || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5580                               /* if we have already added "LINE: while (<>) {",
5581                                  we must not do it again */
5582                         {
5583                             SvPVCLEAR(PL_linestr);
5584                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5585                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5586                             PL_last_lop = PL_last_uni = NULL;
5587                             PL_preambled = FALSE;
5588                             if (PERLDB_LINE_OR_SAVESRC)
5589                                 (void)gv_fetchfile(PL_origfilename);
5590                             goto retry;
5591                         }
5592                     }
5593                 }
5594             }
5595         }
5596         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5597             PL_lex_state = LEX_FORMLINE;
5598             force_next(FORMRBRACK);
5599             TOKEN(';');
5600         }
5601         goto retry;
5602     case '\r':
5603 #ifdef PERL_STRICT_CR
5604         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5605         Perl_croak(aTHX_
5606       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5607 #endif
5608     case ' ': case '\t': case '\f': case '\v':
5609         s++;
5610         goto retry;
5611     case '#':
5612     case '\n':
5613         if (PL_lex_state != LEX_NORMAL
5614             || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
5615         {
5616             const bool in_comment = *s == '#';
5617             if (*s == '#' && s == PL_linestart && PL_in_eval
5618              && !PL_rsfp && !PL_parser->filtered) {
5619                 /* handle eval qq[#line 1 "foo"\n ...] */
5620                 CopLINE_dec(PL_curcop);
5621                 incline(s, PL_bufend);
5622             }
5623             d = s;
5624             while (d < PL_bufend && *d != '\n')
5625                 d++;
5626             if (d < PL_bufend)
5627                 d++;
5628             s = d;
5629             if (in_comment && d == PL_bufend
5630                 && PL_lex_state == LEX_INTERPNORMAL
5631                 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5632                 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5633             else
5634                 incline(s, PL_bufend);
5635             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5636                 PL_lex_state = LEX_FORMLINE;
5637                 force_next(FORMRBRACK);
5638                 TOKEN(';');
5639             }
5640         }
5641         else {
5642             while (s < PL_bufend && *s != '\n')
5643                 s++;
5644             if (s < PL_bufend)
5645                 {
5646                     s++;
5647                     if (s < PL_bufend)
5648                         incline(s, PL_bufend);
5649                 }
5650         }
5651         goto retry;
5652     case '-':
5653         if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5654             I32 ftst = 0;
5655             char tmp;
5656
5657             s++;
5658             PL_bufptr = s;
5659             tmp = *s++;
5660
5661             while (s < PL_bufend && SPACE_OR_TAB(*s))
5662                 s++;
5663
5664             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5665                 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5666                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5667                 OPERATOR('-');          /* unary minus */
5668             }
5669             switch (tmp) {
5670             case 'r': ftst = OP_FTEREAD;        break;
5671             case 'w': ftst = OP_FTEWRITE;       break;
5672             case 'x': ftst = OP_FTEEXEC;        break;
5673             case 'o': ftst = OP_FTEOWNED;       break;
5674             case 'R': ftst = OP_FTRREAD;        break;
5675             case 'W': ftst = OP_FTRWRITE;       break;
5676             case 'X': ftst = OP_FTREXEC;        break;
5677             case 'O': ftst = OP_FTROWNED;       break;
5678             case 'e': ftst = OP_FTIS;           break;
5679             case 'z': ftst = OP_FTZERO;         break;
5680             case 's': ftst = OP_FTSIZE;         break;
5681             case 'f': ftst = OP_FTFILE;         break;
5682             case 'd': ftst = OP_FTDIR;          break;
5683             case 'l': ftst = OP_FTLINK;         break;
5684             case 'p': ftst = OP_FTPIPE;         break;
5685             case 'S': ftst = OP_FTSOCK;         break;
5686             case 'u': ftst = OP_FTSUID;         break;
5687             case 'g': ftst = OP_FTSGID;         break;
5688             case 'k': ftst = OP_FTSVTX;         break;
5689             case 'b': ftst = OP_FTBLK;          break;
5690             case 'c': ftst = OP_FTCHR;          break;
5691             case 't': ftst = OP_FTTTY;          break;
5692             case 'T': ftst = OP_FTTEXT;         break;
5693             case 'B': ftst = OP_FTBINARY;       break;
5694             case 'M': case 'A': case 'C':
5695                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5696                 switch (tmp) {
5697                 case 'M': ftst = OP_FTMTIME;    break;
5698                 case 'A': ftst = OP_FTATIME;    break;
5699                 case 'C': ftst = OP_FTCTIME;    break;
5700                 default:                        break;
5701                 }
5702                 break;
5703             default:
5704                 break;
5705             }
5706             if (ftst) {
5707                 PL_last_uni = PL_oldbufptr;
5708                 PL_last_lop_op = (OPCODE)ftst;
5709                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5710                         "### Saw file test %c\n", (int)tmp);
5711                 } );
5712                 FTST(ftst);
5713             }
5714             else {
5715                 /* Assume it was a minus followed by a one-letter named
5716                  * subroutine call (or a -bareword), then. */
5717                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5718                         "### '-%c' looked like a file test but was not\n",
5719                         (int) tmp);
5720                 } );
5721                 s = --PL_bufptr;
5722             }
5723         }
5724         {
5725             const char tmp = *s++;
5726             if (*s == tmp) {
5727                 s++;
5728                 if (PL_expect == XOPERATOR)
5729                     TERM(POSTDEC);
5730                 else
5731                     OPERATOR(PREDEC);
5732             }
5733             else if (*s == '>') {
5734                 s++;
5735                 s = skipspace(s);
5736                 if (((*s == '$' || *s == '&') && s[1] == '*')
5737                   ||(*s == '$' && s[1] == '#' && s[2] == '*')
5738                   ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5739                   ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5740                  )
5741                 {
5742                     PL_expect = XPOSTDEREF;
5743                     TOKEN(ARROW);
5744                 }
5745                 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5746                     s = force_word(s,METHOD,FALSE,TRUE);
5747                     TOKEN(ARROW);
5748                 }
5749                 else if (*s == '$')
5750                     OPERATOR(ARROW);
5751                 else
5752                     TERM(ARROW);
5753             }
5754             if (PL_expect == XOPERATOR) {
5755                 if (*s == '='
5756                     && !PL_lex_allbrackets
5757                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5758                 {
5759                     s--;
5760                     TOKEN(0);
5761                 }
5762                 Aop(OP_SUBTRACT);
5763             }
5764             else {
5765                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5766                     check_uni();
5767                 OPERATOR('-');          /* unary minus */
5768             }
5769         }
5770
5771     case '+':
5772         {
5773             const char tmp = *s++;
5774             if (*s == tmp) {
5775                 s++;
5776                 if (PL_expect == XOPERATOR)
5777                     TERM(POSTINC);
5778                 else
5779                     OPERATOR(PREINC);
5780             }
5781             if (PL_expect == XOPERATOR) {
5782                 if (*s == '='
5783                     && !PL_lex_allbrackets
5784                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5785                 {
5786                     s--;
5787                     TOKEN(0);
5788                 }
5789                 Aop(OP_ADD);
5790             }
5791             else {
5792                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5793                     check_uni();
5794                 OPERATOR('+');
5795             }
5796         }
5797
5798     case '*':
5799         if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5800         if (PL_expect != XOPERATOR) {
5801             s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5802             PL_expect = XOPERATOR;
5803             force_ident(PL_tokenbuf, '*');
5804             if (!*PL_tokenbuf)
5805                 PREREF('*');
5806             TERM('*');
5807         }
5808         s++;
5809         if (*s == '*') {
5810             s++;
5811             if (*s == '=' && !PL_lex_allbrackets
5812                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5813             {
5814                 s -= 2;
5815                 TOKEN(0);
5816             }
5817             PWop(OP_POW);
5818         }
5819         if (*s == '='
5820             && !PL_lex_allbrackets
5821             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5822         {
5823             s--;
5824             TOKEN(0);
5825         }
5826         PL_parser->saw_infix_sigil = 1;
5827         Mop(OP_MULTIPLY);
5828
5829     case '%':
5830     {
5831         if (PL_expect == XOPERATOR) {
5832             if (s[1] == '='
5833                 && !PL_lex_allbrackets
5834                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5835             {
5836                 TOKEN(0);
5837             }
5838             ++s;
5839             PL_parser->saw_infix_sigil = 1;
5840             Mop(OP_MODULO);
5841         }
5842         else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5843         PL_tokenbuf[0] = '%';
5844         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5845         pl_yylval.ival = 0;
5846         if (!PL_tokenbuf[1]) {
5847             PREREF('%');
5848         }
5849         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5850             && intuit_more(s, PL_bufend)) {
5851             if (*s == '[')
5852                 PL_tokenbuf[0] = '@';
5853         }
5854         PL_expect = XOPERATOR;
5855         force_ident_maybe_lex('%');
5856         TERM('%');
5857     }
5858     case '^':
5859         d = s;
5860         bof = FEATURE_BITWISE_IS_ENABLED;
5861         if (bof && s[1] == '.')
5862             s++;
5863         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5864                 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5865         {
5866             s = d;
5867             TOKEN(0);
5868         }
5869         s++;
5870         BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5871     case '[':
5872         if (PL_lex_brackets > 100)
5873             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5874         PL_lex_brackstack[PL_lex_brackets++] = 0;
5875         PL_lex_allbrackets++;
5876         {
5877             const char tmp = *s++;
5878             OPERATOR(tmp);
5879         }
5880     case '~':
5881         if (s[1] == '~'
5882             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5883         {
5884             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5885                 TOKEN(0);
5886             s += 2;
5887             Perl_ck_warner_d(aTHX_
5888                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5889                 "Smartmatch is experimental");
5890             Eop(OP_SMARTMATCH);
5891         }
5892         s++;
5893         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5894             s++;
5895             BCop(OP_SCOMPLEMENT);
5896         }
5897         BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5898     case ',':
5899         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5900             TOKEN(0);
5901         s++;
5902         OPERATOR(',');
5903     case ':':
5904         if (s[1] == ':') {
5905             len = 0;
5906             goto just_a_word_zero_gv;
5907         }
5908         s++;
5909         {
5910         OP *attrs;
5911
5912         switch (PL_expect) {
5913         case XOPERATOR:
5914             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5915                 break;
5916             PL_bufptr = s;      /* update in case we back off */
5917             if (*s == '=') {
5918                 Perl_croak(aTHX_
5919                            "Use of := for an empty attribute list is not allowed");
5920             }
5921             goto grabattrs;
5922         case XATTRBLOCK:
5923             PL_expect = XBLOCK;
5924             goto grabattrs;
5925         case XATTRTERM:
5926             PL_expect = XTERMBLOCK;
5927          grabattrs:
5928             s = skipspace(s);
5929             attrs = NULL;
5930             while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5931                 I32 tmp;
5932                 SV *sv;
5933                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5934                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5935                     if (tmp < 0) tmp = -tmp;
5936                     switch (tmp) {
5937                     case KEY_or:
5938                     case KEY_and:
5939                     case KEY_for:
5940                     case KEY_foreach:
5941                     case KEY_unless:
5942                     case KEY_if:
5943                     case KEY_while:
5944                     case KEY_until:
5945                         goto got_attrs;
5946                     default:
5947                         break;
5948                     }
5949                 }
5950                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5951                 if (*d == '(') {
5952                     d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5953                     if (!d) {
5954                         if (attrs)
5955                             op_free(attrs);
5956                         sv_free(sv);
5957                         Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5958                     }
5959                     COPLINE_SET_FROM_MULTI_END;
5960                 }
5961                 if (PL_lex_stuff) {
5962                     sv_catsv(sv, PL_lex_stuff);
5963                     attrs = op_append_elem(OP_LIST, attrs,
5964                                         newSVOP(OP_CONST, 0, sv));
5965                     SvREFCNT_dec_NN(PL_lex_stuff);
5966                     PL_lex_stuff = NULL;
5967                 }
5968                 else {
5969                     /* NOTE: any CV attrs applied here need to be part of
5970                        the CVf_BUILTIN_ATTRS define in cv.h! */
5971                     if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5972                         sv_free(sv);
5973                         CvLVALUE_on(PL_compcv);
5974                     }
5975                     else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5976                         sv_free(sv);
5977                         CvMETHOD_on(PL_compcv);
5978                     }
5979                     else if (!PL_in_my && memEQs(SvPVX(sv), len, "const"))
5980                     {
5981                         sv_free(sv);
5982                         Perl_ck_warner_d(aTHX_
5983                             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5984                            ":const is experimental"
5985                         );
5986                         CvANONCONST_on(PL_compcv);
5987                         if (!CvANON(PL_compcv))
5988                             yyerror(":const is not permitted on named "
5989                                     "subroutines");
5990                     }
5991                     /* After we've set the flags, it could be argued that
5992                        we don't need to do the attributes.pm-based setting
5993                        process, and shouldn't bother appending recognized
5994                        flags.  To experiment with that, uncomment the
5995                        following "else".  (Note that's already been
5996                        uncommented.  That keeps the above-applied built-in
5997                        attributes from being intercepted (and possibly
5998                        rejected) by a package's attribute routines, but is
5999                        justified by the performance win for the common case
6000                        of applying only built-in attributes.) */
6001                     else
6002                         attrs = op_append_elem(OP_LIST, attrs,
6003                                             newSVOP(OP_CONST, 0,
6004                                                     sv));
6005                 }
6006                 s = skipspace(d);
6007                 if (*s == ':' && s[1] != ':')
6008                     s = skipspace(s+1);
6009                 else if (s == d)
6010                     break;      /* require real whitespace or :'s */
6011                 /* XXX losing whitespace on sequential attributes here */
6012             }
6013             {
6014                 if (*s != ';'
6015                     && *s != '}'
6016                     && !(PL_expect == XOPERATOR
6017                          ? (*s == '=' ||  *s == ')')
6018                          : (*s == '{' ||  *s == '(')))
6019                 {
6020                     const char q = ((*s == '\'') ? '"' : '\'');
6021                     /* If here for an expression, and parsed no attrs, back
6022                        off. */
6023                     if (PL_expect == XOPERATOR && !attrs) {
6024                         s = PL_bufptr;
6025                         break;
6026                     }
6027                     /* MUST advance bufptr here to avoid bogus "at end of line"
6028                        context messages from yyerror().
6029                     */
6030                     PL_bufptr = s;
6031                     yyerror( (const char *)
6032                              (*s
6033                               ? Perl_form(aTHX_ "Invalid separator character "
6034                                           "%c%c%c in attribute list", q, *s, q)
6035                               : "Unterminated attribute list" ) );
6036                     if (attrs)
6037                         op_free(attrs);
6038                     OPERATOR(':');
6039                 }
6040             }
6041         got_attrs:
6042             if (attrs) {
6043                 NEXTVAL_NEXTTOKE.opval = attrs;
6044                 force_next(THING);
6045             }
6046             TOKEN(COLONATTR);
6047         }
6048         }
6049         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6050             s--;
6051             TOKEN(0);
6052         }
6053         PL_lex_allbrackets--;
6054         OPERATOR(':');
6055     case '(':
6056         s++;
6057         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6058             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
6059         else
6060             PL_expect = XTERM;
6061         s = skipspace(s);
6062         PL_lex_allbrackets++;
6063         TOKEN('(');
6064     case ';':
6065         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6066             TOKEN(0);
6067         CLINE;
6068         s++;
6069         PL_expect = XSTATE;
6070         TOKEN(';');
6071     case ')':
6072         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6073             TOKEN(0);
6074         s++;
6075         PL_lex_allbrackets--;
6076         s = skipspace(s);
6077         if (*s == '{')
6078             PREBLOCK(')');
6079         TERM(')');
6080     case ']':
6081         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6082             TOKEN(0);
6083         s++;
6084         if (PL_lex_brackets <= 0)
6085             /* diag_listed_as: Unmatched right %s bracket */
6086             yyerror("Unmatched right square bracket");
6087         else
6088             --PL_lex_brackets;
6089         PL_lex_allbrackets--;
6090         if (PL_lex_state == LEX_INTERPNORMAL) {
6091             if (PL_lex_brackets == 0) {
6092                 if (*s == '-' && s[1] == '>')
6093                     PL_lex_state = LEX_INTERPENDMAYBE;
6094                 else if (*s != '[' && *s != '{')
6095                     PL_lex_state = LEX_INTERPEND;
6096             }
6097         }
6098         TERM(']');
6099     case '{':
6100         s++;
6101       leftbracket:
6102         if (PL_lex_brackets > 100) {
6103             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6104         }
6105         switch (PL_expect) {
6106         case XTERM:
6107         case XTERMORDORDOR:
6108             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6109             PL_lex_allbrackets++;
6110             OPERATOR(HASHBRACK);
6111         case XOPERATOR:
6112             while (s < PL_bufend && SPACE_OR_TAB(*s))
6113                 s++;
6114             d = s;
6115             PL_tokenbuf[0] = '\0';
6116             if (d < PL_bufend && *d == '-') {
6117                 PL_tokenbuf[0] = '-';
6118                 d++;
6119                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6120                     d++;
6121             }
6122             if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6123                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6124                               FALSE, &len);
6125                 while (d < PL_bufend && SPACE_OR_TAB(*d))
6126                     d++;
6127                 if (*d == '}') {
6128                     const char minus = (PL_tokenbuf[0] == '-');
6129                     s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6130                     if (minus)
6131                         force_next('-');
6132                 }
6133             }
6134             /* FALLTHROUGH */
6135         case XATTRTERM:
6136         case XTERMBLOCK:
6137             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6138             PL_lex_allbrackets++;
6139             PL_expect = XSTATE;
6140             break;
6141         case XATTRBLOCK:
6142         case XBLOCK:
6143             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6144             PL_lex_allbrackets++;
6145             PL_expect = XSTATE;
6146             break;
6147         case XBLOCKTERM:
6148             PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6149             PL_lex_allbrackets++;
6150             PL_expect = XSTATE;
6151             break;
6152         default: {
6153                 const char *t;
6154                 if (PL_oldoldbufptr == PL_last_lop)
6155                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6156                 else
6157                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6158                 PL_lex_allbrackets++;
6159                 s = skipspace(s);
6160                 if (*s == '}') {
6161                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6162                         PL_expect = XTERM;
6163                         /* This hack is to get the ${} in the message. */
6164                         PL_bufptr = s+1;
6165                         yyerror("syntax error");
6166                         break;
6167                     }
6168                     OPERATOR(HASHBRACK);
6169                 }
6170                 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6171                     /* ${...} or @{...} etc., but not print {...}
6172                      * Skip the disambiguation and treat this as a block.
6173                      */
6174                     goto block_expectation;
6175                 }
6176                 /* This hack serves to disambiguate a pair of curlies
6177                  * as being a block or an anon hash.  Normally, expectation
6178                  * determines that, but in cases where we're not in a
6179                  * position to expect anything in particular (like inside
6180                  * eval"") we have to resolve the ambiguity.  This code
6181                  * covers the case where the first term in the curlies is a
6182                  * quoted string.  Most other cases need to be explicitly
6183                  * disambiguated by prepending a "+" before the opening
6184                  * curly in order to force resolution as an anon hash.
6185                  *
6186                  * XXX should probably propagate the outer expectation
6187                  * into eval"" to rely less on this hack, but that could
6188                  * potentially break current behavior of eval"".
6189                  * GSAR 97-07-21
6190                  */
6191                 t = s;
6192                 if (*s == '\'' || *s == '"' || *s == '`') {
6193                     /* common case: get past first string, handling escapes */
6194                     for (t++; t < PL_bufend && *t != *s;)
6195                         if (*t++ == '\\')
6196                             t++;
6197                     t++;
6198                 }
6199                 else if (*s == 'q') {
6200                     if (++t < PL_bufend
6201                         && (!isWORDCHAR(*t)
6202                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6203                                 && !isWORDCHAR(*t))))
6204                     {
6205                         /* skip q//-like construct */
6206                         const char *tmps;
6207                         char open, close, term;
6208                         I32 brackets = 1;
6209
6210                         while (t < PL_bufend && isSPACE(*t))
6211                             t++;
6212                         /* check for q => */
6213                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6214                             OPERATOR(HASHBRACK);
6215                         }
6216                         term = *t;
6217                         open = term;
6218                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6219                             term = tmps[5];
6220                         close = term;
6221                         if (open == close)
6222                             for (t++; t < PL_bufend; t++) {
6223                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6224                                     t++;
6225                                 else if (*t == open)
6226                                     break;
6227                             }
6228                         else {
6229                             for (t++; t < PL_bufend; t++) {
6230                                 if (*t == '\\' && t+1 < PL_bufend)
6231                                     t++;
6232                                 else if (*t == close && --brackets <= 0)
6233                                     break;
6234                                 else if (*t == open)
6235                                     brackets++;
6236                             }
6237                         }
6238                         t++;
6239                     }
6240                     else
6241                         /* skip plain q word */
6242                         while (   t < PL_bufend
6243                                && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6244                         {
6245                             t += UTF ? UTF8SKIP(t) : 1;
6246                         }
6247                 }
6248                 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6249                     t += UTF ? UTF8SKIP(t) : 1;
6250                     while (   t < PL_bufend
6251                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6252                     {
6253                         t += UTF ? UTF8SKIP(t) : 1;
6254                     }
6255                 }
6256                 while (t < PL_bufend && isSPACE(*t))
6257                     t++;
6258                 /* if comma follows first term, call it an anon hash */
6259                 /* XXX it could be a comma expression with loop modifiers */
6260                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6261                                    || (*t == '=' && t[1] == '>')))
6262                     OPERATOR(HASHBRACK);
6263                 if (PL_expect == XREF)
6264                 {
6265                   block_expectation:
6266                     /* If there is an opening brace or 'sub:', treat it
6267                        as a term to make ${{...}}{k} and &{sub:attr...}
6268                        dwim.  Otherwise, treat it as a statement, so
6269                        map {no strict; ...} works.
6270                      */
6271                     s = skipspace(s);
6272                     if (*s == '{') {
6273                         PL_expect = XTERM;
6274                         break;
6275                     }
6276                     if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6277                         PL_bufptr = s;
6278                         d = s + 3;
6279                         d = skipspace(d);
6280                         s = PL_bufptr;
6281                         if (*d == ':') {
6282                             PL_expect = XTERM;
6283                             break;
6284                         }
6285                     }
6286                     PL_expect = XSTATE;
6287                 }
6288                 else {
6289                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6290                     PL_expect = XSTATE;
6291                 }
6292             }
6293             break;
6294         }
6295         pl_yylval.ival = CopLINE(PL_curcop);
6296         PL_copline = NOLINE;   /* invalidate current command line number */
6297         TOKEN(formbrack ? '=' : '{');
6298     case '}':
6299         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6300             TOKEN(0);
6301       rightbracket:
6302         assert(s != PL_bufend);
6303         s++;
6304         if (PL_lex_brackets <= 0)
6305             /* diag_listed_as: Unmatched right %s bracket */
6306             yyerror("Unmatched right curly bracket");
6307         else
6308             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6309         PL_lex_allbrackets--;
6310         if (PL_lex_state == LEX_INTERPNORMAL) {
6311             if (PL_lex_brackets == 0) {
6312                 if (PL_expect & XFAKEBRACK) {
6313                     PL_expect &= XENUMMASK;
6314                     PL_lex_state = LEX_INTERPEND;
6315                     PL_bufptr = s;
6316                     return yylex();     /* ignore fake brackets */
6317                 }
6318                 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6319                  && SvEVALED(PL_lex_repl))
6320                     PL_lex_state = LEX_INTERPEND;
6321                 else if (*s == '-' && s[1] == '>')
6322                     PL_lex_state = LEX_INTERPENDMAYBE;
6323                 else if (*s != '[' && *s != '{')
6324                     PL_lex_state = LEX_INTERPEND;
6325             }
6326         }
6327         if (PL_expect & XFAKEBRACK) {
6328             PL_expect &= XENUMMASK;
6329             PL_bufptr = s;
6330             return yylex();             /* ignore fake brackets */
6331         }
6332         force_next(formbrack ? '.' : '}');
6333         if (formbrack) LEAVE_with_name("lex_format");
6334         if (formbrack == 2) { /* means . where arguments were expected */
6335             force_next(';');
6336             TOKEN(FORMRBRACK);
6337         }
6338         TOKEN(';');
6339     case '&':
6340         if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6341         s++;
6342         if (*s++ == '&') {
6343             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6344                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6345                 s -= 2;
6346                 TOKEN(0);
6347             }
6348             AOPERATOR(ANDAND);
6349         }
6350         s--;
6351         if (PL_expect == XOPERATOR) {
6352             if (   PL_bufptr == PL_linestart
6353                 && ckWARN(WARN_SEMICOLON)
6354                 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6355             {
6356                 CopLINE_dec(PL_curcop);
6357                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6358                 CopLINE_inc(PL_curcop);
6359             }
6360             d = s;
6361             if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6362                 s++;
6363             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6364                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6365                 s = d;
6366                 s--;
6367                 TOKEN(0);
6368             }
6369             if (d == s) {
6370                 PL_parser->saw_infix_sigil = 1;
6371                 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6372             }
6373             else
6374                 BAop(OP_SBIT_AND);
6375         }
6376
6377         PL_tokenbuf[0] = '&';
6378         s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6379         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6380         if (PL_tokenbuf[1]) {
6381             force_ident_maybe_lex('&');
6382         }
6383         else
6384             PREREF('&');
6385         TERM('&');
6386
6387     case '|':
6388         s++;
6389         if (*s++ == '|') {
6390             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6391                     (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6392                 s -= 2;
6393                 TOKEN(0);
6394             }
6395             AOPERATOR(OROR);
6396         }
6397         s--;
6398         d = s;
6399         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6400             s++;
6401         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6402                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6403             s = d - 1;
6404             TOKEN(0);
6405         }
6406         BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6407     case '=':
6408         s++;
6409         {
6410             const char tmp = *s++;
6411             if (tmp == '=') {
6412                 if (   (s == PL_linestart+2 || s[-3] == '\n')
6413                     && memBEGINs(s, (STRLEN) (PL_bufend - s), "====="))
6414                 {
6415                     s = vcs_conflict_marker(s + 5);
6416                     goto retry;
6417                 }
6418                 if (!PL_lex_allbrackets
6419                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6420                 {
6421                     s -= 2;
6422                     TOKEN(0);
6423                 }
6424                 Eop(OP_EQ);
6425             }
6426             if (tmp == '>') {
6427                 if (!PL_lex_allbrackets
6428                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
6429                 {
6430                     s -= 2;
6431                     TOKEN(0);
6432                 }
6433                 OPERATOR(',');
6434             }
6435             if (tmp == '~')
6436                 PMop(OP_MATCH);
6437             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6438                 && strchr("+-*/%.^&|<",tmp))
6439                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6440                             "Reversed %c= operator",(int)tmp);
6441             s--;
6442             if (PL_expect == XSTATE
6443                 && isALPHA(tmp)
6444                 && (s == PL_linestart+1 || s[-2] == '\n') )
6445             {
6446                 if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6447                     || PL_lex_state != LEX_NORMAL)
6448                 {
6449                     d = PL_bufend;
6450                     while (s < d) {
6451                         if (*s++ == '\n') {
6452                             incline(s, PL_bufend);
6453                             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
6454                             {
6455                                 s = (char *) memchr(s,'\n', d - s);
6456                                 if (s)
6457                                     s++;
6458                                 else
6459                                     s = d;
6460                                 incline(s, PL_bufend);
6461                                 goto retry;
6462                             }
6463                         }
6464                     }
6465                     goto retry;
6466                 }
6467                 s = PL_bufend;
6468                 PL_parser->in_pod = 1;
6469                 goto retry;
6470             }
6471         }
6472         if (PL_expect == XBLOCK) {
6473             const char *t = s;
6474 #ifdef PERL_STRICT_CR
6475             while (SPACE_OR_TAB(*t))
6476 #else
6477             while (SPACE_OR_TAB(*t) || *t == '\r')
6478 #endif
6479                 t++;
6480             if (*t == '\n' || *t == '#') {
6481                 formbrack = 1;
6482                 ENTER_with_name("lex_format");
6483                 SAVEI8(PL_parser->form_lex_state);
6484                 SAVEI32(PL_lex_formbrack);
6485                 PL_parser->form_lex_state = PL_lex_state;
6486                 PL_lex_formbrack = PL_lex_brackets + 1;
6487                 goto leftbracket;
6488             }
6489         }
6490         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6491             s--;
6492             TOKEN(0);
6493         }
6494         pl_yylval.ival = 0;
6495         OPERATOR(ASSIGNOP);
6496     case '!':
6497         s++;
6498         {
6499             const char tmp = *s++;
6500             if (tmp == '=') {
6501                 /* was this !=~ where !~ was meant?
6502                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6503
6504                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6505                     const char *t = s+1;
6506
6507                     while (t < PL_bufend && isSPACE(*t))
6508                         ++t;
6509
6510                     if (*t == '/' || *t == '?'
6511                         || ((*t == 'm' || *t == 's' || *t == 'y')
6512                             && !isWORDCHAR(t[1]))
6513                         || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6514                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6515                                     "!=~ should be !~");
6516                 }
6517                 if (!PL_lex_allbrackets
6518                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6519                 {
6520                     s -= 2;
6521                     TOKEN(0);
6522                 }
6523                 Eop(OP_NE);
6524             }
6525             if (tmp == '~')
6526                 PMop(OP_NOT);
6527         }
6528         s--;
6529         OPERATOR('!');
6530     case '<':
6531         if (PL_expect != XOPERATOR) {
6532             if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6533                 check_uni();
6534             if (s[1] == '<' && s[2] != '>') {
6535                 if (   (s == PL_linestart || s[-1] == '\n')
6536                     && memBEGINs(s+2, (STRLEN) (PL_bufend - (s+2)), "<<<<<"))
6537                 {
6538                     s = vcs_conflict_marker(s + 7);
6539                     goto retry;
6540                 }
6541                 s = scan_heredoc(s);
6542             }
6543             else
6544                 s = scan_inputsymbol(s);
6545             PL_expect = XOPERATOR;
6546             TOKEN(sublex_start());
6547         }
6548         s++;
6549         {
6550             char tmp = *s++;
6551             if (tmp == '<') {
6552                 if (   (s == PL_linestart+2 || s[-3] == '\n')
6553                     && memBEGINs(s, (STRLEN) (PL_bufend - s), "<<<<<"))
6554                 {
6555                     s = vcs_conflict_marker(s + 5);
6556                     goto retry;
6557                 }
6558                 if (*s == '=' && !PL_lex_allbrackets
6559                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6560                 {
6561                     s -= 2;
6562                     TOKEN(0);
6563                 }
6564                 SHop(OP_LEFT_SHIFT);
6565             }
6566             if (tmp == '=') {
6567                 tmp = *s++;
6568                 if (tmp == '>') {
6569                     if (!PL_lex_allbrackets
6570                         && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6571                     {
6572                         s -= 3;
6573                         TOKEN(0);
6574                     }
6575                     Eop(OP_NCMP);
6576                 }
6577                 s--;
6578                 if (!PL_lex_allbrackets
6579                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6580                 {
6581                     s -= 2;
6582                     TOKEN(0);
6583                 }
6584                 Rop(OP_LE);
6585             }
6586         }
6587         s--;
6588         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6589             s--;
6590             TOKEN(0);
6591         }
6592         Rop(OP_LT);
6593     case '>':
6594         s++;
6595         {
6596             const char tmp = *s++;
6597             if (tmp == '>') {
6598                 if (   (s == PL_linestart+2 || s[-3] == '\n')
6599                     && memBEGINs(s, (STRLEN) (PL_bufend - s), ">>>>>"))
6600                 {
6601                     s = vcs_conflict_marker(s + 5);
6602                     goto retry;
6603                 }
6604                 if (*s == '=' && !PL_lex_allbrackets
6605                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6606                 {
6607                     s -= 2;
6608                     TOKEN(0);
6609                 }
6610                 SHop(OP_RIGHT_SHIFT);
6611             }
6612             else if (tmp == '=') {
6613                 if (!PL_lex_allbrackets
6614                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6615                 {
6616                     s -= 2;
6617                     TOKEN(0);
6618                 }
6619                 Rop(OP_GE);
6620             }
6621         }
6622         s--;
6623         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6624             s--;
6625             TOKEN(0);
6626         }
6627         Rop(OP_GT);
6628
6629     case '$':
6630         CLINE;
6631
6632         if (PL_expect == XPOSTDEREF) {
6633             if (s[1] == '#') {
6634                 s++;
6635                 POSTDEREF(DOLSHARP);
6636             }
6637             POSTDEREF('$');
6638         }
6639
6640         if (   s[1] == '#'
6641             && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
6642                 || strchr("{$:+-@", s[2])))
6643         {
6644             PL_tokenbuf[0] = '@';
6645             s = scan_ident(s + 1, PL_tokenbuf + 1,
6646                            sizeof PL_tokenbuf - 1, FALSE);
6647             if (PL_expect == XOPERATOR) {
6648                 d = s;
6649                 if (PL_bufptr > s) {
6650                     d = PL_bufptr-1;
6651                     PL_bufptr = PL_oldbufptr;
6652                 }
6653                 no_op("Array length", d);
6654             }
6655             if (!PL_tokenbuf[1])
6656                 PREREF(DOLSHARP);
6657             PL_expect = XOPERATOR;
6658             force_ident_maybe_lex('#');
6659             TOKEN(DOLSHARP);
6660         }
6661
6662         PL_tokenbuf[0] = '$';
6663         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6664         if (PL_expect == XOPERATOR) {
6665             d = s;
6666             if (PL_bufptr > s) {
6667                 d = PL_bufptr-1;
6668                 PL_bufptr = PL_oldbufptr;
6669             }
6670             no_op("Scalar", d);
6671         }
6672         if (!PL_tokenbuf[1]) {
6673             if (s == PL_bufend)
6674                 yyerror("Final $ should be \\$ or $name");
6675             PREREF('$');
6676         }
6677
6678         d = s;
6679         {
6680             const char tmp = *s;
6681             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6682                 s = skipspace(s);
6683
6684             if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6685                 && intuit_more(s, PL_bufend)) {
6686                 if (*s == '[') {
6687                     PL_tokenbuf[0] = '@';
6688                     if (ckWARN(WARN_SYNTAX)) {
6689                         char *t = s+1;
6690
6691                         while (   isSPACE(*t)
6692                                || isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)
6693                                || *t == '$')
6694                         {
6695                             t += UTF ? UTF8SKIP(t) : 1;
6696                         }
6697                         if (*t++ == ',') {
6698                             PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6699                             while (t < PL_bufend && *t != ']')
6700                                 t++;
6701                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6702                                         "Multidimensional syntax %" UTF8f " not supported",
6703                                         UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
6704                         }
6705                     }
6706                 }
6707                 else if (*s == '{') {
6708                     char *t;
6709                     PL_tokenbuf[0] = '%';
6710                     if (    strEQ(PL_tokenbuf+1, "SIG")
6711                         && ckWARN(WARN_SYNTAX)
6712                         && (t = (char *) memchr(s, '}', PL_bufend - s))
6713                         && (t = (char *) memchr(t, '=', PL_bufend - t)))
6714                     {
6715                         char tmpbuf[sizeof PL_tokenbuf];
6716                         do {
6717                             t++;
6718                         } while (isSPACE(*t));
6719                         if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
6720                             STRLEN len;
6721                             t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6722                                             &len);
6723                             while (isSPACE(*t))
6724                                 t++;
6725                             if (  *t == ';'
6726                                 && get_cvn_flags(tmpbuf, len, UTF
6727                                                                 ? SVf_UTF8
6728                                                                 : 0))
6729                             {
6730                                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6731                                     "You need to quote \"%" UTF8f "\"",
6732                                         UTF8fARG(UTF, len, tmpbuf));
6733                             }
6734                         }
6735                     }
6736                 }
6737             }
6738
6739             PL_expect = XOPERATOR;
6740             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6741                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6742                 if (!islop || PL_last_lop_op == OP_GREPSTART)
6743                     PL_expect = XOPERATOR;
6744                 else if (strchr("$@\"'`q", *s))
6745                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
6746                 else if (   strchr("&*<%", *s)
6747                          && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
6748                 {
6749                     PL_expect = XTERM;          /* e.g. print $fh &sub */
6750                 }
6751                 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6752                     char tmpbuf[sizeof PL_tokenbuf];
6753                     int t2;
6754                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6755                     if ((t2 = keyword(tmpbuf, len, 0))) {
6756                         /* binary operators exclude handle interpretations */
6757                         switch (t2) {
6758                         case -KEY_x:
6759                         case -KEY_eq:
6760                         case -KEY_ne:
6761                         case -KEY_gt:
6762                         case -KEY_lt:
6763                         case -KEY_ge:
6764                         case -KEY_le:
6765                         case -KEY_cmp:
6766                             break;
6767                         default:
6768                             PL_expect = XTERM;  /* e.g. print $fh length() */
6769                             break;
6770                         }
6771                     }
6772                     else {
6773                         PL_expect = XTERM;      /* e.g. print $fh subr() */
6774                     }
6775                 }
6776                 else if (isDIGIT(*s))
6777                     PL_expect = XTERM;          /* e.g. print $fh 3 */
6778                 else if (*s == '.' && isDIGIT(s[1]))
6779                     PL_expect = XTERM;          /* e.g. print $fh .3 */
6780                 else if ((*s == '?' || *s == '-' || *s == '+')
6781                          && !isSPACE(s[1]) && s[1] != '=')
6782                     PL_expect = XTERM;          /* e.g. print $fh -1 */
6783                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6784                          && s[1] != '/')
6785                     PL_expect = XTERM;          /* e.g. print $fh /.../
6786                                                    XXX except DORDOR operator
6787                                                 */
6788                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6789                          && s[2] != '=')
6790                     PL_expect = XTERM;          /* print $fh <<"EOF" */
6791             }
6792         }
6793         force_ident_maybe_lex('$');
6794         TOKEN('$');
6795
6796     case '@':
6797         if (PL_expect == XPOSTDEREF)
6798             POSTDEREF('@');
6799         PL_tokenbuf[0] = '@';
6800         s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6801         if (PL_expect == XOPERATOR) {
6802             d = s;
6803             if (PL_bufptr > s) {
6804                 d = PL_bufptr-1;
6805                 PL_bufptr = PL_oldbufptr;
6806             }
6807             no_op("Array", d);
6808         }
6809         pl_yylval.ival = 0;
6810         if (!PL_tokenbuf[1]) {
6811             PREREF('@');
6812         }
6813         if (PL_lex_state == LEX_NORMAL)
6814             s = skipspace(s);
6815         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6816             && intuit_more(s, PL_bufend))
6817         {
6818             if (*s == '{')
6819                 PL_tokenbuf[0] = '%';
6820
6821             /* Warn about @ where they meant $. */
6822             if (*s == '[' || *s == '{') {
6823                 if (ckWARN(WARN_SYNTAX)) {
6824                     S_check_scalar_slice(aTHX_ s);
6825                 }
6826             }
6827         }
6828         PL_expect = XOPERATOR;
6829         force_ident_maybe_lex('@');
6830         TERM('@');
6831
6832      case '/':                  /* may be division, defined-or, or pattern */
6833         if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6834             if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6835                     (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6836                 TOKEN(0);
6837             s += 2;
6838             AOPERATOR(DORDOR);
6839         }
6840         else if (PL_expect == XOPERATOR) {
6841             s++;
6842             if (*s == '=' && !PL_lex_allbrackets
6843                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6844             {
6845                 s--;
6846                 TOKEN(0);
6847             }
6848             Mop(OP_DIVIDE);
6849         }
6850         else {
6851             /* Disable warning on "study /blah/" */
6852             if (    PL_oldoldbufptr == PL_last_uni
6853                 && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6854                     || memNE(PL_last_uni, "study", 5)
6855                     || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6856              ))
6857                 check_uni();
6858             s = scan_pat(s,OP_MATCH);
6859             TERM(sublex_start());
6860         }
6861
6862      case '?':                  /* conditional */
6863         s++;
6864         if (!PL_lex_allbrackets
6865             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
6866         {
6867             s--;
6868             TOKEN(0);
6869         }
6870         PL_lex_allbrackets++;
6871         OPERATOR('?');
6872
6873     case '.':
6874         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6875 #ifdef PERL_STRICT_CR
6876             && s[1] == '\n'
6877 #else
6878             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6879 #endif
6880             && (s == PL_linestart || s[-1] == '\n') )
6881         {
6882             PL_expect = XSTATE;
6883             formbrack = 2; /* dot seen where arguments expected */
6884             goto rightbracket;
6885         }
6886         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6887             s += 3;
6888             OPERATOR(YADAYADA);
6889         }
6890         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6891             char tmp = *s++;
6892             if (*s == tmp) {
6893                 if (!PL_lex_allbrackets
6894                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
6895                 {
6896                     s--;
6897                     TOKEN(0);
6898                 }
6899                 s++;
6900                 if (*s == tmp) {
6901                     s++;
6902                     pl_yylval.ival = OPf_SPECIAL;
6903                 }
6904                 else
6905                     pl_yylval.ival = 0;
6906                 OPERATOR(DOTDOT);
6907             }
6908             if (*s == '=' && !PL_lex_allbrackets
6909                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6910             {
6911                 s--;
6912                 TOKEN(0);
6913             }
6914             Aop(OP_CONCAT);
6915         }
6916         /* FALLTHROUGH */
6917     case '0': case '1': case '2': case '3': case '4':
6918     case '5': case '6': case '7': case '8': case '9':
6919         s = scan_num(s, &pl_yylval);
6920         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6921         if (PL_expect == XOPERATOR)
6922             no_op("Number",s);
6923         TERM(THING);
6924
6925     case '\'':
6926         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6927         if (!s)
6928             missingterm(NULL, 0);
6929         COPLINE_SET_FROM_MULTI_END;
6930         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6931         if (PL_expect == XOPERATOR) {
6932             no_op("String",s);
6933         }
6934         pl_yylval.ival = OP_CONST;
6935         TERM(sublex_start());
6936
6937     case '"':
6938         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6939         DEBUG_T( {
6940             if (s)
6941                 printbuf("### Saw string before %s\n", s);
6942             else
6943                 PerlIO_printf(Perl_debug_log,
6944                              "### Saw unterminated string\n");
6945         } );
6946         if (PL_expect == XOPERATOR) {
6947                 no_op("String",s);
6948         }
6949         if (!s)
6950             missingterm(NULL, 0);
6951         pl_yylval.ival = OP_CONST;
6952         /* FIXME. I think that this can be const if char *d is replaced by
6953            more localised variables.  */
6954         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6955             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6956                 pl_yylval.ival = OP_STRINGIFY;
6957                 break;
6958             }
6959         }
6960         if (pl_yylval.ival == OP_CONST)
6961             COPLINE_SET_FROM_MULTI_END;
6962         TERM(sublex_start());
6963
6964     case '`':
6965         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6966         DEBUG_T( {
6967             if (s)
6968                 printbuf("### Saw backtick string before %s\n", s);
6969             else
6970                 PerlIO_printf(Perl_debug_log,
6971                              "### Saw unterminated backtick string\n");
6972         } );
6973         if (PL_expect == XOPERATOR)
6974             no_op("Backticks",s);
6975         if (!s)
6976             missingterm(NULL, 0);
6977         pl_yylval.ival = OP_BACKTICK;
6978         TERM(sublex_start());
6979
6980     case '\\':
6981         s++;
6982         if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6983          && isDIGIT(*s))
6984             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6985                            *s, *s);
6986         if (PL_expect == XOPERATOR)
6987             no_op("Backslash",s);
6988         OPERATOR(REFGEN);
6989
6990     case 'v':
6991         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6992             char *start = s + 2;
6993             while (isDIGIT(*start) || *start == '_')
6994                 start++;
6995             if (*start == '.' && isDIGIT(start[1])) {
6996                 s = scan_num(s, &pl_yylval);
6997                 TERM(THING);
6998             }
6999             else if ((*start == ':' && start[1] == ':')
7000                   || (PL_expect == XSTATE && *start == ':'))
7001                 goto keylookup;
7002             else if (PL_expect == XSTATE) {
7003                 d = start;
7004                 while (d < PL_bufend && isSPACE(*d)) d++;
7005                 if (*d == ':') goto keylookup;
7006             }
7007             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
7008             if (!isALPHA(*start) && (PL_expect == XTERM
7009                         || PL_expect == XREF || PL_expect == XSTATE
7010                         || PL_expect == XTERMORDORDOR)) {
7011                 GV *const gv = gv_fetchpvn_flags(s, start - s,
7012                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
7013                 if (!gv) {
7014                     s = scan_num(s, &pl_yylval);
7015                     TERM(THING);
7016                 }
7017             }
7018         }
7019         goto keylookup;
7020     case 'x':
7021         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
7022             s++;
7023             Mop(OP_REPEAT);
7024         }
7025         goto keylookup;
7026
7027     case '_':
7028     case 'a': case 'A':
7029     case 'b': case 'B':
7030     case 'c': case 'C':
7031     case 'd': case 'D':
7032     case 'e': case 'E':
7033     case 'f': case 'F':
7034     case 'g': case 'G':
7035     case 'h': case 'H':
7036     case 'i': case 'I':
7037     case 'j': case 'J':
7038     case 'k': case 'K':
7039     case 'l': case 'L':
7040     case 'm': case 'M':
7041     case 'n': case 'N':
7042     case 'o': case 'O':
7043     case 'p': case 'P':
7044     case 'q': case 'Q':
7045     case 'r': case 'R':
7046     case 's': case 'S':
7047     case 't': case 'T':
7048     case 'u': case 'U':
7049               case 'V':
7050     case 'w': case 'W':
7051               case 'X':
7052     case 'y': case 'Y':
7053     case 'z': case 'Z':
7054
7055       keylookup: {
7056         bool anydelim;
7057         bool lex;
7058         I32 tmp;
7059         SV *sv;
7060         CV *cv;
7061         PADOFFSET off;
7062         OP *rv2cv_op;
7063
7064         lex = FALSE;
7065         orig_keyword = 0;
7066         off = 0;
7067         sv = NULL;
7068         cv = NULL;
7069         gv = NULL;
7070         gvp = NULL;
7071         rv2cv_op = NULL;
7072
7073         PL_bufptr = s;
7074         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7075
7076         /* Some keywords can be followed by any delimiter, including ':' */
7077         anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
7078
7079         /* x::* is just a word, unless x is "CORE" */
7080         if (!anydelim && *s == ':' && s[1] == ':') {
7081             if (memEQs(PL_tokenbuf, len, "CORE")) goto case_KEY_CORE;
7082             goto just_a_word;
7083         }
7084
7085         d = s;
7086         while (d < PL_bufend && isSPACE(*d))
7087                 d++;    /* no comments skipped here, or s### is misparsed */
7088
7089         /* Is this a word before a => operator? */
7090         if (*d == '=' && d[1] == '>') {
7091           fat_arrow:
7092             CLINE;
7093             pl_yylval.opval
7094                 = newSVOP(OP_CONST, 0,
7095                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7096             pl_yylval.opval->op_private = OPpCONST_BARE;
7097             TERM(BAREWORD);
7098         }
7099
7100         /* Check for plugged-in keyword */
7101         {
7102             OP *o;
7103             int result;
7104             char *saved_bufptr = PL_bufptr;
7105             PL_bufptr = s;
7106             result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7107             s = PL_bufptr;
7108             if (result == KEYWORD_PLUGIN_DECLINE) {
7109                 /* not a plugged-in keyword */
7110                 PL_bufptr = saved_bufptr;
7111             } else if (result == KEYWORD_PLUGIN_STMT) {
7112                 pl_yylval.opval = o;
7113                 CLINE;
7114                 if (!PL_nexttoke) PL_expect = XSTATE;
7115                 return REPORT(PLUGSTMT);
7116             } else if (result == KEYWORD_PLUGIN_EXPR) {
7117                 pl_yylval.opval = o;
7118                 CLINE;
7119                 if (!PL_nexttoke) PL_expect = XOPERATOR;
7120                 return REPORT(PLUGEXPR);
7121             } else {
7122                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7123                                         PL_tokenbuf);
7124             }
7125         }
7126
7127         /* Check for built-in keyword */
7128         tmp = keyword(PL_tokenbuf, len, 0);
7129
7130         /* Is this a label? */
7131         if (!anydelim && PL_expect == XSTATE
7132               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7133             s = d + 1;
7134             pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7135             pl_yylval.pval[len] = '\0';
7136             pl_yylval.pval[len+1] = UTF ? 1 : 0;
7137             CLINE;
7138             TOKEN(LABEL);
7139         }
7140
7141         /* Check for lexical sub */
7142         if (PL_expect != XOPERATOR) {
7143             char tmpbuf[sizeof PL_tokenbuf + 1];
7144             *tmpbuf = '&';
7145             Copy(PL_tokenbuf, tmpbuf+1, len, char);
7146             off = pad_findmy_pvn(tmpbuf, len+1, 0);
7147             if (off != NOT_IN_PAD) {
7148                 assert(off); /* we assume this is boolean-true below */
7149                 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7150                     HV *  const stash = PAD_COMPNAME_OURSTASH(off);
7151                     HEK * const stashname = HvNAME_HEK(stash);
7152                     sv = newSVhek(stashname);
7153                     sv_catpvs(sv, "::");
7154                     sv_catpvn_flags(sv, PL_tokenbuf, len,
7155                                     (UTF ? SV_CATUTF8 : SV_CATBYTES));
7156                     gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7157                                     SVt_PVCV);
7158                     off = 0;
7159                     if (!gv) {
7160                         sv_free(sv);
7161                         sv = NULL;
7162                         goto just_a_word;
7163                     }
7164                 }
7165                 else {
7166                     rv2cv_op = newOP(OP_PADANY, 0);
7167                     rv2cv_op->op_targ = off;
7168                     cv = find_lexical_cv(off);
7169                 }
7170                 lex = TRUE;
7171                 goto just_a_word;
7172             }
7173             off = 0;
7174         }
7175
7176         if (tmp < 0) {                  /* second-class keyword? */
7177             GV *ogv = NULL;     /* override (winner) */
7178             GV *hgv = NULL;     /* hidden (loser) */
7179             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7180                 CV *cv;
7181                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7182                                             (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
7183                                             SVt_PVCV))
7184                     && (cv = GvCVu(gv)))
7185                 {
7186                     if (GvIMPORTED_CV(gv))
7187                         ogv = gv;
7188                     else if (! CvMETHOD(cv))
7189                         hgv = gv;
7190                 }
7191                 if (!ogv
7192                     && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7193                                                               len, FALSE))
7194                     && (gv = *gvp)
7195                     && (isGV_with_GP(gv)
7196                         ? GvCVu(gv) && GvIMPORTED_CV(gv)
7197                         :   SvPCS_IMPORTED(gv)
7198                         && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7199                                                                  len, 0), 1)))
7200                 {
7201                     ogv = gv;
7202                 }
7203             }
7204             if (ogv) {
7205                 orig_keyword = tmp;
7206                 tmp = 0;                /* overridden by import or by GLOBAL */
7207             }
7208             else if (gv && !gvp
7209                      && -tmp==KEY_lock  /* XXX generalizable kludge */
7210                      && GvCVu(gv))
7211             {
7212                 tmp = 0;                /* any sub overrides "weak" keyword */
7213             }
7214             else {                      /* no override */
7215                 tmp = -tmp;
7216                 if (tmp == KEY_dump) {
7217                     Perl_ck_warner_d(aTHX_ packWARN2(WARN_MISC,WARN_DEPRECATED),
7218                                      "dump() better written as CORE::dump(). "
7219                                      "dump() will no longer be available "
7220                                      "in Perl 5.30");
7221                 }
7222                 gv = NULL;
7223                 gvp = 0;
7224                 if (hgv && tmp != KEY_x)        /* never ambiguous */
7225                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7226                                    "Ambiguous call resolved as CORE::%s(), "
7227                                    "qualify as such or use &",
7228                                    GvENAME(hgv));
7229             }
7230         }
7231
7232         if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7233          && (!anydelim || *s != '#')) {
7234             /* no override, and not s### either; skipspace is safe here
7235              * check for => on following line */
7236             bool arrow;
7237             STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7238             STRLEN   soff = s         - SvPVX(PL_linestr);
7239             s = peekspace(s);
7240             arrow = *s == '=' && s[1] == '>';
7241             PL_bufptr = SvPVX(PL_linestr) + bufoff;
7242             s         = SvPVX(PL_linestr) +   soff;
7243             if (arrow)
7244                 goto fat_arrow;
7245         }
7246
7247       reserved_word:
7248         switch (tmp) {
7249
7250             /* Trade off - by using this evil construction we can pull the
7251                variable gv into the block labelled keylookup. If not, then
7252                we have to give it function scope so that the goto from the
7253                earlier ':' case doesn't bypass the initialisation.  */
7254             just_a_word_zero_gv:
7255                 sv = NULL;
7256                 cv = NULL;
7257                 gv = NULL;
7258                 gvp = NULL;
7259                 rv2cv_op = NULL;
7260                 orig_keyword = 0;
7261                 lex = 0;
7262                 off = 0;
7263             /* FALLTHROUGH */
7264         default:                        /* not a keyword */
7265           just_a_word: {
7266                 int pkgname = 0;
7267                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7268                 bool safebw;
7269
7270
7271                 /* Get the rest if it looks like a package qualifier */
7272
7273                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7274                     STRLEN morelen;
7275                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7276                                   TRUE, &morelen);
7277                     if (!morelen)
7278                         Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7279                                 UTF8fARG(UTF, len, PL_tokenbuf),
7280                                 *s == '\'' ? "'" : "::");
7281                     len += morelen;
7282                     pkgname = 1;
7283                 }
7284
7285                 if (PL_expect == XOPERATOR) {
7286                     if (PL_bufptr == PL_linestart) {
7287                         CopLINE_dec(PL_curcop);
7288                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7289                         CopLINE_inc(PL_curcop);
7290                     }
7291                     else
7292                         no_op("Bareword",s);
7293                 }
7294
7295                 /* See if the name is "Foo::",
7296                    in which case Foo is a bareword
7297                    (and a package name). */
7298
7299                 if (len > 2
7300                     && PL_tokenbuf[len - 2] == ':'
7301                     && PL_tokenbuf[len - 1] == ':')
7302                 {
7303                     if (ckWARN(WARN_BAREWORD)
7304                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7305                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7306                                     "Bareword \"%" UTF8f
7307                                     "\" refers to nonexistent package",
7308                                     UTF8fARG(UTF, len, PL_tokenbuf));
7309                     len -= 2;
7310                     PL_tokenbuf[len] = '\0';
7311                     gv = NULL;
7312                     gvp = 0;
7313                     safebw = TRUE;
7314                 }
7315                 else {
7316                     safebw = FALSE;
7317                 }
7318
7319                 /* if we saw a global override before, get the right name */
7320
7321                 if (!sv)
7322                   sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7323                                                 len);
7324                 if (gvp) {
7325                     SV * const tmp_sv = sv;
7326                     sv = newSVpvs("CORE::GLOBAL::");
7327                     sv_catsv(sv, tmp_sv);
7328                     SvREFCNT_dec(tmp_sv);
7329                 }
7330
7331
7332                 /* Presume this is going to be a bareword of some sort. */
7333                 CLINE;
7334                 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
7335                 pl_yylval.opval->op_private = OPpCONST_BARE;
7336
7337                 /* And if "Foo::", then that's what it certainly is. */
7338                 if (safebw)
7339                     goto safe_bareword;
7340
7341                 if (!off)
7342                 {
7343                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7344                     const_op->op_private = OPpCONST_BARE;
7345                     rv2cv_op =
7346                         newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7347                     cv = lex
7348                         ? isGV(gv)
7349                             ? GvCV(gv)
7350                             : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
7351                                 ? (CV *)SvRV(gv)
7352                                 : ((CV *)gv)
7353                         : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
7354                 }
7355
7356                 /* Use this var to track whether intuit_method has been
7357                    called.  intuit_method returns 0 or > 255.  */
7358                 tmp = 1;
7359
7360                 /* See if it's the indirect object for a list operator. */
7361
7362                 if (PL_oldoldbufptr
7363                     && PL_oldoldbufptr < PL_bufptr
7364                     && (PL_oldoldbufptr == PL_last_lop
7365                         || PL_oldoldbufptr == PL_last_uni)
7366                     && /* NO SKIPSPACE BEFORE HERE! */
7367                        (PL_expect == XREF
7368                         || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7369                                                                == OA_FILEREF))
7370                 {
7371                     bool immediate_paren = *s == '(';
7372                     SSize_t s_off;
7373
7374                     /* (Now we can afford to cross potential line boundary.) */
7375                     s = skipspace(s);
7376
7377                     /* intuit_method() can indirectly call lex_next_chunk(),
7378                      * invalidating s
7379                      */
7380                     s_off = s - SvPVX(PL_linestr);
7381                     /* Two barewords in a row may indicate method call. */
7382                     if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7383                             || *s == '$')
7384                         && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7385                     {
7386                         /* the code at method: doesn't use s */
7387                         goto method;
7388                     }
7389                     s = SvPVX(PL_linestr) + s_off;
7390
7391                     /* If not a declared subroutine, it's an indirect object. */
7392                     /* (But it's an indir obj regardless for sort.) */
7393                     /* Also, if "_" follows a filetest operator, it's a bareword */
7394
7395                     if (
7396                         ( !immediate_paren && (PL_last_lop_op == OP_SORT
7397                          || (!cv
7398                              && (PL_last_lop_op != OP_MAPSTART
7399                                  && PL_last_lop_op != OP_GREPSTART))))
7400                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7401                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7402                                                             == OA_FILESTATOP))
7403                        )
7404                     {
7405                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7406                         goto bareword;
7407                     }
7408                 }
7409
7410                 PL_expect = XOPERATOR;
7411                 s = skipspace(s);
7412
7413                 /* Is this a word before a => operator? */
7414                 if (*s == '=' && s[1] == '>' && !pkgname) {
7415                     op_free(rv2cv_op);
7416                     CLINE;
7417                     if (gvp || (lex && !off)) {
7418                         assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
7419                         /* This is our own scalar, created a few lines
7420                            above, so this is safe. */
7421                         SvREADONLY_off(sv);
7422                         sv_setpv(sv, PL_tokenbuf);
7423                         if (UTF && !IN_BYTES
7424                          && is_utf8_string((U8*)PL_tokenbuf, len))
7425                               SvUTF8_on(sv);
7426                         SvREADONLY_on(sv);
7427                     }
7428                     TERM(BAREWORD);
7429                 }
7430
7431                 /* If followed by a paren, it's certainly a subroutine. */
7432                 if (*s == '(') {
7433                     CLINE;
7434                     if (cv) {
7435                         d = s + 1;
7436                         while (SPACE_OR_TAB(*d))
7437                             d++;
7438                         if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7439                             s = d + 1;
7440                             goto its_constant;
7441                         }
7442                     }
7443                     NEXTVAL_NEXTTOKE.opval =
7444                         off ? rv2cv_op : pl_yylval.opval;
7445                     if (off)
7446                          op_free(pl_yylval.opval), force_next(PRIVATEREF);
7447                     else op_free(rv2cv_op),        force_next(BAREWORD);
7448                     pl_yylval.ival = 0;
7449                     TOKEN('&');
7450                 }
7451
7452                 /* If followed by var or block, call it a method (unless sub) */
7453
7454                 if ((*s == '$' || *s == '{') && !cv) {
7455                     op_free(rv2cv_op);
7456                     PL_last_lop = PL_oldbufptr;
7457                     PL_last_lop_op = OP_METHOD;
7458                     if (!PL_lex_allbrackets
7459                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7460                     {
7461                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7462                     }
7463                     PL_expect = XBLOCKTERM;
7464                     PL_bufptr = s;
7465                     return REPORT(METHOD);
7466                 }
7467
7468                 /* If followed by a bareword, see if it looks like indir obj. */
7469
7470                 if (   tmp == 1
7471                     && !orig_keyword
7472                     && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7473                     && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7474                 {
7475                   method:
7476                     if (lex && !off) {
7477                         assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
7478                         SvREADONLY_off(sv);
7479                         sv_setpvn(sv, PL_tokenbuf, len);
7480                         if (UTF && !IN_BYTES
7481                          && is_utf8_string((U8*)PL_tokenbuf, len))
7482                             SvUTF8_on (sv);
7483                         else SvUTF8_off(sv);
7484                     }
7485                     op_free(rv2cv_op);
7486                     if (tmp == METHOD && !PL_lex_allbrackets
7487                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7488                     {
7489                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7490                     }
7491                     return REPORT(tmp);
7492                 }
7493
7494                 /* Not a method, so call it a subroutine (if defined) */
7495
7496                 if (cv) {
7497                     /* Check for a constant sub */
7498                     if ((sv = cv_const_sv_or_av(cv))) {
7499                   its_constant:
7500                         op_free(rv2cv_op);
7501                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7502                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7503                         if (SvTYPE(sv) == SVt_PVAV)
7504                             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7505                                                       pl_yylval.opval);
7506                         else {
7507                             pl_yylval.opval->op_private = 0;
7508                             pl_yylval.opval->op_folded = 1;
7509                             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7510                         }
7511                         TOKEN(BAREWORD);
7512                     }
7513
7514                     op_free(pl_yylval.opval);
7515                     pl_yylval.opval =
7516                         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7517                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7518                     PL_last_lop = PL_oldbufptr;
7519                     PL_last_lop_op = OP_ENTERSUB;
7520                     /* Is there a prototype? */
7521                     if (
7522                         SvPOK(cv))
7523                     {
7524                         STRLEN protolen = CvPROTOLEN(cv);
7525                         const char *proto = CvPROTO(cv);
7526                         bool optional;
7527                         proto = S_strip_spaces(aTHX_ proto, &protolen);
7528                         if (!protolen)
7529                             TERM(FUNC0SUB);
7530                         if ((optional = *proto == ';'))
7531                           do
7532                             proto++;
7533                           while (*proto == ';');
7534                         if (
7535                             (
7536                                 (
7537                                     *proto == '$' || *proto == '_'
7538                                  || *proto == '*' || *proto == '+'
7539                                 )
7540                              && proto[1] == '\0'
7541                             )
7542                          || (
7543                              *proto == '\\' && proto[1] && proto[2] == '\0'
7544                             )
7545                         )
7546                             UNIPROTO(UNIOPSUB,optional);
7547                         if (*proto == '\\' && proto[1] == '[') {
7548                             const char *p = proto + 2;
7549                             while(*p && *p != ']')
7550                                 ++p;
7551                             if(*p == ']' && !p[1])
7552                                 UNIPROTO(UNIOPSUB,optional);
7553                         }
7554                         if (*proto == '&' && *s == '{') {
7555                             if (PL_curstash)
7556                                 sv_setpvs(PL_subname, "__ANON__");
7557                             else
7558                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7559                             if (!PL_lex_allbrackets
7560                                 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7561                             {
7562                                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7563                             }
7564                             PREBLOCK(LSTOPSUB);
7565                         }
7566                     }
7567                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7568                     PL_expect = XTERM;
7569                     force_next(off ? PRIVATEREF : BAREWORD);
7570                     if (!PL_lex_allbrackets
7571                         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7572                     {
7573                         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7574                     }
7575                     TOKEN(NOAMP);
7576                 }
7577
7578                 /* Call it a bare word */
7579
7580                 if (PL_hints & HINT_STRICT_SUBS)
7581                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
7582                 else {
7583                 bareword:
7584                     /* after "print" and similar functions (corresponding to
7585                      * "F? L" in opcode.pl), whatever wasn't already parsed as
7586                      * a filehandle should be subject to "strict subs".
7587                      * Likewise for the optional indirect-object argument to system
7588                      * or exec, which can't be a bareword */
7589                     if ((PL_last_lop_op == OP_PRINT
7590                             || PL_last_lop_op == OP_PRTF
7591                             || PL_last_lop_op == OP_SAY
7592                             || PL_last_lop_op == OP_SYSTEM
7593                             || PL_last_lop_op == OP_EXEC)
7594                             && (PL_hints & HINT_STRICT_SUBS))
7595                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7596                     if (lastchar != '-') {
7597                         if (ckWARN(WARN_RESERVED)) {
7598                             d = PL_tokenbuf;
7599                             while (isLOWER(*d))
7600                                 d++;
7601                             if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7602                             {
7603                                 /* PL_warn_reserved is constant */
7604                                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7605                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7606                                        PL_tokenbuf);
7607                                 GCC_DIAG_RESTORE;
7608                             }
7609                         }
7610                     }
7611                 }
7612                 op_free(rv2cv_op);
7613
7614             safe_bareword:
7615                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7616                  && saw_infix_sigil) {
7617                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7618                                      "Operator or semicolon missing before %c%" UTF8f,
7619                                      lastchar,
7620                                      UTF8fARG(UTF, strlen(PL_tokenbuf),
7621                                               PL_tokenbuf));
7622                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7623                                      "Ambiguous use of %c resolved as operator %c",
7624                                      lastchar, lastchar);
7625                 }
7626                 TOKEN(BAREWORD);
7627             }
7628
7629         case KEY___FILE__:
7630             FUN0OP(
7631                 newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7632             );
7633
7634         case KEY___LINE__:
7635             FUN0OP(
7636                 newSVOP(OP_CONST, 0,
7637                     Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7638             );
7639
7640         case KEY___PACKAGE__:
7641             FUN0OP(
7642                 newSVOP(OP_CONST, 0,
7643                                         (PL_curstash
7644                                          ? newSVhek(HvNAME_HEK(PL_curstash))
7645                                          : &PL_sv_undef))
7646             );
7647
7648         case KEY___DATA__:
7649         case KEY___END__: {
7650             GV *gv;
7651             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7652                 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7653                                         ? PL_curstash
7654                                         : PL_defstash;
7655                 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7656                 if (!isGV(gv))
7657                     gv_init(gv,stash,"DATA",4,0);
7658                 GvMULTI_on(gv);
7659                 if (!GvIO(gv))
7660                     GvIOp(gv) = newIO();
7661                 IoIFP(GvIOp(gv)) = PL_rsfp;
7662 #if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
7663                 {
7664                     const int fd = PerlIO_fileno(PL_rsfp);
7665                     if (fd >= 3) {
7666                         fcntl(fd,F_SETFD, FD_CLOEXEC);
7667                     }
7668                 }
7669 #endif
7670                 /* Mark this internal pseudo-handle as clean */
7671                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7672                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7673                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7674                 else
7675                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7676 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7677                 /* if the script was opened in binmode, we need to revert
7678                  * it to text mode for compatibility; but only iff it has CRs
7679                  * XXX this is a questionable hack at best. */
7680                 if (PL_bufend-PL_bufptr > 2
7681                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7682                 {
7683                     Off_t loc = 0;
7684                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7685                         loc = PerlIO_tell(PL_rsfp);
7686                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
7687                     }
7688 #ifdef NETWARE
7689                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7690 #else
7691                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7692 #endif  /* NETWARE */
7693                         if (loc > 0)
7694                             PerlIO_seek(PL_rsfp, loc, 0);
7695                     }
7696                 }
7697 #endif
7698 #ifdef PERLIO_LAYERS
7699                 if (!IN_BYTES) {
7700                     if (UTF)
7701                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7702                 }
7703 #endif
7704                 PL_rsfp = NULL;
7705             }
7706             goto fake_eof;
7707         }
7708
7709         case KEY___SUB__:
7710             FUN0OP(CvCLONE(PL_compcv)
7711                         ? newOP(OP_RUNCV, 0)
7712                         : newPVOP(OP_RUNCV,0,NULL));
7713
7714         case KEY_AUTOLOAD:
7715         case KEY_DESTROY:
7716         case KEY_BEGIN:
7717         case KEY_UNITCHECK:
7718         case KEY_CHECK:
7719         case KEY_INIT:
7720         case KEY_END:
7721             if (PL_expect == XSTATE) {
7722                 s = PL_bufptr;
7723                 goto really_sub;
7724             }
7725             goto just_a_word;
7726
7727         case_KEY_CORE:
7728             {
7729                 STRLEN olen = len;
7730                 d = s;
7731                 s += 2;
7732                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7733                 if ((*s == ':' && s[1] == ':')
7734                  || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7735                 {
7736                     s = d;
7737                     len = olen;
7738                     Copy(PL_bufptr, PL_tokenbuf, olen, char);
7739                     goto just_a_word;
7740                 }
7741                 if (!tmp)
7742                     Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
7743                                       UTF8fARG(UTF, len, PL_tokenbuf));
7744                 if (tmp < 0)
7745                     tmp = -tmp;
7746                 else if (tmp == KEY_require || tmp == KEY_do
7747                       || tmp == KEY_glob)
7748                     /* that's a way to remember we saw "CORE::" */
7749                     orig_keyword = tmp;
7750                 goto reserved_word;
7751             }
7752
7753         case KEY_abs:
7754             UNI(OP_ABS);
7755
7756         case KEY_alarm:
7757             UNI(OP_ALARM);
7758
7759         case KEY_accept:
7760             LOP(OP_ACCEPT,XTERM);
7761
7762         case KEY_and:
7763             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7764                 return REPORT(0);
7765             OPERATOR(ANDOP);
7766
7767         case KEY_atan2:
7768             LOP(OP_ATAN2,XTERM);
7769
7770         case KEY_bind:
7771             LOP(OP_BIND,XTERM);
7772
7773         case KEY_binmode:
7774             LOP(OP_BINMODE,XTERM);
7775
7776         case KEY_bless:
7777             LOP(OP_BLESS,XTERM);
7778
7779         case KEY_break:
7780             FUN0(OP_BREAK);
7781
7782         case KEY_chop:
7783             UNI(OP_CHOP);
7784
7785         case KEY_continue:
7786                     /* We have to disambiguate the two senses of
7787                       "continue". If the next token is a '{' then
7788                       treat it as the start of a continue block;
7789                       otherwise treat it as a control operator.
7790                      */
7791                     s = skipspace(s);
7792                     if (*s == '{')
7793             PREBLOCK(CONTINUE);
7794                     else
7795                         FUN0(OP_CONTINUE);
7796
7797         case KEY_chdir:
7798             /* may use HOME */
7799             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7800             UNI(OP_CHDIR);
7801
7802         case KEY_close:
7803             UNI(OP_CLOSE);
7804
7805         case KEY_closedir:
7806             UNI(OP_CLOSEDIR);
7807
7808         case KEY_cmp:
7809             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7810                 return REPORT(0);
7811             Eop(OP_SCMP);
7812
7813         case KEY_caller:
7814             UNI(OP_CALLER);
7815
7816         case KEY_crypt:
7817 #ifdef FCRYPT
7818             if (!PL_cryptseen) {
7819                 PL_cryptseen = TRUE;
7820                 init_des();
7821             }
7822 #endif
7823             LOP(OP_CRYPT,XTERM);
7824
7825         case KEY_chmod:
7826             LOP(OP_CHMOD,XTERM);
7827
7828         case KEY_chown:
7829             LOP(OP_CHOWN,XTERM);
7830
7831         case KEY_connect:
7832             LOP(OP_CONNECT,XTERM);
7833
7834         case KEY_chr:
7835             UNI(OP_CHR);
7836
7837         case KEY_cos:
7838             UNI(OP_COS);
7839
7840         case KEY_chroot:
7841             UNI(OP_CHROOT);
7842
7843         case KEY_default:
7844             PREBLOCK(DEFAULT);
7845
7846         case KEY_do:
7847             s = skipspace(s);
7848             if (*s == '{')
7849                 PRETERMBLOCK(DO);
7850             if (*s != '\'') {
7851                 *PL_tokenbuf = '&';
7852                 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7853                               1, &len);
7854                 if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7855                  && !keyword(PL_tokenbuf + 1, len, 0)) {
7856                     SSize_t off = s-SvPVX(PL_linestr);
7857                     d = skipspace(d);
7858                     s = SvPVX(PL_linestr)+off;
7859                     if (*d == '(') {
7860                         force_ident_maybe_lex('&');
7861                         s = d;
7862                     }
7863                 }
7864             }
7865             if (orig_keyword == KEY_do) {
7866                 orig_keyword = 0;
7867                 pl_yylval.ival = 1;
7868             }
7869             else
7870                 pl_yylval.ival = 0;
7871             OPERATOR(DO);
7872
7873         case KEY_die:
7874             PL_hints |= HINT_BLOCK_SCOPE;
7875             LOP(OP_DIE,XTERM);
7876
7877         case KEY_defined:
7878             UNI(OP_DEFINED);
7879
7880         case KEY_delete:
7881             UNI(OP_DELETE);
7882
7883         case KEY_dbmopen:
7884             Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7885                               STR_WITH_LEN("NDBM_File::"),
7886                               STR_WITH_LEN("DB_File::"),
7887                               STR_WITH_LEN("GDBM_File::"),
7888                               STR_WITH_LEN("SDBM_File::"),
7889                               STR_WITH_LEN("ODBM_File::"),
7890                               NULL);
7891             LOP(OP_DBMOPEN,XTERM);
7892
7893         case KEY_dbmclose:
7894             UNI(OP_DBMCLOSE);
7895
7896         case KEY_dump:
7897             LOOPX(OP_DUMP);
7898
7899         case KEY_else:
7900             PREBLOCK(ELSE);
7901
7902         case KEY_elsif:
7903             pl_yylval.ival = CopLINE(PL_curcop);
7904             OPERATOR(ELSIF);
7905
7906         case KEY_eq:
7907             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7908                 return REPORT(0);
7909             Eop(OP_SEQ);
7910
7911         case KEY_exists:
7912             UNI(OP_EXISTS);
7913
7914         case KEY_exit:
7915             UNI(OP_EXIT);
7916
7917         case KEY_eval:
7918             s = skipspace(s);
7919             if (*s == '{') { /* block eval */
7920                 PL_expect = XTERMBLOCK;
7921                 UNIBRACK(OP_ENTERTRY);
7922             }
7923             else { /* string eval */
7924                 PL_expect = XTERM;
7925                 UNIBRACK(OP_ENTEREVAL);
7926             }
7927
7928         case KEY_evalbytes:
7929             PL_expect = XTERM;
7930             UNIBRACK(-OP_ENTEREVAL);
7931
7932         case KEY_eof:
7933             UNI(OP_EOF);
7934
7935         case KEY_exp:
7936             UNI(OP_EXP);
7937
7938         case KEY_each:
7939             UNI(OP_EACH);
7940
7941         case KEY_exec:
7942             LOP(OP_EXEC,XREF);
7943
7944         case KEY_endhostent:
7945             FUN0(OP_EHOSTENT);
7946
7947         case KEY_endnetent:
7948             FUN0(OP_ENETENT);
7949
7950         case KEY_endservent:
7951             FUN0(OP_ESERVENT);
7952
7953         case KEY_endprotoent:
7954             FUN0(OP_EPROTOENT);
7955
7956         case KEY_endpwent:
7957             FUN0(OP_EPWENT);
7958
7959         case KEY_endgrent:
7960             FUN0(OP_EGRENT);
7961
7962         case KEY_for:
7963         case KEY_foreach:
7964             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7965                 return REPORT(0);
7966             pl_yylval.ival = CopLINE(PL_curcop);
7967             s = skipspace(s);
7968             if (   PL_expect == XSTATE
7969                 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
7970             {
7971                 char *p = s;
7972                 SSize_t s_off = s - SvPVX(PL_linestr);
7973
7974                 if (   memBEGINPs(p, (STRLEN) (PL_bufend - p), "my")
7975                     && isSPACE(*(p + 2)))
7976                 {
7977                     p += 2;
7978                 }
7979                 else if (   memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")
7980                          && isSPACE(*(p + 3)))
7981                 {
7982                     p += 3;
7983                 }
7984
7985                 p = skipspace(p);
7986                 /* skip optional package name, as in "for my abc $x (..)" */
7987                 if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
7988                     p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7989                     p = skipspace(p);
7990                 }
7991                 if (*p != '$' && *p != '\\')
7992                     Perl_croak(aTHX_ "Missing $ on loop variable");
7993
7994                 /* The buffer may have been reallocated, update s */
7995                 s = SvPVX(PL_linestr) + s_off;
7996             }
7997             OPERATOR(FOR);
7998
7999         case KEY_formline:
8000             LOP(OP_FORMLINE,XTERM);
8001
8002         case KEY_fork:
8003             FUN0(OP_FORK);
8004
8005         case KEY_fc:
8006             UNI(OP_FC);
8007
8008         case KEY_fcntl:
8009             LOP(OP_FCNTL,XTERM);
8010
8011         case KEY_fileno:
8012             UNI(OP_FILENO);
8013
8014         case KEY_flock:
8015             LOP(OP_FLOCK,XTERM);
8016
8017         case KEY_gt:
8018             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8019                 return REPORT(0);
8020             Rop(OP_SGT);
8021
8022         case KEY_ge:
8023             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8024                 return REPORT(0);
8025             Rop(OP_SGE);
8026
8027         case KEY_grep:
8028             LOP(OP_GREPSTART, XREF);
8029
8030         case KEY_goto:
8031             LOOPX(OP_GOTO);
8032
8033         case KEY_gmtime:
8034             UNI(OP_GMTIME);
8035
8036         case KEY_getc:
8037             UNIDOR(OP_GETC);
8038
8039         case KEY_getppid:
8040             FUN0(OP_GETPPID);
8041
8042         case KEY_getpgrp:
8043             UNI(OP_GETPGRP);
8044
8045         case KEY_getpriority:
8046             LOP(OP_GETPRIORITY,XTERM);
8047
8048         case KEY_getprotobyname:
8049             UNI(OP_GPBYNAME);
8050
8051         case KEY_getprotobynumber:
8052             LOP(OP_GPBYNUMBER,XTERM);
8053
8054         case KEY_getprotoent:
8055             FUN0(OP_GPROTOENT);
8056
8057         case KEY_getpwent:
8058             FUN0(OP_GPWENT);
8059
8060         case KEY_getpwnam:
8061             UNI(OP_GPWNAM);
8062
8063         case KEY_getpwuid:
8064             UNI(OP_GPWUID);
8065
8066         case KEY_getpeername:
8067             UNI(OP_GETPEERNAME);
8068
8069         case KEY_gethostbyname:
8070             UNI(OP_GHBYNAME);
8071
8072         case KEY_gethostbyaddr:
8073             LOP(OP_GHBYADDR,XTERM);
8074
8075         case KEY_gethostent:
8076             FUN0(OP_GHOSTENT);
8077
8078         case KEY_getnetbyname:
8079             UNI(OP_GNBYNAME);
8080
8081         case KEY_getnetbyaddr:
8082             LOP(OP_GNBYADDR,XTERM);
8083
8084         case KEY_getnetent:
8085             FUN0(OP_GNETENT);
8086
8087         case KEY_getservbyname:
8088             LOP(OP_GSBYNAME,XTERM);
8089
8090         case KEY_getservbyport:
8091             LOP(OP_GSBYPORT,XTERM);
8092
8093         case KEY_getservent:
8094             FUN0(OP_GSERVENT);
8095
8096         case KEY_getsockname:
8097             UNI(OP_GETSOCKNAME);
8098
8099         case KEY_getsockopt:
8100             LOP(OP_GSOCKOPT,XTERM);
8101
8102         case KEY_getgrent:
8103             FUN0(OP_GGRENT);
8104
8105         case KEY_getgrnam:
8106             UNI(OP_GGRNAM);
8107
8108         case KEY_getgrgid:
8109             UNI(OP_GGRGID);
8110
8111         case KEY_getlogin:
8112             FUN0(OP_GETLOGIN);
8113
8114         case KEY_given:
8115             pl_yylval.ival = CopLINE(PL_curcop);
8116             Perl_ck_warner_d(aTHX_
8117                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8118                 "given is experimental");
8119             OPERATOR(GIVEN);
8120
8121         case KEY_glob:
8122             LOP(
8123              orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
8124              XTERM
8125             );
8126
8127         case KEY_hex:
8128             UNI(OP_HEX);
8129
8130         case KEY_if:
8131             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8132                 return REPORT(0);
8133             pl_yylval.ival = CopLINE(PL_curcop);
8134             OPERATOR(IF);
8135
8136         case KEY_index:
8137             LOP(OP_INDEX,XTERM);
8138
8139         case KEY_int:
8140             UNI(OP_INT);
8141
8142         case KEY_ioctl:
8143             LOP(OP_IOCTL,XTERM);
8144
8145         case KEY_join:
8146             LOP(OP_JOIN,XTERM);
8147
8148         case KEY_keys:
8149             UNI(OP_KEYS);
8150
8151         case KEY_kill:
8152             LOP(OP_KILL,XTERM);
8153
8154         case KEY_last:
8155             LOOPX(OP_LAST);
8156
8157         case KEY_lc:
8158             UNI(OP_LC);
8159
8160         case KEY_lcfirst:
8161             UNI(OP_LCFIRST);
8162
8163         case KEY_local:
8164             OPERATOR(LOCAL);
8165
8166         case KEY_length:
8167             UNI(OP_LENGTH);
8168
8169         case KEY_lt:
8170             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8171                 return REPORT(0);
8172             Rop(OP_SLT);
8173
8174         case KEY_le:
8175             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8176                 return REPORT(0);
8177             Rop(OP_SLE);
8178
8179         case KEY_localtime:
8180             UNI(OP_LOCALTIME);
8181
8182         case KEY_log:
8183             UNI(OP_LOG);
8184
8185         case KEY_link:
8186             LOP(OP_LINK,XTERM);
8187
8188         case KEY_listen:
8189             LOP(OP_LISTEN,XTERM);
8190
8191         case KEY_lock:
8192             UNI(OP_LOCK);
8193
8194         case KEY_lstat:
8195             UNI(OP_LSTAT);
8196
8197         case KEY_m:
8198             s = scan_pat(s,OP_MATCH);
8199             TERM(sublex_start());
8200
8201         case KEY_map:
8202             LOP(OP_MAPSTART, XREF);
8203
8204         case KEY_mkdir:
8205             LOP(OP_MKDIR,XTERM);
8206
8207         case KEY_msgctl:
8208             LOP(OP_MSGCTL,XTERM);
8209
8210         case KEY_msgget:
8211             LOP(OP_MSGGET,XTERM);
8212
8213         case KEY_msgrcv:
8214             LOP(OP_MSGRCV,XTERM);
8215
8216         case KEY_msgsnd:
8217             LOP(OP_MSGSND,XTERM);
8218
8219         case KEY_our:
8220         case KEY_my:
8221         case KEY_state:
8222             if (PL_in_my) {
8223                 PL_bufptr = s;
8224                 yyerror(Perl_form(aTHX_
8225                                   "Can't redeclare \"%s\" in \"%s\"",
8226                                    tmp      == KEY_my    ? "my" :
8227                                    tmp      == KEY_state ? "state" : "our",
8228                                    PL_in_my == KEY_my    ? "my" :
8229                                    PL_in_my == KEY_state ? "state" : "our"));
8230             }
8231             PL_in_my = (U16)tmp;
8232             s = skipspace(s);
8233             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8234                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8235                 if (memEQs(PL_tokenbuf, len, "sub"))
8236                     goto really_sub;
8237                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8238                 if (!PL_in_my_stash) {
8239                     char tmpbuf[1024];
8240                     int len;
8241                     PL_bufptr = s;
8242                     len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8243                     PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
8244                     yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8245                 }
8246             }
8247             else if (*s == '\\') {
8248                 if (!FEATURE_MYREF_IS_ENABLED)
8249                     Perl_croak(aTHX_ "The experimental declared_refs "
8250                                      "feature is not enabled");
8251                 Perl_ck_warner_d(aTHX_
8252                      packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
8253                     "Declaring references is experimental");
8254             }
8255             OPERATOR(MY);
8256
8257         case KEY_next:
8258             LOOPX(OP_NEXT);
8259
8260         case KEY_ne:
8261             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8262                 return REPORT(0);
8263             Eop(OP_SNE);
8264
8265         case KEY_no:
8266             s = tokenize_use(0, s);
8267             TOKEN(USE);
8268
8269         case KEY_not:
8270             if (*s == '(' || (s = skipspace(s), *s == '('))
8271                 FUN1(OP_NOT);
8272             else {
8273                 if (!PL_lex_allbrackets
8274                     && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8275                 {
8276                     PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8277                 }
8278                 OPERATOR(NOTOP);
8279             }
8280
8281         case KEY_open:
8282             s = skipspace(s);
8283             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8284                 const char *t;
8285                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8286                               &len);
8287                 for (t=d; isSPACE(*t);)
8288                     t++;
8289                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8290                     /* [perl #16184] */
8291                     && !(t[0] == '=' && t[1] == '>')
8292                     && !(t[0] == ':' && t[1] == ':')
8293                     && !keyword(s, d-s, 0)
8294                 ) {
8295                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8296                        "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8297                         UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8298                 }
8299             }
8300             LOP(OP_OPEN,XTERM);
8301
8302         case KEY_or:
8303             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8304                 return REPORT(0);
8305             pl_yylval.ival = OP_OR;
8306             OPERATOR(OROP);
8307
8308         case KEY_ord:
8309             UNI(OP_ORD);
8310
8311         case KEY_oct:
8312             UNI(OP_OCT);
8313
8314         case KEY_opendir:
8315             LOP(OP_OPEN_DIR,XTERM);
8316
8317         case KEY_print:
8318             checkcomma(s,PL_tokenbuf,"filehandle");
8319             LOP(OP_PRINT,XREF);
8320
8321         case KEY_printf:
8322             checkcomma(s,PL_tokenbuf,"filehandle");
8323             LOP(OP_PRTF,XREF);
8324
8325         case KEY_prototype:
8326             UNI(OP_PROTOTYPE);
8327
8328         case KEY_push:
8329             LOP(OP_PUSH,XTERM);
8330
8331         case KEY_pop:
8332             UNIDOR(OP_POP);
8333
8334         case KEY_pos:
8335             UNIDOR(OP_POS);
8336
8337         case KEY_pack:
8338             LOP(OP_PACK,XTERM);
8339
8340         case KEY_package:
8341             s = force_word(s,BAREWORD,FALSE,TRUE);
8342             s = skipspace(s);
8343             s = force_strict_version(s);
8344             PREBLOCK(PACKAGE);
8345
8346         case KEY_pipe:
8347             LOP(OP_PIPE_OP,XTERM);
8348
8349         case KEY_q:
8350             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8351             if (!s)
8352                 missingterm(NULL, 0);
8353             COPLINE_SET_FROM_MULTI_END;
8354             pl_yylval.ival = OP_CONST;
8355             TERM(sublex_start());
8356
8357         case KEY_quotemeta:
8358             UNI(OP_QUOTEMETA);
8359
8360         case KEY_qw: {
8361             OP *words = NULL;
8362             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8363             if (!s)
8364                 missingterm(NULL, 0);
8365             COPLINE_SET_FROM_MULTI_END;
8366             PL_expect = XOPERATOR;
8367             if (SvCUR(PL_lex_stuff)) {
8368                 int warned_comma = !ckWARN(WARN_QW);
8369                 int warned_comment = warned_comma;
8370                 d = SvPV_force(PL_lex_stuff, len);
8371                 while (len) {
8372                     for (; isSPACE(*d) && len; --len, ++d)
8373                         /**/;
8374                     if (len) {
8375                         SV *sv;
8376                         const char *b = d;
8377                         if (!warned_comma || !warned_comment) {
8378                             for (; !isSPACE(*d) && len; --len, ++d) {
8379                                 if (!warned_comma && *d == ',') {
8380                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8381                                         "Possible attempt to separate words with commas");
8382                                     ++warned_comma;
8383                                 }
8384                                 else if (!warned_comment && *d == '#') {
8385                                     Perl_warner(aTHX_ packWARN(WARN_QW),
8386                                         "Possible attempt to put comments in qw() list");
8387                                     ++warned_comment;
8388                                 }
8389                             }
8390                         }
8391                         else {
8392                             for (; !isSPACE(*d) && len; --len, ++d)
8393                                 /**/;
8394                         }
8395                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8396                         words = op_append_elem(OP_LIST, words,
8397                                             newSVOP(OP_CONST, 0, tokeq(sv)));
8398                     }
8399                 }
8400             }
8401             if (!words)
8402                 words = newNULLLIST();
8403             SvREFCNT_dec_NN(PL_lex_stuff);
8404             PL_lex_stuff = NULL;
8405             PL_expect = XOPERATOR;
8406             pl_yylval.opval = sawparens(words);
8407             TOKEN(QWLIST);
8408         }
8409
8410         case KEY_qq:
8411             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8412             if (!s)
8413                 missingterm(NULL, 0);
8414             pl_yylval.ival = OP_STRINGIFY;
8415             if (SvIVX(PL_lex_stuff) == '\'')
8416                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should interpolate */
8417             TERM(sublex_start());
8418
8419         case KEY_qr:
8420             s = scan_pat(s,OP_QR);
8421             TERM(sublex_start());
8422
8423         case KEY_qx:
8424             s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8425             if (!s)
8426                 missingterm(NULL, 0);
8427             pl_yylval.ival = OP_BACKTICK;
8428             TERM(sublex_start());
8429
8430         case KEY_return:
8431             OLDLOP(OP_RETURN);
8432
8433         case KEY_require:
8434             s = skipspace(s);
8435             if (isDIGIT(*s)) {
8436                 s = force_version(s, FALSE);
8437             }
8438             else if (*s != 'v' || !isDIGIT(s[1])
8439                     || (s = force_version(s, TRUE), *s == 'v'))
8440             {
8441                 *PL_tokenbuf = '\0';
8442                 s = force_word(s,BAREWORD,TRUE,TRUE);
8443                 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
8444                                            PL_tokenbuf + sizeof(PL_tokenbuf),
8445                                            UTF))
8446                 {
8447                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8448                                 GV_ADD | (UTF ? SVf_UTF8 : 0));
8449                 }
8450                 else if (*s == '<')
8451                     yyerror("<> at require-statement should be quotes");
8452             }
8453             if (orig_keyword == KEY_require) {
8454                 orig_keyword = 0;
8455                 pl_yylval.ival = 1;
8456             }
8457             else
8458                 pl_yylval.ival = 0;
8459             PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
8460             PL_bufptr = s;
8461             PL_last_uni = PL_oldbufptr;
8462             PL_last_lop_op = OP_REQUIRE;
8463             s = skipspace(s);
8464             return REPORT( (int)REQUIRE );
8465
8466         case KEY_reset:
8467             UNI(OP_RESET);
8468
8469         case KEY_redo:
8470             LOOPX(OP_REDO);
8471
8472         case KEY_rename:
8473             LOP(OP_RENAME,XTERM);
8474
8475         case KEY_rand:
8476             UNI(OP_RAND);
8477
8478         case KEY_rmdir:
8479             UNI(OP_RMDIR);
8480
8481         case KEY_rindex:
8482             LOP(OP_RINDEX,XTERM);
8483
8484         case KEY_read:
8485             LOP(OP_READ,XTERM);
8486
8487         case KEY_readdir:
8488             UNI(OP_READDIR);
8489
8490         case KEY_readline:
8491             UNIDOR(OP_READLINE);
8492
8493         case KEY_readpipe:
8494             UNIDOR(OP_BACKTICK);
8495
8496         case KEY_rewinddir:
8497             UNI(OP_REWINDDIR);
8498
8499         case KEY_recv:
8500             LOP(OP_RECV,XTERM);
8501
8502         case KEY_reverse:
8503             LOP(OP_REVERSE,XTERM);
8504
8505         case KEY_readlink:
8506             UNIDOR(OP_READLINK);
8507
8508         case KEY_ref:
8509             UNI(OP_REF);
8510
8511         case KEY_s:
8512             s = scan_subst(s);
8513             if (pl_yylval.opval)
8514                 TERM(sublex_start());
8515             else
8516                 TOKEN(1);       /* force error */
8517
8518         case KEY_say:
8519             checkcomma(s,PL_tokenbuf,"filehandle");
8520             LOP(OP_SAY,XREF);
8521
8522         case KEY_chomp:
8523             UNI(OP_CHOMP);
8524
8525         case KEY_scalar:
8526             UNI(OP_SCALAR);
8527
8528         case KEY_select:
8529             LOP(OP_SELECT,XTERM);
8530
8531         case KEY_seek:
8532             LOP(OP_SEEK,XTERM);
8533
8534         case KEY_semctl:
8535             LOP(OP_SEMCTL,XTERM);
8536
8537         case KEY_semget:
8538             LOP(OP_SEMGET,XTERM);
8539
8540         case KEY_semop:
8541             LOP(OP_SEMOP,XTERM);
8542
8543         case KEY_send:
8544             LOP(OP_SEND,XTERM);
8545
8546         case KEY_setpgrp:
8547             LOP(OP_SETPGRP,XTERM);
8548
8549         case KEY_setpriority:
8550             LOP(OP_SETPRIORITY,XTERM);
8551
8552         case KEY_sethostent:
8553             UNI(OP_SHOSTENT);
8554
8555         case KEY_setnetent:
8556             UNI(OP_SNETENT);
8557
8558         case KEY_setservent:
8559             UNI(OP_SSERVENT);
8560
8561         case KEY_setprotoent:
8562             UNI(OP_SPROTOENT);
8563
8564         case KEY_setpwent:
8565             FUN0(OP_SPWENT);
8566
8567         case KEY_setgrent:
8568             FUN0(OP_SGRENT);
8569
8570         case KEY_seekdir:
8571             LOP(OP_SEEKDIR,XTERM);
8572
8573         case KEY_setsockopt:
8574             LOP(OP_SSOCKOPT,XTERM);
8575
8576         case KEY_shift:
8577             UNIDOR(OP_SHIFT);
8578
8579         case KEY_shmctl:
8580             LOP(OP_SHMCTL,XTERM);
8581
8582         case KEY_shmget:
8583             LOP(OP_SHMGET,XTERM);
8584
8585         case KEY_shmread:
8586             LOP(OP_SHMREAD,XTERM);
8587
8588         case KEY_shmwrite:
8589             LOP(OP_SHMWRITE,XTERM);
8590
8591         case KEY_shutdown:
8592             LOP(OP_SHUTDOWN,XTERM);
8593
8594         case KEY_sin:
8595             UNI(OP_SIN);
8596
8597         case KEY_sleep:
8598             UNI(OP_SLEEP);
8599
8600         case KEY_socket:
8601             LOP(OP_SOCKET,XTERM);
8602
8603         case KEY_socketpair:
8604             LOP(OP_SOCKPAIR,XTERM);
8605
8606         case KEY_sort:
8607             checkcomma(s,PL_tokenbuf,"subroutine name");
8608             s = skipspace(s);
8609             PL_expect = XTERM;
8610             s = force_word(s,BAREWORD,TRUE,TRUE);
8611             LOP(OP_SORT,XREF);
8612
8613         case KEY_split:
8614             LOP(OP_SPLIT,XTERM);
8615
8616         case KEY_sprintf:
8617             LOP(OP_SPRINTF,XTERM);
8618
8619         case KEY_splice:
8620             LOP(OP_SPLICE,XTERM);
8621
8622         case KEY_sqrt:
8623             UNI(OP_SQRT);
8624
8625         case KEY_srand:
8626             UNI(OP_SRAND);
8627
8628         case KEY_stat:
8629             UNI(OP_STAT);
8630
8631         case KEY_study:
8632             UNI(OP_STUDY);
8633
8634         case KEY_substr:
8635             LOP(OP_SUBSTR,XTERM);
8636
8637         case KEY_format:
8638         case KEY_sub:
8639           really_sub:
8640             {
8641                 char * const tmpbuf = PL_tokenbuf + 1;
8642                 expectation attrful;
8643                 bool have_name, have_proto;
8644                 const int key = tmp;
8645                 SV *format_name = NULL;
8646
8647                 SSize_t off = s-SvPVX(PL_linestr);
8648                 s = skipspace(s);
8649                 d = SvPVX(PL_linestr)+off;
8650
8651                 if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
8652                     || *s == '\''
8653                     || (*s == ':' && s[1] == ':'))
8654                 {
8655
8656                     PL_expect = XBLOCK;
8657                     attrful = XATTRBLOCK;
8658                     d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8659                                   &len);
8660                     if (key == KEY_format)
8661                         format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8662                     *PL_tokenbuf = '&';
8663                     if (memchr(tmpbuf, ':', len) || key != KEY_sub
8664                      || pad_findmy_pvn(
8665                             PL_tokenbuf, len + 1, 0
8666                         ) != NOT_IN_PAD)
8667                         sv_setpvn(PL_subname, tmpbuf, len);
8668                     else {
8669                         sv_setsv(PL_subname,PL_curstname);
8670                         sv_catpvs(PL_subname,"::");
8671                         sv_catpvn(PL_subname,tmpbuf,len);
8672                     }
8673                     if (SvUTF8(PL_linestr))
8674                         SvUTF8_on(PL_subname);
8675                     have_name = TRUE;
8676
8677
8678                     s = skipspace(d);
8679                 }
8680                 else {
8681                     if (key == KEY_my || key == KEY_our || key==KEY_state)
8682                     {
8683                         *d = '\0';
8684                         /* diag_listed_as: Missing name in "%s sub" */
8685                         Perl_croak(aTHX_
8686                                   "Missing name in \"%s\"", PL_bufptr);
8687                     }
8688                     PL_expect = XTERMBLOCK;
8689                     attrful = XATTRTERM;
8690                     sv_setpvs(PL_subname,"?");
8691                     have_name = FALSE;
8692                 }
8693
8694                 if (key == KEY_format) {
8695                     if (format_name) {
8696                         NEXTVAL_NEXTTOKE.opval
8697                             = newSVOP(OP_CONST,0, format_name);
8698                         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8699                         force_next(BAREWORD);
8700                     }
8701                     PREBLOCK(FORMAT);
8702                 }
8703
8704                 /* Look for a prototype */
8705                 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8706                     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8707                     COPLINE_SET_FROM_MULTI_END;
8708                     if (!s)
8709                         Perl_croak(aTHX_ "Prototype not terminated");
8710                     (void)validate_proto(PL_subname, PL_lex_stuff,
8711                                          ckWARN(WARN_ILLEGALPROTO), 0);
8712                     have_proto = TRUE;
8713
8714                     s = skipspace(s);
8715                 }
8716                 else
8717                     have_proto = FALSE;
8718
8719                 if (*s == ':' && s[1] != ':')
8720                     PL_expect = attrful;
8721                 else if ((*s != '{' && *s != '(') && key != KEY_format) {
8722                     assert(key == KEY_sub || key == KEY_AUTOLOAD ||
8723                            key == KEY_DESTROY || key == KEY_BEGIN ||
8724                            key == KEY_UNITCHECK || key == KEY_CHECK ||
8725                            key == KEY_INIT || key == KEY_END ||
8726                            key == KEY_my || key == KEY_state ||
8727                            key == KEY_our);
8728                     if (!have_name)
8729                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8730                     else if (*s != ';' && *s != '}')
8731                         Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
8732                 }
8733
8734                 if (have_proto) {
8735                     NEXTVAL_NEXTTOKE.opval =
8736                         newSVOP(OP_CONST, 0, PL_lex_stuff);
8737                     PL_lex_stuff = NULL;
8738                     force_next(THING);
8739                 }
8740                 if (!have_name) {
8741                     if (PL_curstash)
8742                         sv_setpvs(PL_subname, "__ANON__");
8743                     else
8744                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
8745                     TOKEN(ANONSUB);
8746                 }
8747                 force_ident_maybe_lex('&');
8748                 TOKEN(SUB);
8749             }
8750
8751         case KEY_system:
8752             LOP(OP_SYSTEM,XREF);
8753
8754         case KEY_symlink:
8755             LOP(OP_SYMLINK,XTERM);
8756
8757         case KEY_syscall:
8758             LOP(OP_SYSCALL,XTERM);
8759
8760         case KEY_sysopen:
8761             LOP(OP_SYSOPEN,XTERM);
8762
8763         case KEY_sysseek:
8764             LOP(OP_SYSSEEK,XTERM);
8765
8766         case KEY_sysread:
8767             LOP(OP_SYSREAD,XTERM);
8768
8769         case KEY_syswrite:
8770             LOP(OP_SYSWRITE,XTERM);
8771
8772         case KEY_tr:
8773         case KEY_y:
8774             s = scan_trans(s);
8775             TERM(sublex_start());
8776
8777         case KEY_tell:
8778             UNI(OP_TELL);
8779
8780         case KEY_telldir:
8781             UNI(OP_TELLDIR);
8782
8783         case KEY_tie:
8784             LOP(OP_TIE,XTERM);
8785
8786         case KEY_tied:
8787             UNI(OP_TIED);
8788
8789         case KEY_time:
8790             FUN0(OP_TIME);
8791
8792         case KEY_times:
8793             FUN0(OP_TMS);
8794
8795         case KEY_truncate:
8796             LOP(OP_TRUNCATE,XTERM);
8797
8798         case KEY_uc:
8799             UNI(OP_UC);
8800
8801         case KEY_ucfirst:
8802             UNI(OP_UCFIRST);
8803
8804         case KEY_untie:
8805             UNI(OP_UNTIE);
8806
8807         case KEY_until:
8808             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8809                 return REPORT(0);
8810             pl_yylval.ival = CopLINE(PL_curcop);
8811             OPERATOR(UNTIL);
8812
8813         case KEY_unless:
8814             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8815                 return REPORT(0);
8816             pl_yylval.ival = CopLINE(PL_curcop);
8817             OPERATOR(UNLESS);
8818
8819         case KEY_unlink:
8820             LOP(OP_UNLINK,XTERM);
8821
8822         case KEY_undef:
8823             UNIDOR(OP_UNDEF);
8824
8825         case KEY_unpack:
8826             LOP(OP_UNPACK,XTERM);
8827
8828         case KEY_utime:
8829             LOP(OP_UTIME,XTERM);
8830
8831         case KEY_umask:
8832             UNIDOR(OP_UMASK);
8833
8834         case KEY_unshift:
8835             LOP(OP_UNSHIFT,XTERM);
8836
8837         case KEY_use:
8838             s = tokenize_use(1, s);
8839             TOKEN(USE);
8840
8841         case KEY_values:
8842             UNI(OP_VALUES);
8843
8844         case KEY_vec:
8845             LOP(OP_VEC,XTERM);
8846
8847         case KEY_when:
8848             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8849                 return REPORT(0);
8850             pl_yylval.ival = CopLINE(PL_curcop);
8851             Perl_ck_warner_d(aTHX_
8852                 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8853                 "when is experimental");
8854             OPERATOR(WHEN);
8855
8856         case KEY_while:
8857             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8858                 return REPORT(0);
8859             pl_yylval.ival = CopLINE(PL_curcop);
8860             OPERATOR(WHILE);
8861
8862         case KEY_warn:
8863             PL_hints |= HINT_BLOCK_SCOPE;
8864             LOP(OP_WARN,XTERM);
8865
8866         case KEY_wait:
8867             FUN0(OP_WAIT);
8868
8869         case KEY_waitpid:
8870             LOP(OP_WAITPID,XTERM);
8871
8872         case KEY_wantarray:
8873             FUN0(OP_WANTARRAY);
8874
8875         case KEY_write:
8876             /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8877              * we use the same number on EBCDIC */
8878             gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8879             UNI(OP_ENTERWRITE);
8880
8881         case KEY_x:
8882             if (PL_expect == XOPERATOR) {
8883                 if (*s == '=' && !PL_lex_allbrackets
8884                     && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8885                 {
8886                     return REPORT(0);
8887                 }
8888                 Mop(OP_REPEAT);
8889             }
8890             check_uni();
8891             goto just_a_word;
8892
8893         case KEY_xor:
8894             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8895                 return REPORT(0);
8896             pl_yylval.ival = OP_XOR;
8897             OPERATOR(OROP);
8898         }
8899     }}
8900 }
8901
8902 /*
8903   S_pending_ident
8904
8905   Looks up an identifier in the pad or in a package
8906
8907   is_sig indicates that this is a subroutine signature variable
8908   rather than a plain pad var.
8909
8910   Returns:
8911     PRIVATEREF if this is a lexical name.
8912     BAREWORD   if this belongs to a package.
8913
8914   Structure:
8915       if we're in a my declaration
8916           croak if they tried to say my($foo::bar)
8917           build the ops for a my() declaration
8918       if it's an access to a my() variable
8919           build ops for access to a my() variable
8920       if in a dq string, and they've said @foo and we can't find @foo
8921           warn
8922       build ops for a bareword
8923 */
8924
8925 static int
8926 S_pending_ident(pTHX)
8927 {
8928     PADOFFSET tmp = 0;
8929     const char pit = (char)pl_yylval.ival;
8930     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8931     /* All routes through this function want to know if there is a colon.  */
8932     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8933
8934     DEBUG_T({ PerlIO_printf(Perl_debug_log,
8935           "### Pending identifier '%s'\n", PL_tokenbuf); });
8936
8937     /* if we're in a my(), we can't allow dynamics here.
8938        $foo'bar has already been turned into $foo::bar, so
8939        just check for colons.
8940
8941        if it's a legal name, the OP is a PADANY.
8942     */
8943     if (PL_in_my) {
8944         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
8945             if (has_colon)
8946                 /* diag_listed_as: No package name allowed for variable %s
8947                                    in "our" */
8948                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8949                                   "%se %s in \"our\"",
8950                                   *PL_tokenbuf=='&' ?"subroutin":"variabl",
8951                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8952             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8953         }
8954         else {
8955             OP *o;
8956             if (has_colon) {
8957                 /* "my" variable %s can't be in a package */
8958                 /* PL_no_myglob is constant */
8959                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8960                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8961                             PL_in_my == KEY_my ? "my" : "state",
8962                             *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8963                             PL_tokenbuf),
8964                             UTF ? SVf_UTF8 : 0);
8965                 GCC_DIAG_RESTORE;
8966             }
8967
8968             if (PL_in_my == KEY_sigvar) {
8969                 /* A signature 'padop' needs in addition, an op_first to
8970                  * point to a child sigdefelem, and an extra field to hold
8971                  * the signature index. We can achieve both by using an
8972                  * UNOP_AUX and (ab)using the op_aux field to hold the
8973                  * index. If we ever need more fields, use a real malloced
8974                  * aux strut instead.
8975                  */
8976                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
8977                                     INT2PTR(UNOP_AUX_item *,
8978                                         (PL_parser->sig_elems)));
8979                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
8980                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
8981                                   :                         OPpARGELEM_HV);
8982             }
8983             else
8984                 o = newOP(OP_PADANY, 0);
8985             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8986                                                         UTF ? SVf_UTF8 : 0);
8987             if (PL_in_my == KEY_sigvar)
8988                 PL_in_my = 0;
8989
8990             pl_yylval.opval = o;
8991             return PRIVATEREF;
8992         }
8993     }
8994
8995     /*
8996        build the ops for accesses to a my() variable.
8997     */
8998
8999     if (!has_colon) {
9000         if (!PL_in_my)
9001             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9002                                  0);
9003         if (tmp != NOT_IN_PAD) {
9004             /* might be an "our" variable" */
9005             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9006                 /* build ops for a bareword */
9007                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9008                 HEK * const stashname = HvNAME_HEK(stash);
9009                 SV *  const sym = newSVhek(stashname);
9010                 sv_catpvs(sym, "::");
9011                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9012                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9013                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9014                 if (pit != '&')
9015                   gv_fetchsv(sym,
9016                     GV_ADDMULTI,
9017                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9018                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9019                      : SVt_PVHV));
9020                 return BAREWORD;
9021             }
9022
9023             pl_yylval.opval = newOP(OP_PADANY, 0);
9024             pl_yylval.opval->op_targ = tmp;
9025             return PRIVATEREF;
9026         }
9027     }
9028
9029     /*
9030        Whine if they've said @foo or @foo{key} in a doublequoted string,
9031        and @foo (or %foo) isn't a variable we can find in the symbol
9032        table.
9033     */
9034     if (ckWARN(WARN_AMBIGUOUS)
9035         && pit == '@'
9036         && PL_lex_state != LEX_NORMAL
9037         && !PL_lex_brackets)
9038     {
9039         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9040                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9041                                          SVt_PVAV);
9042         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9043            )
9044         {
9045             /* Downgraded from fatal to warning 20000522 mjd */
9046             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9047                         "Possible unintended interpolation of %" UTF8f
9048                         " in string",
9049                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9050         }
9051     }
9052
9053     /* build ops for a bareword */
9054     pl_yylval.opval = newSVOP(OP_CONST, 0,
9055                                    newSVpvn_flags(PL_tokenbuf + 1,
9056                                                       tokenbuf_len - 1,
9057                                                       UTF ? SVf_UTF8 : 0 ));
9058     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9059     if (pit != '&')
9060         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9061                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9062                      | ( UTF ? SVf_UTF8 : 0 ),
9063                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9064                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9065                       : SVt_PVHV));
9066     return BAREWORD;
9067 }
9068
9069 STATIC void
9070 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9071 {
9072     PERL_ARGS_ASSERT_CHECKCOMMA;
9073
9074     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
9075         if (ckWARN(WARN_SYNTAX)) {
9076             int level = 1;
9077             const char *w;
9078             for (w = s+2; *w && level; w++) {
9079                 if (*w == '(')
9080                     ++level;
9081                 else if (*w == ')')
9082                     --level;
9083             }
9084             while (isSPACE(*w))
9085                 ++w;
9086             /* the list of chars below is for end of statements or
9087              * block / parens, boolean operators (&&, ||, //) and branch
9088              * constructs (or, and, if, until, unless, while, err, for).
9089              * Not a very solid hack... */
9090             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9091                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9092                             "%s (...) interpreted as function",name);
9093         }
9094     }
9095     while (s < PL_bufend && isSPACE(*s))
9096         s++;
9097     if (*s == '(')
9098         s++;
9099     while (s < PL_bufend && isSPACE(*s))
9100         s++;
9101     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9102         const char * const w = s;
9103         s += UTF ? UTF8SKIP(s) : 1;
9104         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9105             s += UTF ? UTF8SKIP(s) : 1;
9106         while (s < PL_bufend && isSPACE(*s))
9107             s++;
9108         if (*s == ',') {
9109             GV* gv;
9110             if (keyword(w, s - w, 0))
9111                 return;
9112
9113             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9114             if (gv && GvCVu(gv))
9115                 return;
9116             if (s - w <= 254) {
9117                 PADOFFSET off;
9118                 char tmpbuf[256];
9119                 Copy(w, tmpbuf+1, s - w, char);
9120                 *tmpbuf = '&';
9121                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9122                 if (off != NOT_IN_PAD) return;
9123             }
9124             Perl_croak(aTHX_ "No comma allowed after %s", what);
9125         }
9126     }
9127 }
9128
9129 /* S_new_constant(): do any overload::constant lookup.
9130
9131    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9132    Best used as sv=new_constant(..., sv, ...).
9133    If s, pv are NULL, calls subroutine with one argument,
9134    and <type> is used with error messages only.
9135    <type> is assumed to be well formed UTF-8 */
9136
9137 STATIC SV *
9138 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9139                SV *sv, SV *pv, const char *type, STRLEN typelen)
9140 {
9141     dSP;
9142     HV * table = GvHV(PL_hintgv);                /* ^H */
9143     SV *res;
9144     SV *errsv = NULL;
9145     SV **cvp;
9146     SV *cv, *typesv;
9147     const char *why1 = "", *why2 = "", *why3 = "";
9148
9149     PERL_ARGS_ASSERT_NEW_CONSTANT;
9150     /* We assume that this is true: */
9151     if (*key == 'c') { assert (strEQ(key, "charnames")); }
9152     assert(type || s);
9153
9154     /* charnames doesn't work well if there have been errors found */
9155     if (PL_error_count > 0 && *key == 'c')
9156     {
9157         SvREFCNT_dec_NN(sv);
9158         return &PL_sv_undef;
9159     }
9160
9161     sv_2mortal(sv);                     /* Parent created it permanently */
9162     if (!table
9163         || ! (PL_hints & HINT_LOCALIZE_HH)
9164         || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9165         || ! SvOK(*cvp))
9166     {
9167         char *msg;
9168
9169         /* Here haven't found what we're looking for.  If it is charnames,
9170          * perhaps it needs to be loaded.  Try doing that before giving up */
9171         if (*key == 'c') {
9172             Perl_load_module(aTHX_
9173                             0,
9174                             newSVpvs("_charnames"),
9175                              /* version parameter; no need to specify it, as if
9176                               * we get too early a version, will fail anyway,
9177                               * not being able to find '_charnames' */
9178                             NULL,
9179                             newSVpvs(":full"),
9180                             newSVpvs(":short"),
9181                             NULL);
9182             assert(sp == PL_stack_sp);
9183             table = GvHV(PL_hintgv);
9184             if (table
9185                 && (PL_hints & HINT_LOCALIZE_HH)
9186                 && (cvp = hv_fetch(table, key, keylen, FALSE))
9187                 && SvOK(*cvp))
9188             {
9189                 goto now_ok;
9190             }
9191         }
9192         if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9193             msg = Perl_form(aTHX_
9194                                "Constant(%.*s) unknown",
9195                                 (int)(type ? typelen : len),
9196                                 (type ? type: s));
9197         }
9198         else {
9199             why1 = "$^H{";
9200             why2 = key;
9201             why3 = "} is not defined";
9202         report:
9203             if (*key == 'c') {
9204                 msg = Perl_form(aTHX_
9205                             /* The +3 is for '\N{'; -4 for that, plus '}' */
9206                             "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9207                       );
9208             }
9209             else {
9210                 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9211                                     (int)(type ? typelen : len),
9212                                     (type ? type: s), why1, why2, why3);
9213             }
9214         }
9215         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9216         return SvREFCNT_inc_simple_NN(sv);
9217     }
9218   now_ok:
9219     cv = *cvp;
9220     if (!pv && s)
9221         pv = newSVpvn_flags(s, len, SVs_TEMP);
9222     if (type && pv)
9223         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9224     else
9225         typesv = &PL_sv_undef;
9226
9227     PUSHSTACKi(PERLSI_OVERLOAD);
9228     ENTER ;
9229     SAVETMPS;
9230
9231     PUSHMARK(SP) ;
9232     EXTEND(sp, 3);
9233     if (pv)
9234         PUSHs(pv);
9235     PUSHs(sv);
9236     if (pv)
9237         PUSHs(typesv);
9238     PUTBACK;
9239     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9240
9241     SPAGAIN ;
9242
9243     /* Check the eval first */
9244     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9245         STRLEN errlen;
9246         const char * errstr;
9247         sv_catpvs(errsv, "Propagated");
9248         errstr = SvPV_const(errsv, errlen);
9249         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9250         (void)POPs;
9251         res = SvREFCNT_inc_simple_NN(sv);
9252     }
9253     else {
9254         res = POPs;
9255         SvREFCNT_inc_simple_void_NN(res);
9256     }
9257
9258     PUTBACK ;
9259     FREETMPS ;
9260     LEAVE ;
9261     POPSTACK;
9262
9263     if (!SvOK(res)) {
9264         why1 = "Call to &{$^H{";
9265         why2 = key;
9266         why3 = "}} did not return a defined value";
9267         sv = res;
9268         (void)sv_2mortal(sv);
9269         goto report;
9270     }
9271
9272     return res;
9273 }
9274
9275 PERL_STATIC_INLINE void
9276 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9277                     bool is_utf8, bool check_dollar, bool tick_warn)
9278 {
9279     int saw_tick = 0;
9280     const char *olds = *s;
9281     PERL_ARGS_ASSERT_PARSE_IDENT;
9282
9283     while (*s < PL_bufend) {
9284         if (*d >= e)
9285             Perl_croak(aTHX_ "%s", ident_too_long);
9286         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9287              /* The UTF-8 case must come first, otherwise things
9288              * like c\N{COMBINING TILDE} would start failing, as the
9289              * isWORDCHAR_A case below would gobble the 'c' up.
9290              */
9291
9292             char *t = *s + UTF8SKIP(*s);
9293             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9294                 t += UTF8SKIP(t);
9295             }
9296             if (*d + (t - *s) > e)
9297                 Perl_croak(aTHX_ "%s", ident_too_long);
9298             Copy(*s, *d, t - *s, char);
9299             *d += t - *s;
9300             *s = t;
9301         }
9302         else if ( isWORDCHAR_A(**s) ) {
9303             do {
9304                 *(*d)++ = *(*s)++;
9305             } while (isWORDCHAR_A(**s) && *d < e);
9306         }
9307         else if (   allow_package
9308                  && **s == '\''
9309                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9310         {
9311             *(*d)++ = ':';
9312             *(*d)++ = ':';
9313             (*s)++;
9314             saw_tick++;
9315         }
9316         else if (allow_package && **s == ':' && (*s)[1] == ':'
9317            /* Disallow things like Foo::$bar. For the curious, this is
9318             * the code path that triggers the "Bad name after" warning
9319             * when looking for barewords.
9320             */
9321            && !(check_dollar && (*s)[2] == '$')) {
9322             *(*d)++ = *(*s)++;
9323             *(*d)++ = *(*s)++;
9324         }
9325         else
9326             break;
9327     }
9328     if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
9329               && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
9330         char *d;
9331         char *d2;
9332         Newx(d, *s - olds + saw_tick + 2, char); /* +2 for $# */
9333         d2 = d;
9334         SAVEFREEPV(d);
9335         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9336                          "Old package separator used in string");
9337         if (olds[-1] == '#')
9338             *d2++ = olds[-2];
9339         *d2++ = olds[-1];
9340         while (olds < *s) {
9341             if (*olds == '\'') {
9342                 *d2++ = '\\';
9343                 *d2++ = *olds++;
9344             }
9345             else
9346                 *d2++ = *olds++;
9347         }
9348         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9349                          "\t(Did you mean \"%" UTF8f "\" instead?)\n",
9350                           UTF8fARG(is_utf8, d2-d, d));
9351     }
9352     return;
9353 }
9354
9355 /* Returns a NUL terminated string, with the length of the string written to
9356    *slp
9357    */
9358 STATIC char *
9359 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9360 {
9361     char *d = dest;
9362     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9363     bool is_utf8 = cBOOL(UTF);
9364
9365     PERL_ARGS_ASSERT_SCAN_WORD;
9366
9367     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
9368     *d = '\0';
9369     *slp = d - dest;
9370     return s;
9371 }
9372
9373 /* Is the byte 'd' a legal single character identifier name?  'u' is true
9374  * iff Unicode semantics are to be used.  The legal ones are any of:
9375  *  a) all ASCII characters except:
9376  *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9377  *          2) '{'
9378  *     The final case currently doesn't get this far in the program, so we
9379  *     don't test for it.  If that were to change, it would be ok to allow it.
9380  *  b) When not under Unicode rules, any upper Latin1 character
9381  *  c) Otherwise, when unicode rules are used, all XIDS characters.
9382  *
9383  *      Because all ASCII characters have the same representation whether
9384  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9385  *      '{' without knowing if is UTF-8 or not. */
9386 #define VALID_LEN_ONE_IDENT(s, e, is_utf8)                                  \
9387     (isGRAPH_A(*(s)) || ((is_utf8)                                          \
9388                          ? isIDFIRST_utf8_safe(s, e)                        \
9389                          : (isGRAPH_L1(*s)                                  \
9390                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9391
9392 STATIC char *
9393 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9394 {
9395     I32 herelines = PL_parser->herelines;
9396     SSize_t bracket = -1;
9397     char funny = *s++;
9398     char *d = dest;
9399     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9400     bool is_utf8 = cBOOL(UTF);
9401     I32 orig_copline = 0, tmp_copline = 0;
9402
9403     PERL_ARGS_ASSERT_SCAN_IDENT;
9404
9405     if (isSPACE(*s) || !*s)
9406         s = skipspace(s);
9407     if (isDIGIT(*s)) {
9408         while (isDIGIT(*s)) {
9409             if (d >= e)
9410                 Perl_croak(aTHX_ "%s", ident_too_long);
9411             *d++ = *s++;
9412         }
9413     }
9414     else {  /* See if it is a "normal" identifier */
9415         parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
9416     }
9417     *d = '\0';
9418     d = dest;
9419     if (*d) {
9420         /* Either a digit variable, or parse_ident() found an identifier
9421            (anything valid as a bareword), so job done and return.  */
9422         if (PL_lex_state != LEX_NORMAL)
9423             PL_lex_state = LEX_INTERPENDMAYBE;
9424         return s;
9425     }
9426
9427     /* Here, it is not a run-of-the-mill identifier name */
9428
9429     if (*s == '$' && s[1]
9430         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9431             || isDIGIT_A((U8)s[1])
9432             || s[1] == '$'
9433             || s[1] == '{'
9434             || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
9435     {
9436         /* Dereferencing a value in a scalar variable.
9437            The alternatives are different syntaxes for a scalar variable.
9438            Using ' as a leading package separator isn't allowed. :: is.   */
9439         return s;
9440     }
9441     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9442     if (*s == '{') {
9443         bracket = s - SvPVX(PL_linestr);
9444         s++;
9445         orig_copline = CopLINE(PL_curcop);
9446         if (s < PL_bufend && isSPACE(*s)) {
9447             s = skipspace(s);
9448         }
9449     }
9450     if ((s <= PL_bufend - (is_utf8)
9451                           ? UTF8SKIP(s)
9452                           : 1)
9453         && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
9454     {
9455         if (is_utf8) {
9456             const STRLEN skip = UTF8SKIP(s);
9457             STRLEN i;
9458             d[skip] = '\0';
9459             for ( i = 0; i < skip; i++ )
9460                 d[i] = *s++;
9461         }
9462         else {
9463             *d = *s++;
9464             d[1] = '\0';
9465         }
9466     }
9467     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9468     if (*d == '^' && *s && isCONTROLVAR(*s)) {
9469         *d = toCTRL(*s);
9470         s++;
9471     }
9472     /* Warn about ambiguous code after unary operators if {...} notation isn't
9473        used.  There's no difference in ambiguity; it's merely a heuristic
9474        about when not to warn.  */
9475     else if (ck_uni && bracket == -1)
9476         check_uni();
9477     if (bracket != -1) {
9478         bool skip;
9479         char *s2;
9480         /* If we were processing {...} notation then...  */
9481         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
9482             || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9483                  && isWORDCHAR(*s))
9484         ) {
9485             /* note we have to check for a normal identifier first,
9486              * as it handles utf8 symbols, and only after that has
9487              * been ruled out can we look at the caret words */
9488             if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
9489                 /* if it starts as a valid identifier, assume that it is one.
9490                    (the later check for } being at the expected point will trap
9491                    cases where this doesn't pan out.)  */
9492                 d += is_utf8 ? UTF8SKIP(d) : 1;
9493                 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
9494                 *d = '\0';
9495             }
9496             else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
9497                 d++;
9498                 while (isWORDCHAR(*s) && d < e) {
9499                     *d++ = *s++;
9500                 }
9501                 if (d >= e)
9502                     Perl_croak(aTHX_ "%s", ident_too_long);
9503                 *d = '\0';
9504             }
9505             tmp_copline = CopLINE(PL_curcop);
9506             if (s < PL_bufend && isSPACE(*s)) {
9507                 s = skipspace(s);
9508             }
9509             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9510                 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
9511                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9512                     const char * const brack =
9513                         (const char *)
9514                         ((*s == '[') ? "[...]" : "{...}");
9515                     orig_copline = CopLINE(PL_curcop);
9516                     CopLINE_set(PL_curcop, tmp_copline);
9517    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9518                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9519                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9520                         funny, dest, brack, funny, dest, brack);
9521                     CopLINE_set(PL_curcop, orig_copline);
9522                 }
9523                 bracket++;
9524                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9525                 PL_lex_allbrackets++;
9526                 return s;
9527             }
9528         }
9529
9530         if ( !tmp_copline )
9531             tmp_copline = CopLINE(PL_curcop);
9532         if ((skip = s < PL_bufend && isSPACE(*s))) {
9533             /* Avoid incrementing line numbers or resetting PL_linestart,
9534                in case we have to back up.  */
9535             STRLEN s_off = s - SvPVX(PL_linestr);
9536             s2 = peekspace(s);
9537             s = SvPVX(PL_linestr) + s_off;
9538         }
9539         else
9540             s2 = s;
9541
9542         /* Expect to find a closing } after consuming any trailing whitespace.
9543          */
9544         if (*s2 == '}') {
9545             /* Now increment line numbers if applicable.  */
9546             if (skip)
9547                 s = skipspace(s);
9548             s++;
9549             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9550                 PL_lex_state = LEX_INTERPEND;
9551                 PL_expect = XREF;
9552             }
9553             if (PL_lex_state == LEX_NORMAL) {
9554                 if (ckWARN(WARN_AMBIGUOUS)
9555                     && (keyword(dest, d - dest, 0)
9556                         || get_cvn_flags(dest, d - dest, is_utf8
9557                            ? SVf_UTF8
9558                            : 0)))
9559                 {
9560                     SV *tmp = newSVpvn_flags( dest, d - dest,
9561                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9562                     if (funny == '#')
9563                         funny = '@';
9564                     orig_copline = CopLINE(PL_curcop);
9565                     CopLINE_set(PL_curcop, tmp_copline);
9566                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9567                         "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
9568                         funny, SVfARG(tmp), funny, SVfARG(tmp));
9569                     CopLINE_set(PL_curcop, orig_copline);
9570                 }
9571             }
9572         }
9573         else {
9574             /* Didn't find the closing } at the point we expected, so restore
9575                state such that the next thing to process is the opening { and */
9576             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9577             CopLINE_set(PL_curcop, orig_copline);
9578             PL_parser->herelines = herelines;
9579             *dest = '\0';
9580         }
9581     }
9582     else if (   PL_lex_state == LEX_INTERPNORMAL
9583              && !PL_lex_brackets
9584              && !intuit_more(s, PL_bufend))
9585         PL_lex_state = LEX_INTERPEND;
9586     return s;
9587 }
9588
9589 static bool
9590 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9591
9592     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9593      * found in the parse starting at 's', based on the subset that are valid
9594      * in this context input to this routine in 'valid_flags'. Advances s.
9595      * Returns TRUE if the input should be treated as a valid flag, so the next
9596      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9597      * upon first call on the current regex.  This routine will set it to any
9598      * charset modifier found.  The caller shouldn't change it.  This way,
9599      * another charset modifier encountered in the parse can be detected as an
9600      * error, as we have decided to allow only one */
9601
9602     const char c = **s;
9603     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9604
9605     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9606         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
9607             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9608                        UTF ? SVf_UTF8 : 0);
9609             (*s) += charlen;
9610             /* Pretend that it worked, so will continue processing before
9611              * dieing */
9612             return TRUE;
9613         }
9614         return FALSE;
9615     }
9616
9617     switch (c) {
9618
9619         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
9620         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
9621         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
9622         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
9623         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
9624         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9625         case LOCALE_PAT_MOD:
9626             if (*charset) {
9627                 goto multiple_charsets;
9628             }
9629             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9630             *charset = c;
9631             break;
9632         case UNICODE_PAT_MOD:
9633             if (*charset) {
9634                 goto multiple_charsets;
9635             }
9636             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9637             *charset = c;
9638             break;
9639         case ASCII_RESTRICT_PAT_MOD:
9640             if (! *charset) {
9641                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9642             }
9643             else {
9644
9645                 /* Error if previous modifier wasn't an 'a', but if it was, see
9646                  * if, and accept, a second occurrence (only) */
9647                 if (*charset != 'a'
9648                     || get_regex_charset(*pmfl)
9649                         != REGEX_ASCII_RESTRICTED_CHARSET)
9650                 {
9651                         goto multiple_charsets;
9652                 }
9653                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9654             }
9655             *charset = c;
9656             break;
9657         case DEPENDS_PAT_MOD:
9658             if (*charset) {
9659                 goto multiple_charsets;
9660             }
9661             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9662             *charset = c;
9663             break;
9664     }
9665
9666     (*s)++;
9667     return TRUE;
9668
9669     multiple_charsets:
9670         if (*charset != c) {
9671             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9672         }
9673         else if (c == 'a') {
9674   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9675             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9676         }
9677         else {
9678             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9679         }
9680
9681         /* Pretend that it worked, so will continue processing before dieing */
9682         (*s)++;
9683         return TRUE;
9684 }
9685
9686 STATIC char *
9687 S_scan_pat(pTHX_ char *start, I32 type)
9688 {
9689     PMOP *pm;
9690     char *s;
9691     const char * const valid_flags =
9692         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9693     char charset = '\0';    /* character set modifier */
9694     unsigned int x_mod_count = 0;
9695
9696     PERL_ARGS_ASSERT_SCAN_PAT;
9697
9698     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
9699     if (!s)
9700         Perl_croak(aTHX_ "Search pattern not terminated");
9701
9702     pm = (PMOP*)newPMOP(type, 0);
9703     if (PL_multi_open == '?') {
9704         /* This is the only point in the code that sets PMf_ONCE:  */
9705         pm->op_pmflags |= PMf_ONCE;
9706
9707         /* Hence it's safe to do this bit of PMOP book-keeping here, which
9708            allows us to restrict the list needed by reset to just the ??
9709            matches.  */
9710         assert(type != OP_TRANS);
9711         if (PL_curstash) {
9712             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9713             U32 elements;
9714             if (!mg) {
9715                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9716                                  0);
9717             }
9718             elements = mg->mg_len / sizeof(PMOP**);
9719             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9720             ((PMOP**)mg->mg_ptr) [elements++] = pm;
9721             mg->mg_len = elements * sizeof(PMOP**);
9722             PmopSTASH_set(pm,PL_curstash);
9723         }
9724     }
9725
9726     /* if qr/...(?{..}).../, then need to parse the pattern within a new
9727      * anon CV. False positives like qr/[(?{]/ are harmless */
9728
9729     if (type == OP_QR) {
9730         STRLEN len;
9731         char *e, *p = SvPV(PL_lex_stuff, len);
9732         e = p + len;
9733         for (; p < e; p++) {
9734             if (p[0] == '(' && p[1] == '?'
9735                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9736             {
9737                 pm->op_pmflags |= PMf_HAS_CV;
9738                 break;
9739             }
9740         }
9741         pm->op_pmflags |= PMf_IS_QR;
9742     }
9743
9744     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
9745                                 &s, &charset, &x_mod_count))
9746     {};
9747     /* issue a warning if /c is specified,but /g is not */
9748     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9749     {
9750         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9751                        "Use of /c modifier is meaningless without /g" );
9752     }
9753
9754     PL_lex_op = (OP*)pm;
9755     pl_yylval.ival = OP_MATCH;
9756     return s;
9757 }
9758
9759 STATIC char *
9760 S_scan_subst(pTHX_ char *start)
9761 {
9762     char *s;
9763     PMOP *pm;
9764     I32 first_start;
9765     line_t first_line;
9766     line_t linediff = 0;
9767     I32 es = 0;
9768     char charset = '\0';    /* character set modifier */
9769     unsigned int x_mod_count = 0;
9770     char *t;
9771
9772     PERL_ARGS_ASSERT_SCAN_SUBST;
9773
9774     pl_yylval.ival = OP_NULL;
9775
9776     s = scan_str(start, TRUE, FALSE, FALSE, &t);
9777
9778     if (!s)
9779         Perl_croak(aTHX_ "Substitution pattern not terminated");
9780
9781     s = t;
9782
9783     first_start = PL_multi_start;
9784     first_line = CopLINE(PL_curcop);
9785     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9786     if (!s) {
9787         SvREFCNT_dec_NN(PL_lex_stuff);
9788         PL_lex_stuff = NULL;
9789         Perl_croak(aTHX_ "Substitution replacement not terminated");
9790     }
9791     PL_multi_start = first_start;       /* so whole substitution is taken together */
9792
9793     pm = (PMOP*)newPMOP(OP_SUBST, 0);
9794
9795
9796     while (*s) {
9797         if (*s == EXEC_PAT_MOD) {
9798             s++;
9799             es++;
9800         }
9801         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9802                                   &s, &charset, &x_mod_count))
9803         {
9804             break;
9805         }
9806     }
9807
9808     if ((pm->op_pmflags & PMf_CONTINUE)) {
9809         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9810     }
9811
9812     if (es) {
9813         SV * const repl = newSVpvs("");
9814
9815         PL_multi_end = 0;
9816         pm->op_pmflags |= PMf_EVAL;
9817         for (; es > 1; es--) {
9818             sv_catpvs(repl, "eval ");
9819         }
9820         sv_catpvs(repl, "do {");
9821         sv_catsv(repl, PL_parser->lex_sub_repl);
9822         sv_catpvs(repl, "}");
9823         SvREFCNT_dec(PL_parser->lex_sub_repl);
9824         PL_parser->lex_sub_repl = repl;
9825     }
9826
9827
9828     linediff = CopLINE(PL_curcop) - first_line;
9829     if (linediff)
9830         CopLINE_set(PL_curcop, first_line);
9831
9832     if (linediff || es) {
9833         /* the IVX field indicates that the replacement string is a s///e;
9834          * the NVX field indicates how many src code lines the replacement
9835          * spreads over */
9836         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
9837         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = 0;
9838         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
9839                                                                     cBOOL(es);
9840     }
9841
9842     PL_lex_op = (OP*)pm;
9843     pl_yylval.ival = OP_SUBST;
9844     return s;
9845 }
9846
9847 STATIC char *
9848 S_scan_trans(pTHX_ char *start)
9849 {
9850     char* s;
9851     OP *o;
9852     U8 squash;
9853     U8 del;
9854     U8 complement;
9855     bool nondestruct = 0;
9856     char *t;
9857
9858     PERL_ARGS_ASSERT_SCAN_TRANS;
9859
9860     pl_yylval.ival = OP_NULL;
9861
9862     s = scan_str(start,FALSE,FALSE,FALSE,&t);
9863     if (!s)
9864         Perl_croak(aTHX_ "Transliteration pattern not terminated");
9865
9866     s = t;
9867
9868     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9869     if (!s) {
9870         SvREFCNT_dec_NN(PL_lex_stuff);
9871         PL_lex_stuff = NULL;
9872         Perl_croak(aTHX_ "Transliteration replacement not terminated");
9873     }
9874
9875     complement = del = squash = 0;
9876     while (1) {
9877         switch (*s) {
9878         case 'c':
9879             complement = OPpTRANS_COMPLEMENT;
9880             break;
9881         case 'd':
9882             del = OPpTRANS_DELETE;
9883             break;
9884         case 's':
9885             squash = OPpTRANS_SQUASH;
9886             break;
9887         case 'r':
9888             nondestruct = 1;
9889             break;
9890         default:
9891             goto no_more;
9892         }
9893         s++;
9894     }
9895   no_more:
9896
9897     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9898     o->op_private &= ~OPpTRANS_ALL;
9899     o->op_private |= del|squash|complement|
9900       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9901       (DO_UTF8(PL_parser->lex_sub_repl) ? OPpTRANS_TO_UTF   : 0);
9902
9903     PL_lex_op = o;
9904     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9905
9906
9907     return s;
9908 }
9909
9910 /* scan_heredoc
9911    Takes a pointer to the first < in <<FOO.
9912    Returns a pointer to the byte following <<FOO.
9913
9914    This function scans a heredoc, which involves different methods
9915    depending on whether we are in a string eval, quoted construct, etc.
9916    This is because PL_linestr could containing a single line of input, or
9917    a whole string being evalled, or the contents of the current quote-
9918    like operator.
9919
9920    The two basic methods are:
9921     - Steal lines from the input stream
9922     - Scan the heredoc in PL_linestr and remove it therefrom
9923
9924    In a file scope or filtered eval, the first method is used; in a
9925    string eval, the second.
9926
9927    In a quote-like operator, we have to choose between the two,
9928    depending on where we can find a newline.  We peek into outer lex-
9929    ing scopes until we find one with a newline in it.  If we reach the
9930    outermost lexing scope and it is a file, we use the stream method.
9931    Otherwise it is treated as an eval.
9932 */
9933
9934 STATIC char *
9935 S_scan_heredoc(pTHX_ char *s)
9936 {
9937     I32 op_type = OP_SCALAR;
9938     I32 len;
9939     SV *tmpstr;
9940     char term;
9941     char *d;
9942     char *e;
9943     char *peek;
9944     char *indent = 0;
9945     I32 indent_len = 0;
9946     bool indented = FALSE;
9947     const bool infile = PL_rsfp || PL_parser->filtered;
9948     const line_t origline = CopLINE(PL_curcop);
9949     LEXSHARED *shared = PL_parser->lex_shared;
9950
9951     PERL_ARGS_ASSERT_SCAN_HEREDOC;
9952
9953     s += 2;
9954     d = PL_tokenbuf + 1;
9955     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9956     *PL_tokenbuf = '\n';
9957     peek = s;
9958     if (*peek == '~') {
9959         indented = TRUE;
9960         peek++; s++;
9961     }
9962     while (SPACE_OR_TAB(*peek))
9963         peek++;
9964     if (*peek == '`' || *peek == '\'' || *peek =='"') {
9965         s = peek;
9966         term = *s++;
9967         s = delimcpy(d, e, s, PL_bufend, term, &len);
9968         if (s == PL_bufend)
9969             Perl_croak(aTHX_ "Unterminated delimiter for here document");
9970         d += len;
9971         s++;
9972     }
9973     else {
9974         if (*s == '\\')
9975             /* <<\FOO is equivalent to <<'FOO' */
9976             s++, term = '\'';
9977         else
9978             term = '"';
9979         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9980             Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
9981         peek = s;
9982         while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
9983             peek += UTF ? UTF8SKIP(peek) : 1;
9984         }
9985         len = (peek - s >= e - d) ? (e - d) : (peek - s);
9986         Copy(s, d, len, char);
9987         s += len;
9988         d += len;
9989     }
9990     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9991         Perl_croak(aTHX_ "Delimiter for here document is too long");
9992     *d++ = '\n';
9993     *d = '\0';
9994     len = d - PL_tokenbuf;
9995
9996 #ifndef PERL_STRICT_CR
9997     d = (char *) memchr(s, '\r', PL_bufend - s);
9998     if (d) {
9999         char * const olds = s;
10000         s = d;
10001         while (s < PL_bufend) {
10002             if (*s == '\r') {
10003                 *d++ = '\n';
10004                 if (*++s == '\n')
10005                     s++;
10006             }
10007             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
10008                 *d++ = *s++;
10009                 s++;
10010             }
10011             else
10012                 *d++ = *s++;
10013         }
10014         *d = '\0';
10015         PL_bufend = d;
10016         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10017         s = olds;
10018     }
10019 #endif
10020
10021     tmpstr = newSV_type(SVt_PVIV);
10022     SvGROW(tmpstr, 80);
10023     if (term == '\'') {
10024         op_type = OP_CONST;
10025         SvIV_set(tmpstr, -1);
10026     }
10027     else if (term == '`') {
10028         op_type = OP_BACKTICK;
10029         SvIV_set(tmpstr, '\\');
10030     }
10031
10032     PL_multi_start = origline + 1 + PL_parser->herelines;
10033     PL_multi_open = PL_multi_close = '<';
10034     /* inside a string eval or quote-like operator */
10035     if (!infile || PL_lex_inwhat) {
10036         SV *linestr;
10037         char *bufend;
10038         char * const olds = s;
10039         PERL_CONTEXT * const cx = CX_CUR();
10040         /* These two fields are not set until an inner lexing scope is
10041            entered.  But we need them set here. */
10042         shared->ls_bufptr  = s;
10043         shared->ls_linestr = PL_linestr;
10044         if (PL_lex_inwhat)
10045           /* Look for a newline.  If the current buffer does not have one,
10046              peek into the line buffer of the parent lexing scope, going
10047              up as many levels as necessary to find one with a newline
10048              after bufptr.
10049            */
10050           while (!(s = (char *)memchr(
10051                     (void *)shared->ls_bufptr, '\n',
10052                     SvEND(shared->ls_linestr)-shared->ls_bufptr
10053                 ))) {
10054             shared = shared->ls_prev;
10055             /* shared is only null if we have gone beyond the outermost
10056                lexing scope.  In a file, we will have broken out of the
10057                loop in the previous iteration.  In an eval, the string buf-
10058                fer ends with "\n;", so the while condition above will have
10059                evaluated to false.  So shared can never be null.  Or so you
10060                might think.  Odd syntax errors like s;@{<<; can gobble up
10061                the implicit semicolon at the end of a flie, causing the
10062                file handle to be closed even when we are not in a string
10063                eval.  So shared may be null in that case.
10064                (Closing '}' here to balance the earlier open brace for
10065                editors that look for matched pairs.) */
10066             if (UNLIKELY(!shared))
10067                 goto interminable;
10068             /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10069                most lexing scope.  In a file, shared->ls_linestr at that
10070                level is just one line, so there is no body to steal. */
10071             if (infile && !shared->ls_prev) {
10072                 s = olds;
10073                 goto streaming;
10074             }
10075           }
10076         else {  /* eval or we've already hit EOF */
10077             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10078             if (!s)
10079                 goto interminable;
10080         }
10081         linestr = shared->ls_linestr;
10082         bufend = SvEND(linestr);
10083         d = s;
10084         if (indented) {
10085             char *myolds = s;
10086
10087             while (s < bufend - len + 1) {
10088                 if (*s++ == '\n')
10089                     ++PL_parser->herelines;
10090
10091                 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10092                     char *backup = s;
10093                     indent_len = 0;
10094
10095                     /* Only valid if it's preceded by whitespace only */
10096                     while (backup != myolds && --backup >= myolds) {
10097                         if (! SPACE_OR_TAB(*backup)) {
10098                             break;
10099                         }
10100
10101                         indent_len++;
10102                     }
10103
10104                     /* No whitespace or all! */
10105                     if (backup == s || *backup == '\n') {
10106                         Newx(indent, indent_len + 1, char);
10107                         memcpy(indent, backup + 1, indent_len);
10108                         indent[indent_len] = 0;
10109                         s--; /* before our delimiter */
10110                         PL_parser->herelines--; /* this line doesn't count */
10111                         break;
10112                     }
10113                 }
10114             }
10115         } else {
10116             while (s < bufend - len + 1
10117                    && memNE(s,PL_tokenbuf,len) )
10118             {
10119                 if (*s++ == '\n')
10120                     ++PL_parser->herelines;
10121             }
10122         }
10123
10124         if (s >= bufend - len + 1) {
10125             goto interminable;
10126         }
10127         sv_setpvn(tmpstr,d+1,s-d);
10128         s += len - 1;
10129         /* the preceding stmt passes a newline */
10130         PL_parser->herelines++;
10131
10132         /* s now points to the newline after the heredoc terminator.
10133            d points to the newline before the body of the heredoc.
10134          */
10135
10136         /* We are going to modify linestr in place here, so set
10137            aside copies of the string if necessary for re-evals or
10138            (caller $n)[6]. */
10139         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10140            check shared->re_eval_str. */
10141         if (shared->re_eval_start || shared->re_eval_str) {
10142             /* Set aside the rest of the regexp */
10143             if (!shared->re_eval_str)
10144                 shared->re_eval_str =
10145                        newSVpvn(shared->re_eval_start,
10146                                 bufend - shared->re_eval_start);
10147             shared->re_eval_start -= s-d;
10148         }
10149         if (cxstack_ix >= 0
10150             && CxTYPE(cx) == CXt_EVAL
10151             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10152             && cx->blk_eval.cur_text == linestr)
10153         {
10154             cx->blk_eval.cur_text = newSVsv(linestr);
10155             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10156         }
10157         /* Copy everything from s onwards back to d. */
10158         Move(s,d,bufend-s + 1,char);
10159         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10160         /* Setting PL_bufend only applies when we have not dug deeper
10161            into other scopes, because sublex_done sets PL_bufend to
10162            SvEND(PL_linestr). */
10163         if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10164         s = olds;
10165     }
10166     else
10167     {
10168       SV *linestr_save;
10169       char *oldbufptr_save;
10170       char *oldoldbufptr_save;
10171      streaming:
10172       SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
10173       term = PL_tokenbuf[1];
10174       len--;
10175       linestr_save = PL_linestr; /* must restore this afterwards */
10176       d = s;                     /* and this */
10177       oldbufptr_save = PL_oldbufptr;
10178       oldoldbufptr_save = PL_oldoldbufptr;
10179       PL_linestr = newSVpvs("");
10180       PL_bufend = SvPVX(PL_linestr);
10181       while (1) {
10182         PL_bufptr = PL_bufend;
10183         CopLINE_set(PL_curcop,
10184                     origline + 1 + PL_parser->herelines);
10185         if (!lex_next_chunk(LEX_NO_TERM)
10186          && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10187             /* Simply freeing linestr_save might seem simpler here, as it
10188                does not matter what PL_linestr points to, since we are
10189                about to croak; but in a quote-like op, linestr_save
10190                will have been prospectively freed already, via
10191                SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10192                restore PL_linestr. */
10193             SvREFCNT_dec_NN(PL_linestr);
10194             PL_linestr = linestr_save;
10195             PL_oldbufptr = oldbufptr_save;
10196             PL_oldoldbufptr = oldoldbufptr_save;
10197             goto interminable;
10198         }
10199         CopLINE_set(PL_curcop, origline);
10200         if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10201             s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10202             /* ^That should be enough to avoid this needing to grow:  */
10203             sv_catpvs(PL_linestr, "\n\0");
10204             assert(s == SvPVX(PL_linestr));
10205             PL_bufend = SvEND(PL_linestr);
10206         }
10207         s = PL_bufptr;
10208         PL_parser->herelines++;
10209         PL_last_lop = PL_last_uni = NULL;
10210 #ifndef PERL_STRICT_CR
10211         if (PL_bufend - PL_linestart >= 2) {
10212             if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10213                 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10214             {
10215                 PL_bufend[-2] = '\n';
10216                 PL_bufend--;
10217                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10218             }
10219             else if (PL_bufend[-1] == '\r')
10220                 PL_bufend[-1] = '\n';
10221         }
10222         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10223             PL_bufend[-1] = '\n';
10224 #endif
10225         if (indented && (PL_bufend-s) >= len) {
10226             char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10227
10228             if (found) {
10229                 char *backup = found;
10230                 indent_len = 0;
10231
10232                 /* Only valid if it's preceded by whitespace only */
10233                 while (backup != s && --backup >= s) {
10234                     if (! SPACE_OR_TAB(*backup)) {
10235                         break;
10236                     }
10237                     indent_len++;
10238                 }
10239
10240                 /* All whitespace or none! */
10241                 if (backup == found || SPACE_OR_TAB(*backup)) {
10242                     Newx(indent, indent_len + 1, char);
10243                     memcpy(indent, backup, indent_len);
10244                     indent[indent_len] = 0;
10245                     SvREFCNT_dec(PL_linestr);
10246                     PL_linestr = linestr_save;
10247                     PL_linestart = SvPVX(linestr_save);
10248                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10249                     PL_oldbufptr = oldbufptr_save;
10250                     PL_oldoldbufptr = oldoldbufptr_save;
10251                     s = d;
10252                     break;
10253                 }
10254             }
10255
10256             /* Didn't find it */
10257             sv_catsv(tmpstr,PL_linestr);
10258         } else {
10259             if (*s == term && PL_bufend-s >= len
10260                 && memEQ(s,PL_tokenbuf + 1,len))
10261             {
10262                 SvREFCNT_dec(PL_linestr);
10263                 PL_linestr = linestr_save;
10264                 PL_linestart = SvPVX(linestr_save);
10265                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10266                 PL_oldbufptr = oldbufptr_save;
10267                 PL_oldoldbufptr = oldoldbufptr_save;
10268                 s = d;
10269                 break;
10270             } else {
10271                 sv_catsv(tmpstr,PL_linestr);
10272             }
10273         }
10274       }
10275     }
10276     PL_multi_end = origline + PL_parser->herelines;
10277     if (indented && indent) {
10278         STRLEN linecount = 1;
10279         STRLEN herelen = SvCUR(tmpstr);
10280         char *ss = SvPVX(tmpstr);
10281         char *se = ss + herelen;
10282         SV *newstr = newSV(herelen+1);
10283         SvPOK_on(newstr);
10284
10285         /* Trim leading whitespace */
10286         while (ss < se) {
10287             /* newline only? Copy and move on */
10288             if (*ss == '\n') {
10289                 sv_catpv(newstr,"\n");
10290                 ss++;
10291                 linecount++;
10292
10293             /* Found our indentation? Strip it */
10294             } else if (se - ss >= indent_len
10295                        && memEQ(ss, indent, indent_len))
10296             {
10297                 STRLEN le = 0;
10298
10299                 ss += indent_len;
10300
10301                 while ((ss + le) < se && *(ss + le) != '\n')
10302                     le++;
10303
10304                 sv_catpvn(newstr, ss, le);
10305
10306                 ss += le;
10307
10308             /* Line doesn't begin with our indentation? Croak */
10309             } else {
10310                 Perl_croak(aTHX_
10311                     "Indentation on line %d of here-doc doesn't match delimiter",
10312                     (int)linecount
10313                 );
10314             }
10315         }
10316         /* avoid sv_setsv() as we dont wan't to COW here */
10317         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10318         Safefree(indent);
10319         SvREFCNT_dec_NN(newstr);
10320     }
10321     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10322         SvPV_shrink_to_cur(tmpstr);
10323     }
10324     if (!IN_BYTES) {
10325         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10326             SvUTF8_on(tmpstr);
10327     }
10328     PL_lex_stuff = tmpstr;
10329     pl_yylval.ival = op_type;
10330     return s;
10331
10332   interminable:
10333     SvREFCNT_dec(tmpstr);
10334     CopLINE_set(PL_curcop, origline);
10335     missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
10336 }
10337
10338 /* scan_inputsymbol
10339    takes: position of first '<' in input buffer
10340    returns: position of first char following the matching '>' in
10341             input buffer
10342    side-effects: pl_yylval and lex_op are set.
10343
10344    This code handles:
10345
10346    <>           read from ARGV
10347    <<>>         read from ARGV without magic open
10348    <FH>         read from filehandle
10349    <pkg::FH>    read from package qualified filehandle
10350    <pkg'FH>     read from package qualified filehandle
10351    <$fh>        read from filehandle in $fh
10352    <*.h>        filename glob
10353
10354 */
10355
10356 STATIC char *
10357 S_scan_inputsymbol(pTHX_ char *start)
10358 {
10359     char *s = start;            /* current position in buffer */
10360     char *end;
10361     I32 len;
10362     bool nomagicopen = FALSE;
10363     char *d = PL_tokenbuf;                                      /* start of temp holding space */
10364     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
10365
10366     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10367
10368     end = (char *) memchr(s, '\n', PL_bufend - s);
10369     if (!end)
10370         end = PL_bufend;
10371     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10372         nomagicopen = TRUE;
10373         *d = '\0';
10374         len = 0;
10375         s += 3;
10376     }
10377     else
10378         s = delimcpy(d, e, s + 1, end, '>', &len);      /* extract until > */
10379
10380     /* die if we didn't have space for the contents of the <>,
10381        or if it didn't end, or if we see a newline
10382     */
10383
10384     if (len >= (I32)sizeof PL_tokenbuf)
10385         Perl_croak(aTHX_ "Excessively long <> operator");
10386     if (s >= end)
10387         Perl_croak(aTHX_ "Unterminated <> operator");
10388
10389     s++;
10390
10391     /* check for <$fh>
10392        Remember, only scalar variables are interpreted as filehandles by
10393        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10394        treated as a glob() call.
10395        This code makes use of the fact that except for the $ at the front,
10396        a scalar variable and a filehandle look the same.
10397     */
10398     if (*d == '$' && d[1]) d++;
10399
10400     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10401     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
10402         d += UTF ? UTF8SKIP(d) : 1;
10403     }
10404
10405     /* If we've tried to read what we allow filehandles to look like, and
10406        there's still text left, then it must be a glob() and not a getline.
10407        Use scan_str to pull out the stuff between the <> and treat it
10408        as nothing more than a string.
10409     */
10410
10411     if (d - PL_tokenbuf != len) {
10412         pl_yylval.ival = OP_GLOB;
10413         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10414         if (!s)
10415            Perl_croak(aTHX_ "Glob not terminated");
10416         return s;
10417     }
10418     else {
10419         bool readline_overriden = FALSE;
10420         GV *gv_readline;
10421         /* we're in a filehandle read situation */
10422         d = PL_tokenbuf;
10423
10424         /* turn <> into <ARGV> */
10425         if (!len)
10426             Copy("ARGV",d,5,char);
10427
10428         /* Check whether readline() is overriden */
10429         if ((gv_readline = gv_override("readline",8)))
10430             readline_overriden = TRUE;
10431
10432         /* if <$fh>, create the ops to turn the variable into a
10433            filehandle
10434         */
10435         if (*d == '$') {
10436             /* try to find it in the pad for this block, otherwise find
10437                add symbol table ops
10438             */
10439             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
10440             if (tmp != NOT_IN_PAD) {
10441                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10442                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10443                     HEK * const stashname = HvNAME_HEK(stash);
10444                     SV * const sym = sv_2mortal(newSVhek(stashname));
10445                     sv_catpvs(sym, "::");
10446                     sv_catpv(sym, d+1);
10447                     d = SvPVX(sym);
10448                     goto intro_sym;
10449                 }
10450                 else {
10451                     OP * const o = newOP(OP_PADSV, 0);
10452                     o->op_targ = tmp;
10453                     PL_lex_op = readline_overriden
10454                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10455                                 op_append_elem(OP_LIST, o,
10456                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10457                         : newUNOP(OP_READLINE, 0, o);
10458                 }
10459             }
10460             else {
10461                 GV *gv;
10462                 ++d;
10463               intro_sym:
10464                 gv = gv_fetchpv(d,
10465                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
10466                                 SVt_PV);
10467                 PL_lex_op = readline_overriden
10468                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10469                             op_append_elem(OP_LIST,
10470                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10471                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10472                     : newUNOP(OP_READLINE, 0,
10473                             newUNOP(OP_RV2SV, 0,
10474                                 newGVOP(OP_GV, 0, gv)));
10475             }
10476             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10477             pl_yylval.ival = OP_NULL;
10478         }
10479
10480         /* If it's none of the above, it must be a literal filehandle
10481            (<Foo::BAR> or <FOO>) so build a simple readline OP */
10482         else {
10483             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10484             PL_lex_op = readline_overriden
10485                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10486                         op_append_elem(OP_LIST,
10487                             newGVOP(OP_GV, 0, gv),
10488                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10489                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
10490             pl_yylval.ival = OP_NULL;
10491         }
10492     }
10493
10494     return s;
10495 }
10496
10497
10498 /* scan_str
10499    takes:
10500         start                   position in buffer
10501         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
10502                                 only if they are of the open/close form
10503         keep_delims             preserve the delimiters around the string
10504         re_reparse              compiling a run-time /(?{})/:
10505                                    collapse // to /,  and skip encoding src
10506         delimp                  if non-null, this is set to the position of
10507                                 the closing delimiter, or just after it if
10508                                 the closing and opening delimiters differ
10509                                 (i.e., the opening delimiter of a substitu-
10510                                 tion replacement)
10511    returns: position to continue reading from buffer
10512    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10513         updates the read buffer.
10514
10515    This subroutine pulls a string out of the input.  It is called for:
10516         q               single quotes           q(literal text)
10517         '               single quotes           'literal text'
10518         qq              double quotes           qq(interpolate $here please)
10519         "               double quotes           "interpolate $here please"
10520         qx              backticks               qx(/bin/ls -l)
10521         `               backticks               `/bin/ls -l`
10522         qw              quote words             @EXPORT_OK = qw( func() $spam )
10523         m//             regexp match            m/this/
10524         s///            regexp substitute       s/this/that/
10525         tr///           string transliterate    tr/this/that/
10526         y///            string transliterate    y/this/that/
10527         ($*@)           sub prototypes          sub foo ($)
10528         (stuff)         sub attr parameters     sub foo : attr(stuff)
10529         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
10530
10531    In most of these cases (all but <>, patterns and transliterate)
10532    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
10533    calls scan_str().  s/// makes yylex() call scan_subst() which calls
10534    scan_str().  tr/// and y/// make yylex() call scan_trans() which
10535    calls scan_str().
10536
10537    It skips whitespace before the string starts, and treats the first
10538    character as the delimiter.  If the delimiter is one of ([{< then
10539    the corresponding "close" character )]}> is used as the closing
10540    delimiter.  It allows quoting of delimiters, and if the string has
10541    balanced delimiters ([{<>}]) it allows nesting.
10542
10543    On success, the SV with the resulting string is put into lex_stuff or,
10544    if that is already non-NULL, into lex_repl. The second case occurs only
10545    when parsing the RHS of the special constructs s/// and tr/// (y///).
10546    For convenience, the terminating delimiter character is stuffed into
10547    SvIVX of the SV.
10548 */
10549
10550 STATIC char *
10551 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
10552                  char **delimp
10553     )
10554 {
10555     SV *sv;                     /* scalar value: string */
10556     const char *tmps;           /* temp string, used for delimiter matching */
10557     char *s = start;            /* current position in the buffer */
10558     char term;                  /* terminating character */
10559     char *to;                   /* current position in the sv's data */
10560     I32 brackets = 1;           /* bracket nesting level */
10561     bool has_utf8 = FALSE;      /* is there any utf8 content? */
10562     IV termcode;                /* terminating char. code */
10563     U8 termstr[UTF8_MAXBYTES];  /* terminating string */
10564     STRLEN termlen;             /* length of terminating string */
10565     line_t herelines;
10566
10567     /* The delimiters that have a mirror-image closing one */
10568     const char * opening_delims = "([{<";
10569     const char * closing_delims = ")]}>";
10570
10571     const char * non_grapheme_msg = "Use of unassigned code point or"
10572                                     " non-standalone grapheme for a delimiter"
10573                                     " will be a fatal error starting in Perl"
10574                                     " 5.30";
10575     /* The only non-UTF character that isn't a stand alone grapheme is
10576      * white-space, hence can't be a delimiter.  So can skip for non-UTF-8 */
10577     bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED);
10578
10579     PERL_ARGS_ASSERT_SCAN_STR;
10580
10581     /* skip space before the delimiter */
10582     if (isSPACE(*s)) {
10583         s = skipspace(s);
10584     }
10585
10586     /* mark where we are, in case we need to report errors */
10587     CLINE;
10588
10589     /* after skipping whitespace, the next character is the terminator */
10590     term = *s;
10591     if (!UTF || UTF8_IS_INVARIANT(term)) {
10592         termcode = termstr[0] = term;
10593         termlen = 1;
10594     }
10595     else {
10596         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10597         if (check_grapheme) {
10598             if (   UNLIKELY(UNICODE_IS_SUPER(termcode))
10599                 || UNLIKELY(UNICODE_IS_NONCHAR(termcode)))
10600             {
10601                 /* These are considered graphemes, and since the ending
10602                  * delimiter will be the same, we don't have to check the other
10603                  * end */
10604                 check_grapheme = FALSE;
10605             }
10606             else if (UNLIKELY(! _is_grapheme((U8 *) start,
10607                                              (U8 *) s,
10608                                              (U8 *) PL_bufend,
10609                                              termcode)))
10610             {
10611                 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "%s", non_grapheme_msg);
10612
10613                 /* Don't have to check the other end, as have already warned at
10614                  * this one */
10615                 check_grapheme = FALSE;
10616             }
10617         }
10618
10619         Copy(s, termstr, termlen, U8);
10620     }
10621
10622     /* mark where we are */
10623     PL_multi_start = CopLINE(PL_curcop);
10624     PL_multi_open = termcode;
10625     herelines = PL_parser->herelines;
10626
10627     /* If the delimiter has a mirror-image closing one, get it */
10628     if (term && (tmps = strchr(opening_delims, term))) {
10629         termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
10630     }
10631
10632     PL_multi_close = termcode;
10633
10634     if (PL_multi_open == PL_multi_close) {
10635         keep_bracketed_quoted = FALSE;
10636     }
10637
10638     /* create a new SV to hold the contents.  79 is the SV's initial length.
10639        What a random number. */
10640     sv = newSV_type(SVt_PVIV);
10641     SvGROW(sv, 80);
10642     SvIV_set(sv, termcode);
10643     (void)SvPOK_only(sv);               /* validate pointer */
10644
10645     /* move past delimiter and try to read a complete string */
10646     if (keep_delims)
10647         sv_catpvn(sv, s, termlen);
10648     s += termlen;
10649     for (;;) {
10650         /* extend sv if need be */
10651         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10652         /* set 'to' to the next character in the sv's string */
10653         to = SvPVX(sv)+SvCUR(sv);
10654
10655         /* if open delimiter is the close delimiter read unbridle */
10656         if (PL_multi_open == PL_multi_close) {
10657             for (; s < PL_bufend; s++,to++) {
10658                 /* embedded newlines increment the current line number */
10659                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10660                     COPLINE_INC_WITH_HERELINES;
10661                 /* handle quoted delimiters */
10662                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10663                     if (!keep_bracketed_quoted
10664                         && (s[1] == term
10665                             || (re_reparse && s[1] == '\\'))
10666                     )
10667                         s++;
10668                     else /* any other quotes are simply copied straight through */
10669                         *to++ = *s++;
10670                 }
10671                 /* terminate when run out of buffer (the for() condition), or
10672                    have found the terminator */
10673                 else if (*s == term) {  /* First byte of terminator matches */
10674                     if (termlen == 1)   /* If is the only byte, are done */
10675                         break;
10676
10677                     /* If the remainder of the terminator matches, also are
10678                      * done, after checking that is a separate grapheme */
10679                     if (   s + termlen <= PL_bufend
10680                         && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
10681                     {
10682                         if (   check_grapheme
10683                             && UNLIKELY(! _is_grapheme((U8 *) start,
10684                                                               (U8 *) s,
10685                                                               (U8 *) PL_bufend,
10686                                                               termcode)))
10687                         {
10688                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10689                                         "%s", non_grapheme_msg);
10690                         }
10691                         break;
10692                     }
10693                 }
10694                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
10695                     has_utf8 = TRUE;
10696                 }
10697
10698                 *to = *s;
10699             }
10700         }
10701
10702         /* if the terminator isn't the same as the start character (e.g.,
10703            matched brackets), we have to allow more in the quoting, and
10704            be prepared for nested brackets.
10705         */
10706         else {
10707             /* read until we run out of string, or we find the terminator */
10708             for (; s < PL_bufend; s++,to++) {
10709                 /* embedded newlines increment the line count */
10710                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10711                     COPLINE_INC_WITH_HERELINES;
10712                 /* backslashes can escape the open or closing characters */
10713                 if (*s == '\\' && s+1 < PL_bufend) {
10714                     if (!keep_bracketed_quoted
10715                        && ( ((UV)s[1] == PL_multi_open)
10716                          || ((UV)s[1] == PL_multi_close) ))
10717                     {
10718                         s++;
10719                     }
10720                     else
10721                         *to++ = *s++;
10722                 }
10723                 /* allow nested opens and closes */
10724                 else if ((UV)*s == PL_multi_close && --brackets <= 0)
10725                     break;
10726                 else if ((UV)*s == PL_multi_open)
10727                     brackets++;
10728                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10729                     has_utf8 = TRUE;
10730                 *to = *s;
10731             }
10732         }
10733         /* terminate the copied string and update the sv's end-of-string */
10734         *to = '\0';
10735         SvCUR_set(sv, to - SvPVX_const(sv));
10736
10737         /*
10738          * this next chunk reads more into the buffer if we're not done yet
10739          */
10740
10741         if (s < PL_bufend)
10742             break;              /* handle case where we are done yet :-) */
10743
10744 #ifndef PERL_STRICT_CR
10745         if (to - SvPVX_const(sv) >= 2) {
10746             if (   (to[-2] == '\r' && to[-1] == '\n')
10747                 || (to[-2] == '\n' && to[-1] == '\r'))
10748             {
10749                 to[-2] = '\n';
10750                 to--;
10751                 SvCUR_set(sv, to - SvPVX_const(sv));
10752             }
10753             else if (to[-1] == '\r')
10754                 to[-1] = '\n';
10755         }
10756         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10757             to[-1] = '\n';
10758 #endif
10759
10760         /* if we're out of file, or a read fails, bail and reset the current
10761            line marker so we can report where the unterminated string began
10762         */
10763         COPLINE_INC_WITH_HERELINES;
10764         PL_bufptr = PL_bufend;
10765         if (!lex_next_chunk(0)) {
10766             sv_free(sv);
10767             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10768             return NULL;
10769         }
10770         s = start = PL_bufptr;
10771     }
10772
10773     /* at this point, we have successfully read the delimited string */
10774
10775     if (keep_delims)
10776             sv_catpvn(sv, s, termlen);
10777     s += termlen;
10778
10779     if (has_utf8)
10780         SvUTF8_on(sv);
10781
10782     PL_multi_end = CopLINE(PL_curcop);
10783     CopLINE_set(PL_curcop, PL_multi_start);
10784     PL_parser->herelines = herelines;
10785
10786     /* if we allocated too much space, give some back */
10787     if (SvCUR(sv) + 5 < SvLEN(sv)) {
10788         SvLEN_set(sv, SvCUR(sv) + 1);
10789         SvPV_renew(sv, SvLEN(sv));
10790     }
10791
10792     /* decide whether this is the first or second quoted string we've read
10793        for this op
10794     */
10795
10796     if (PL_lex_stuff)
10797         PL_parser->lex_sub_repl = sv;
10798     else
10799         PL_lex_stuff = sv;
10800     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10801     return s;
10802 }
10803
10804 /*
10805   scan_num
10806   takes: pointer to position in buffer
10807   returns: pointer to new position in buffer
10808   side-effects: builds ops for the constant in pl_yylval.op
10809
10810   Read a number in any of the formats that Perl accepts:
10811
10812   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
10813   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
10814   0b[01](_?[01])*                                       binary integers
10815   0[0-7](_?[0-7])*                                      octal integers
10816   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
10817   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
10818
10819   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10820   thing it reads.
10821
10822   If it reads a number without a decimal point or an exponent, it will
10823   try converting the number to an integer and see if it can do so
10824   without loss of precision.
10825 */
10826
10827 char *
10828 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10829 {
10830     const char *s = start;      /* current position in buffer */
10831     char *d;                    /* destination in temp buffer */
10832     char *e;                    /* end of temp buffer */
10833     NV nv;                              /* number read, as a double */
10834     SV *sv = NULL;                      /* place to put the converted number */
10835     bool floatit;                       /* boolean: int or float? */
10836     const char *lastub = NULL;          /* position of last underbar */
10837     static const char* const number_too_long = "Number too long";
10838     bool warned_about_underscore = 0;
10839 #define WARN_ABOUT_UNDERSCORE() \
10840         do { \
10841             if (!warned_about_underscore) { \
10842                 warned_about_underscore = 1; \
10843                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
10844                                "Misplaced _ in number"); \
10845             } \
10846         } while(0)
10847     /* Hexadecimal floating point.
10848      *
10849      * In many places (where we have quads and NV is IEEE 754 double)
10850      * we can fit the mantissa bits of a NV into an unsigned quad.
10851      * (Note that UVs might not be quads even when we have quads.)
10852      * This will not work everywhere, though (either no quads, or
10853      * using long doubles), in which case we have to resort to NV,
10854      * which will probably mean horrible loss of precision due to
10855      * multiple fp operations. */
10856     bool hexfp = FALSE;
10857     int total_bits = 0;
10858     int significant_bits = 0;
10859 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10860 #  define HEXFP_UQUAD
10861     Uquad_t hexfp_uquad = 0;
10862     int hexfp_frac_bits = 0;
10863 #else
10864 #  define HEXFP_NV
10865     NV hexfp_nv = 0.0;
10866 #endif
10867     NV hexfp_mult = 1.0;
10868     UV high_non_zero = 0; /* highest digit */
10869     int non_zero_integer_digits = 0;
10870
10871     PERL_ARGS_ASSERT_SCAN_NUM;
10872
10873     /* We use the first character to decide what type of number this is */
10874
10875     switch (*s) {
10876     default:
10877         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10878
10879     /* if it starts with a 0, it could be an octal number, a decimal in
10880        0.13 disguise, or a hexadecimal number, or a binary number. */
10881     case '0':
10882         {
10883           /* variables:
10884              u          holds the "number so far"
10885              shift      the power of 2 of the base
10886                         (hex == 4, octal == 3, binary == 1)
10887              overflowed was the number more than we can hold?
10888
10889              Shift is used when we add a digit.  It also serves as an "are
10890              we in octal/hex/binary?" indicator to disallow hex characters
10891              when in octal mode.
10892            */
10893             NV n = 0.0;
10894             UV u = 0;
10895             I32 shift;
10896             bool overflowed = FALSE;
10897             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
10898             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10899             static const char* const bases[5] =
10900               { "", "binary", "", "octal", "hexadecimal" };
10901             static const char* const Bases[5] =
10902               { "", "Binary", "", "Octal", "Hexadecimal" };
10903             static const char* const maxima[5] =
10904               { "",
10905                 "0b11111111111111111111111111111111",
10906                 "",
10907                 "037777777777",
10908                 "0xffffffff" };
10909             const char *base, *Base, *max;
10910
10911             /* check for hex */
10912             if (isALPHA_FOLD_EQ(s[1], 'x')) {
10913                 shift = 4;
10914                 s += 2;
10915                 just_zero = FALSE;
10916             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10917                 shift = 1;
10918                 s += 2;
10919                 just_zero = FALSE;
10920             }
10921             /* check for a decimal in disguise */
10922             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10923                 goto decimal;
10924             /* so it must be octal */
10925             else {
10926                 shift = 3;
10927                 s++;
10928             }
10929
10930             if (*s == '_') {
10931                 WARN_ABOUT_UNDERSCORE();
10932                lastub = s++;
10933             }
10934
10935             base = bases[shift];
10936             Base = Bases[shift];
10937             max  = maxima[shift];
10938
10939             /* read the rest of the number */
10940             for (;;) {
10941                 /* x is used in the overflow test,
10942                    b is the digit we're adding on. */
10943                 UV x, b;
10944
10945                 switch (*s) {
10946
10947                 /* if we don't mention it, we're done */
10948                 default:
10949                     goto out;
10950
10951                 /* _ are ignored -- but warned about if consecutive */
10952                 case '_':
10953                     if (lastub && s == lastub + 1)
10954                         WARN_ABOUT_UNDERSCORE();
10955                     lastub = s++;
10956                     break;
10957
10958                 /* 8 and 9 are not octal */
10959                 case '8': case '9':
10960                     if (shift == 3)
10961                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10962                     /* FALLTHROUGH */
10963
10964                 /* octal digits */
10965                 case '2': case '3': case '4':
10966                 case '5': case '6': case '7':
10967                     if (shift == 1)
10968                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10969                     /* FALLTHROUGH */
10970
10971                 case '0': case '1':
10972                     b = *s++ & 15;              /* ASCII digit -> value of digit */
10973                     goto digit;
10974
10975                 /* hex digits */
10976                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10977                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10978                     /* make sure they said 0x */
10979                     if (shift != 4)
10980                         goto out;
10981                     b = (*s++ & 7) + 9;
10982
10983                     /* Prepare to put the digit we have onto the end
10984                        of the number so far.  We check for overflows.
10985                     */
10986
10987                   digit:
10988                     just_zero = FALSE;
10989                     if (!overflowed) {
10990                         x = u << shift; /* make room for the digit */
10991
10992                         total_bits += shift;
10993
10994                         if ((x >> shift) != u
10995                             && !(PL_hints & HINT_NEW_BINARY)) {
10996                             overflowed = TRUE;
10997                             n = (NV) u;
10998                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10999                                              "Integer overflow in %s number",
11000                                              base);
11001                         } else
11002                             u = x | b;          /* add the digit to the end */
11003                     }
11004                     if (overflowed) {
11005                         n *= nvshift[shift];
11006                         /* If an NV has not enough bits in its
11007                          * mantissa to represent an UV this summing of
11008                          * small low-order numbers is a waste of time
11009                          * (because the NV cannot preserve the
11010                          * low-order bits anyway): we could just
11011                          * remember when did we overflow and in the
11012                          * end just multiply n by the right
11013                          * amount. */
11014                         n += (NV) b;
11015                     }
11016
11017                     if (high_non_zero == 0 && b > 0)
11018                         high_non_zero = b;
11019
11020                     if (high_non_zero)
11021                         non_zero_integer_digits++;
11022
11023                     /* this could be hexfp, but peek ahead
11024                      * to avoid matching ".." */
11025                     if (UNLIKELY(HEXFP_PEEK(s))) {
11026                         goto out;
11027                     }
11028
11029                     break;
11030                 }
11031             }
11032
11033           /* if we get here, we had success: make a scalar value from
11034              the number.
11035           */
11036           out:
11037
11038             /* final misplaced underbar check */
11039             if (s[-1] == '_')
11040                 WARN_ABOUT_UNDERSCORE();
11041
11042             if (UNLIKELY(HEXFP_PEEK(s))) {
11043                 /* Do sloppy (on the underbars) but quick detection
11044                  * (and value construction) for hexfp, the decimal
11045                  * detection will shortly be more thorough with the
11046                  * underbar checks. */
11047                 const char* h = s;
11048                 significant_bits = non_zero_integer_digits * shift;
11049 #ifdef HEXFP_UQUAD
11050                 hexfp_uquad = u;
11051 #else /* HEXFP_NV */
11052                 hexfp_nv = u;
11053 #endif
11054                 /* Ignore the leading zero bits of
11055                  * the high (first) non-zero digit. */
11056                 if (high_non_zero) {
11057                     if (high_non_zero < 0x8)
11058                         significant_bits--;
11059                     if (high_non_zero < 0x4)
11060                         significant_bits--;
11061                     if (high_non_zero < 0x2)
11062                         significant_bits--;
11063                 }
11064
11065                 if (*h == '.') {
11066 #ifdef HEXFP_NV
11067                     NV nv_mult = 1.0;
11068 #endif
11069                     bool accumulate = TRUE;
11070                     for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
11071                         if (isXDIGIT(*h)) {
11072                             U8 b = XDIGIT_VALUE(*h);
11073                             significant_bits += shift;
11074 #ifdef HEXFP_UQUAD
11075                             if (accumulate) {
11076                                 if (significant_bits < NV_MANT_DIG) {
11077                                     /* We are in the long "run" of xdigits,
11078                                      * accumulate the full four bits. */
11079                                     hexfp_uquad <<= shift;
11080                                     hexfp_uquad |= b;
11081                                     hexfp_frac_bits += shift;
11082                                 } else {
11083                                     /* We are at a hexdigit either at,
11084                                      * or straddling, the edge of mantissa.
11085                                      * We will try grabbing as many as
11086                                      * possible bits. */
11087                                     int tail =
11088                                       significant_bits - NV_MANT_DIG;
11089                                     if (tail <= 0)
11090                                        tail += shift;
11091                                     hexfp_uquad <<= tail;
11092                                     hexfp_uquad |= b >> (shift - tail);
11093                                     hexfp_frac_bits += tail;
11094
11095                                     /* Ignore the trailing zero bits
11096                                      * of the last non-zero xdigit.
11097                                      *
11098                                      * The assumption here is that if
11099                                      * one has input of e.g. the xdigit
11100                                      * eight (0x8), there is only one
11101                                      * bit being input, not the full
11102                                      * four bits.  Conversely, if one
11103                                      * specifies a zero xdigit, the
11104                                      * assumption is that one really
11105                                      * wants all those bits to be zero. */
11106                                     if (b) {
11107                                         if ((b & 0x1) == 0x0) {
11108                                             significant_bits--;
11109                                             if ((b & 0x2) == 0x0) {
11110                                                 significant_bits--;
11111                                                 if ((b & 0x4) == 0x0) {
11112                                                     significant_bits--;
11113                                                 }
11114                                             }
11115                                         }
11116                                     }
11117
11118                                     accumulate = FALSE;
11119                                 }
11120                             } else {
11121                                 /* Keep skipping the xdigits, and
11122                                  * accumulating the significant bits,
11123                                  * but do not shift the uquad
11124                                  * (which would catastrophically drop
11125                                  * high-order bits) or accumulate the
11126                                  * xdigits anymore. */
11127                             }
11128 #else /* HEXFP_NV */
11129                             if (accumulate) {
11130                                 nv_mult /= 16.0;
11131                                 if (nv_mult > 0.0)
11132                                     hexfp_nv += b * nv_mult;
11133                                 else
11134                                     accumulate = FALSE;
11135                             }
11136 #endif
11137                         }
11138                         if (significant_bits >= NV_MANT_DIG)
11139                             accumulate = FALSE;
11140                     }
11141                 }
11142
11143                 if ((total_bits > 0 || significant_bits > 0) &&
11144                     isALPHA_FOLD_EQ(*h, 'p')) {
11145                     bool negexp = FALSE;
11146                     h++;
11147                     if (*h == '+')
11148                         h++;
11149                     else if (*h == '-') {
11150                         negexp = TRUE;
11151                         h++;
11152                     }
11153                     if (isDIGIT(*h)) {
11154                         I32 hexfp_exp = 0;
11155                         while (isDIGIT(*h) || *h == '_') {
11156                             if (isDIGIT(*h)) {
11157                                 hexfp_exp *= 10;
11158                                 hexfp_exp += *h - '0';
11159 #ifdef NV_MIN_EXP
11160                                 if (negexp
11161                                     && -hexfp_exp < NV_MIN_EXP - 1) {
11162                                     /* NOTE: this means that the exponent
11163                                      * underflow warning happens for
11164                                      * the IEEE 754 subnormals (denormals),
11165                                      * because DBL_MIN_EXP etc are the lowest
11166                                      * possible binary (or, rather, DBL_RADIX-base)
11167                                      * exponent for normals, not subnormals.
11168                                      *
11169                                      * This may or may not be a good thing. */
11170                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11171                                                    "Hexadecimal float: exponent underflow");
11172                                     break;
11173                                 }
11174 #endif
11175 #ifdef NV_MAX_EXP
11176                                 if (!negexp
11177                                     && hexfp_exp > NV_MAX_EXP - 1) {
11178                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11179                                                    "Hexadecimal float: exponent overflow");
11180                                     break;
11181                                 }
11182 #endif
11183                             }
11184                             h++;
11185                         }
11186                         if (negexp)
11187                             hexfp_exp = -hexfp_exp;
11188 #ifdef HEXFP_UQUAD
11189                         hexfp_exp -= hexfp_frac_bits;
11190 #endif
11191                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
11192                         hexfp = TRUE;
11193                         goto decimal;
11194                     }
11195                 }
11196             }
11197
11198             if (overflowed) {
11199                 if (n > 4294967295.0)
11200                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11201                                    "%s number > %s non-portable",
11202                                    Base, max);
11203                 sv = newSVnv(n);
11204             }
11205             else {
11206 #if UVSIZE > 4
11207                 if (u > 0xffffffff)
11208                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11209                                    "%s number > %s non-portable",
11210                                    Base, max);
11211 #endif
11212                 sv = newSVuv(u);
11213             }
11214             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11215                 sv = new_constant(start, s - start, "integer",
11216                                   sv, NULL, NULL, 0);
11217             else if (PL_hints & HINT_NEW_BINARY)
11218                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11219         }
11220         break;
11221
11222     /*
11223       handle decimal numbers.
11224       we're also sent here when we read a 0 as the first digit
11225     */
11226     case '1': case '2': case '3': case '4': case '5':
11227     case '6': case '7': case '8': case '9': case '.':
11228       decimal:
11229         d = PL_tokenbuf;
11230         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11231         floatit = FALSE;
11232         if (hexfp) {
11233             floatit = TRUE;
11234             *d++ = '0';
11235             *d++ = 'x';
11236             s = start + 2;
11237         }
11238
11239         /* read next group of digits and _ and copy into d */
11240         while (isDIGIT(*s)
11241                || *s == '_'
11242                || UNLIKELY(hexfp && isXDIGIT(*s)))
11243         {
11244             /* skip underscores, checking for misplaced ones
11245                if -w is on
11246             */
11247             if (*s == '_') {
11248                 if (lastub && s == lastub + 1)
11249                     WARN_ABOUT_UNDERSCORE();
11250                 lastub = s++;
11251             }
11252             else {
11253                 /* check for end of fixed-length buffer */
11254                 if (d >= e)
11255                     Perl_croak(aTHX_ "%s", number_too_long);
11256                 /* if we're ok, copy the character */
11257                 *d++ = *s++;
11258             }
11259         }
11260
11261         /* final misplaced underbar check */
11262         if (lastub && s == lastub + 1)
11263             WARN_ABOUT_UNDERSCORE();
11264
11265         /* read a decimal portion if there is one.  avoid
11266            3..5 being interpreted as the number 3. followed
11267            by .5
11268         */
11269         if (*s == '.' && s[1] != '.') {
11270             floatit = TRUE;
11271             *d++ = *s++;
11272
11273             if (*s == '_') {
11274                 WARN_ABOUT_UNDERSCORE();
11275                 lastub = s;
11276             }
11277
11278             /* copy, ignoring underbars, until we run out of digits.
11279             */
11280             for (; isDIGIT(*s)
11281                    || *s == '_'
11282                    || UNLIKELY(hexfp && isXDIGIT(*s));
11283                  s++)
11284             {
11285                 /* fixed length buffer check */
11286                 if (d >= e)
11287                     Perl_croak(aTHX_ "%s", number_too_long);
11288                 if (*s == '_') {
11289                    if (lastub && s == lastub + 1)
11290                         WARN_ABOUT_UNDERSCORE();
11291                    lastub = s;
11292                 }
11293                 else
11294                     *d++ = *s;
11295             }
11296             /* fractional part ending in underbar? */
11297             if (s[-1] == '_')
11298                 WARN_ABOUT_UNDERSCORE();
11299             if (*s == '.' && isDIGIT(s[1])) {
11300                 /* oops, it's really a v-string, but without the "v" */
11301                 s = start;
11302                 goto vstring;
11303             }
11304         }
11305
11306         /* read exponent part, if present */
11307         if ((isALPHA_FOLD_EQ(*s, 'e')
11308               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11309             && strchr("+-0123456789_", s[1]))
11310         {
11311             int exp_digits = 0;
11312             const char *save_s = s;
11313             char * save_d = d;
11314
11315             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11316                ditto for p (hexfloats) */
11317             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11318                 /* At least some Mach atof()s don't grok 'E' */
11319                 *d++ = 'e';
11320             }
11321             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11322                 *d++ = 'p';
11323             }
11324
11325             s++;
11326
11327
11328             /* stray preinitial _ */
11329             if (*s == '_') {
11330                 WARN_ABOUT_UNDERSCORE();
11331                 lastub = s++;
11332             }
11333
11334             /* allow positive or negative exponent */
11335             if (*s == '+' || *s == '-')
11336                 *d++ = *s++;
11337
11338             /* stray initial _ */
11339             if (*s == '_') {
11340                 WARN_ABOUT_UNDERSCORE();
11341                 lastub = s++;
11342             }
11343
11344             /* read digits of exponent */
11345             while (isDIGIT(*s) || *s == '_') {
11346                 if (isDIGIT(*s)) {
11347                     ++exp_digits;
11348                     if (d >= e)
11349                         Perl_croak(aTHX_ "%s", number_too_long);
11350                     *d++ = *s++;
11351                 }
11352                 else {
11353                    if (((lastub && s == lastub + 1)
11354                         || (!isDIGIT(s[1]) && s[1] != '_')))
11355                         WARN_ABOUT_UNDERSCORE();
11356                    lastub = s++;
11357                 }
11358             }
11359
11360             if (!exp_digits) {
11361                 /* no exponent digits, the [eEpP] could be for something else,
11362                  * though in practice we don't get here for p since that's preparsed
11363                  * earlier, and results in only the 0xX being consumed, so behave similarly
11364                  * for decimal floats and consume only the D.DD, leaving the [eE] to the
11365                  * next token.
11366                  */
11367                 s = save_s;
11368                 d = save_d;
11369             }
11370             else {
11371                 floatit = TRUE;
11372             }
11373         }
11374
11375
11376         /*
11377            We try to do an integer conversion first if no characters
11378            indicating "float" have been found.
11379          */
11380
11381         if (!floatit) {
11382             UV uv;
11383             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11384
11385             if (flags == IS_NUMBER_IN_UV) {
11386               if (uv <= IV_MAX)
11387                 sv = newSViv(uv); /* Prefer IVs over UVs. */
11388               else
11389                 sv = newSVuv(uv);
11390             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11391               if (uv <= (UV) IV_MIN)
11392                 sv = newSViv(-(IV)uv);
11393               else
11394                 floatit = TRUE;
11395             } else
11396               floatit = TRUE;
11397         }
11398         if (floatit) {
11399             STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
11400             /* terminate the string */
11401             *d = '\0';
11402             if (UNLIKELY(hexfp)) {
11403 #  ifdef NV_MANT_DIG
11404                 if (significant_bits > NV_MANT_DIG)
11405                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11406                                    "Hexadecimal float: mantissa overflow");
11407 #  endif
11408 #ifdef HEXFP_UQUAD
11409                 nv = hexfp_uquad * hexfp_mult;
11410 #else /* HEXFP_NV */
11411                 nv = hexfp_nv * hexfp_mult;
11412 #endif
11413             } else {
11414                 nv = Atof(PL_tokenbuf);
11415             }
11416             RESTORE_LC_NUMERIC_UNDERLYING();
11417             sv = newSVnv(nv);
11418         }
11419
11420         if ( floatit
11421              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11422             const char *const key = floatit ? "float" : "integer";
11423             const STRLEN keylen = floatit ? 5 : 7;
11424             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11425                                 key, keylen, sv, NULL, NULL, 0);
11426         }
11427         break;
11428
11429     /* if it starts with a v, it could be a v-string */
11430     case 'v':
11431     vstring:
11432                 sv = newSV(5); /* preallocate storage space */
11433                 ENTER_with_name("scan_vstring");
11434                 SAVEFREESV(sv);
11435                 s = scan_vstring(s, PL_bufend, sv);
11436                 SvREFCNT_inc_simple_void_NN(sv);
11437                 LEAVE_with_name("scan_vstring");
11438         break;
11439     }
11440
11441     /* make the op for the constant and return */
11442
11443     if (sv)
11444         lvalp->opval = newSVOP(OP_CONST, 0, sv);
11445     else
11446         lvalp->opval = NULL;
11447
11448     return (char *)s;
11449 }
11450
11451 STATIC char *
11452 S_scan_formline(pTHX_ char *s)
11453 {
11454     SV * const stuff = newSVpvs("");
11455     bool needargs = FALSE;
11456     bool eofmt = FALSE;
11457
11458     PERL_ARGS_ASSERT_SCAN_FORMLINE;
11459
11460     while (!needargs) {
11461         char *eol;
11462         if (*s == '.') {
11463             char *t = s+1;
11464 #ifdef PERL_STRICT_CR
11465             while (SPACE_OR_TAB(*t))
11466                 t++;
11467 #else
11468             while (SPACE_OR_TAB(*t) || *t == '\r')
11469                 t++;
11470 #endif
11471             if (*t == '\n' || t == PL_bufend) {
11472                 eofmt = TRUE;
11473                 break;
11474             }
11475         }
11476         eol = (char *) memchr(s,'\n',PL_bufend-s);
11477         if (!eol++)
11478                 eol = PL_bufend;
11479         if (*s != '#') {
11480             char *t;
11481             for (t = s; t < eol; t++) {
11482                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11483                     needargs = FALSE;
11484                     goto enough;        /* ~~ must be first line in formline */
11485                 }
11486                 if (*t == '@' || *t == '^')
11487                     needargs = TRUE;
11488             }
11489             if (eol > s) {
11490                 sv_catpvn(stuff, s, eol-s);
11491 #ifndef PERL_STRICT_CR
11492                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11493                     char *end = SvPVX(stuff) + SvCUR(stuff);
11494                     end[-2] = '\n';
11495                     end[-1] = '\0';
11496                     SvCUR_set(stuff, SvCUR(stuff) - 1);
11497                 }
11498 #endif
11499             }
11500             else
11501               break;
11502         }
11503         s = (char*)eol;
11504         if ((PL_rsfp || PL_parser->filtered)
11505          && PL_parser->form_lex_state == LEX_NORMAL) {
11506             bool got_some;
11507             PL_bufptr = PL_bufend;
11508             COPLINE_INC_WITH_HERELINES;
11509             got_some = lex_next_chunk(0);
11510             CopLINE_dec(PL_curcop);
11511             s = PL_bufptr;
11512             if (!got_some)
11513                 break;
11514         }
11515         incline(s, PL_bufend);
11516     }
11517   enough:
11518     if (!SvCUR(stuff) || needargs)
11519         PL_lex_state = PL_parser->form_lex_state;
11520     if (SvCUR(stuff)) {
11521         PL_expect = XSTATE;
11522         if (needargs) {
11523             const char *s2 = s;
11524             while (isSPACE(*s2) && *s2 != '\n')
11525                 s2++;
11526             if (*s2 == '{') {
11527                 PL_expect = XTERMBLOCK;
11528                 NEXTVAL_NEXTTOKE.ival = 0;
11529                 force_next(DO);
11530             }
11531             NEXTVAL_NEXTTOKE.ival = 0;
11532             force_next(FORMLBRACK);
11533         }
11534         if (!IN_BYTES) {
11535             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11536                 SvUTF8_on(stuff);
11537         }
11538         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
11539         force_next(THING);
11540     }
11541     else {
11542         SvREFCNT_dec(stuff);
11543         if (eofmt)
11544             PL_lex_formbrack = 0;
11545     }
11546     return s;
11547 }
11548
11549 I32
11550 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11551 {
11552     const I32 oldsavestack_ix = PL_savestack_ix;
11553     CV* const outsidecv = PL_compcv;
11554
11555     SAVEI32(PL_subline);
11556     save_item(PL_subname);
11557     SAVESPTR(PL_compcv);
11558
11559     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11560     CvFLAGS(PL_compcv) |= flags;
11561
11562     PL_subline = CopLINE(PL_curcop);
11563     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11564     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11565     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11566     if (outsidecv && CvPADLIST(outsidecv))
11567         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
11568
11569     return oldsavestack_ix;
11570 }
11571
11572 static int
11573 S_yywarn(pTHX_ const char *const s, U32 flags)
11574 {
11575     PERL_ARGS_ASSERT_YYWARN;
11576
11577     PL_in_eval |= EVAL_WARNONLY;
11578     yyerror_pv(s, flags);
11579     return 0;
11580 }
11581
11582 void
11583 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
11584 {
11585     PERL_ARGS_ASSERT_ABORT_EXECUTION;
11586
11587     if (PL_minus_c)
11588         Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
11589     else {
11590         Perl_croak(aTHX_
11591                 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
11592     }
11593     NOT_REACHED; /* NOTREACHED */
11594 }
11595
11596 void
11597 Perl_yyquit(pTHX)
11598 {
11599     /* Called, after at least one error has been found, to abort the parse now,
11600      * instead of trying to forge ahead */
11601
11602     yyerror_pvn(NULL, 0, 0);
11603 }
11604
11605 int
11606 Perl_yyerror(pTHX_ const char *const s)
11607 {
11608     PERL_ARGS_ASSERT_YYERROR;
11609     return yyerror_pvn(s, strlen(s), 0);
11610 }
11611
11612 int
11613 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11614 {
11615     PERL_ARGS_ASSERT_YYERROR_PV;
11616     return yyerror_pvn(s, strlen(s), flags);
11617 }
11618
11619 int
11620 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11621 {
11622     const char *context = NULL;
11623     int contlen = -1;
11624     SV *msg;
11625     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11626     int yychar  = PL_parser->yychar;
11627
11628     /* Output error message 's' with length 'len'.  'flags' are SV flags that
11629      * apply.  If the number of errors found is large enough, it abandons
11630      * parsing.  If 's' is NULL, there is no message, and it abandons
11631      * processing unconditionally */
11632
11633     if (s != NULL) {
11634         if (!yychar || (yychar == ';' && !PL_rsfp))
11635             sv_catpvs(where_sv, "at EOF");
11636         else if (   PL_oldoldbufptr
11637                  && PL_bufptr > PL_oldoldbufptr
11638                  && PL_bufptr - PL_oldoldbufptr < 200
11639                  && PL_oldoldbufptr != PL_oldbufptr
11640                  && PL_oldbufptr != PL_bufptr)
11641         {
11642             /*
11643                     Only for NetWare:
11644                     The code below is removed for NetWare because it
11645                     abends/crashes on NetWare when the script has error such as
11646                     not having the closing quotes like:
11647                         if ($var eq "value)
11648                     Checking of white spaces is anyway done in NetWare code.
11649             */
11650 #ifndef NETWARE
11651             while (isSPACE(*PL_oldoldbufptr))
11652                 PL_oldoldbufptr++;
11653 #endif
11654             context = PL_oldoldbufptr;
11655             contlen = PL_bufptr - PL_oldoldbufptr;
11656         }
11657         else if (  PL_oldbufptr
11658                 && PL_bufptr > PL_oldbufptr
11659                 && PL_bufptr - PL_oldbufptr < 200
11660                 && PL_oldbufptr != PL_bufptr) {
11661             /*
11662                     Only for NetWare:
11663                     The code below is removed for NetWare because it
11664                     abends/crashes on NetWare when the script has error such as
11665                     not having the closing quotes like:
11666                         if ($var eq "value)
11667                     Checking of white spaces is anyway done in NetWare code.
11668             */
11669 #ifndef NETWARE
11670             while (isSPACE(*PL_oldbufptr))
11671                 PL_oldbufptr++;
11672 #endif
11673             context = PL_oldbufptr;
11674             contlen = PL_bufptr - PL_oldbufptr;
11675         }
11676         else if (yychar > 255)
11677             sv_catpvs(where_sv, "next token ???");
11678         else if (yychar == YYEMPTY) {
11679             if (PL_lex_state == LEX_NORMAL)
11680                 sv_catpvs(where_sv, "at end of line");
11681             else if (PL_lex_inpat)
11682                 sv_catpvs(where_sv, "within pattern");
11683             else
11684                 sv_catpvs(where_sv, "within string");
11685         }
11686         else {
11687             sv_catpvs(where_sv, "next char ");
11688             if (yychar < 32)
11689                 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11690             else if (isPRINT_LC(yychar)) {
11691                 const char string = yychar;
11692                 sv_catpvn(where_sv, &string, 1);
11693             }
11694             else
11695                 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11696         }
11697         msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11698         Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
11699             OutCopFILE(PL_curcop),
11700             (IV)(PL_parser->preambling == NOLINE
11701                    ? CopLINE(PL_curcop)
11702                    : PL_parser->preambling));
11703         if (context)
11704             Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
11705                                  UTF8fARG(UTF, contlen, context));
11706         else
11707             Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
11708         if (   PL_multi_start < PL_multi_end
11709             && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
11710         {
11711             Perl_sv_catpvf(aTHX_ msg,
11712             "  (Might be a runaway multi-line %c%c string starting on"
11713             " line %" IVdf ")\n",
11714                     (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11715             PL_multi_end = 0;
11716         }
11717         if (PL_in_eval & EVAL_WARNONLY) {
11718             PL_in_eval &= ~EVAL_WARNONLY;
11719             Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
11720         }
11721         else {
11722             qerror(msg);
11723         }
11724     }
11725     if (s == NULL || PL_error_count >= 10) {
11726         const char * msg = "";
11727         const char * const name = OutCopFILE(PL_curcop);
11728
11729         if (PL_in_eval) {
11730             SV * errsv = ERRSV;
11731             if (SvCUR(errsv)) {
11732                 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
11733             }
11734         }
11735
11736         if (s == NULL) {
11737             abort_execution(msg, name);
11738         }
11739         else {
11740             Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
11741         }
11742     }
11743     PL_in_my = 0;
11744     PL_in_my_stash = NULL;
11745     return 0;
11746 }
11747
11748 STATIC char*
11749 S_swallow_bom(pTHX_ U8 *s)
11750 {
11751     const STRLEN slen = SvCUR(PL_linestr);
11752
11753     PERL_ARGS_ASSERT_SWALLOW_BOM;
11754
11755     switch (s[0]) {
11756     case 0xFF:
11757         if (s[1] == 0xFE) {
11758             /* UTF-16 little-endian? (or UTF-32LE?) */
11759             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
11760                 /* diag_listed_as: Unsupported script encoding %s */
11761                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11762 #ifndef PERL_NO_UTF16_FILTER
11763 #ifdef DEBUGGING
11764             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11765 #endif
11766             s += 2;
11767             if (PL_bufend > (char*)s) {
11768                 s = add_utf16_textfilter(s, TRUE);
11769             }
11770 #else
11771             /* diag_listed_as: Unsupported script encoding %s */
11772             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11773 #endif
11774         }
11775         break;
11776     case 0xFE:
11777         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
11778 #ifndef PERL_NO_UTF16_FILTER
11779 #ifdef DEBUGGING
11780             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11781 #endif
11782             s += 2;
11783             if (PL_bufend > (char *)s) {
11784                 s = add_utf16_textfilter(s, FALSE);
11785             }
11786 #else
11787             /* diag_listed_as: Unsupported script encoding %s */
11788             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11789 #endif
11790         }
11791         break;
11792     case BOM_UTF8_FIRST_BYTE: {
11793         if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
11794 #ifdef DEBUGGING
11795             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11796 #endif
11797             s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
11798         }
11799         break;
11800     }
11801     case 0:
11802         if (slen > 3) {
11803              if (s[1] == 0) {
11804                   if (s[2] == 0xFE && s[3] == 0xFF) {
11805                        /* UTF-32 big-endian */
11806                        /* diag_listed_as: Unsupported script encoding %s */
11807                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11808                   }
11809              }
11810              else if (s[2] == 0 && s[3] != 0) {
11811                   /* Leading bytes
11812                    * 00 xx 00 xx
11813                    * are a good indicator of UTF-16BE. */
11814 #ifndef PERL_NO_UTF16_FILTER
11815 #ifdef DEBUGGING
11816                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11817 #endif
11818                   s = add_utf16_textfilter(s, FALSE);
11819 #else
11820                   /* diag_listed_as: Unsupported script encoding %s */
11821                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11822 #endif
11823              }
11824         }
11825         break;
11826
11827     default:
11828          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11829                   /* Leading bytes
11830                    * xx 00 xx 00
11831                    * are a good indicator of UTF-16LE. */
11832 #ifndef PERL_NO_UTF16_FILTER
11833 #ifdef DEBUGGING
11834               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11835 #endif
11836               s = add_utf16_textfilter(s, TRUE);
11837 #else
11838               /* diag_listed_as: Unsupported script encoding %s */
11839               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11840 #endif
11841          }
11842     }
11843     return (char*)s;
11844 }
11845
11846
11847 #ifndef PERL_NO_UTF16_FILTER
11848 static I32
11849 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11850 {
11851     SV *const filter = FILTER_DATA(idx);
11852     /* We re-use this each time round, throwing the contents away before we
11853        return.  */
11854     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11855     SV *const utf8_buffer = filter;
11856     IV status = IoPAGE(filter);
11857     const bool reverse = cBOOL(IoLINES(filter));
11858     I32 retval;
11859
11860     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11861
11862     /* As we're automatically added, at the lowest level, and hence only called
11863        from this file, we can be sure that we're not called in block mode. Hence
11864        don't bother writing code to deal with block mode.  */
11865     if (maxlen) {
11866         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11867     }
11868     if (status < 0) {
11869         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
11870     }
11871     DEBUG_P(PerlIO_printf(Perl_debug_log,
11872                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
11873                           FPTR2DPTR(void *, S_utf16_textfilter),
11874                           reverse ? 'l' : 'b', idx, maxlen, status,
11875                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11876
11877     while (1) {
11878         STRLEN chars;
11879         STRLEN have;
11880         I32 newlen;
11881         U8 *end;
11882         /* First, look in our buffer of existing UTF-8 data:  */
11883         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11884
11885         if (nl) {
11886             ++nl;
11887         } else if (status == 0) {
11888             /* EOF */
11889             IoPAGE(filter) = 0;
11890             nl = SvEND(utf8_buffer);
11891         }
11892         if (nl) {
11893             STRLEN got = nl - SvPVX(utf8_buffer);
11894             /* Did we have anything to append?  */
11895             retval = got != 0;
11896             sv_catpvn(sv, SvPVX(utf8_buffer), got);
11897             /* Everything else in this code works just fine if SVp_POK isn't
11898                set.  This, however, needs it, and we need it to work, else
11899                we loop infinitely because the buffer is never consumed.  */
11900             sv_chop(utf8_buffer, nl);
11901             break;
11902         }
11903
11904         /* OK, not a complete line there, so need to read some more UTF-16.
11905            Read an extra octect if the buffer currently has an odd number. */
11906         while (1) {
11907             if (status <= 0)
11908                 break;
11909             if (SvCUR(utf16_buffer) >= 2) {
11910                 /* Location of the high octet of the last complete code point.
11911                    Gosh, UTF-16 is a pain. All the benefits of variable length,
11912                    *coupled* with all the benefits of partial reads and
11913                    endianness.  */
11914                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11915                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11916
11917                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11918                     break;
11919                 }
11920
11921                 /* We have the first half of a surrogate. Read more.  */
11922                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11923             }
11924
11925             status = FILTER_READ(idx + 1, utf16_buffer,
11926                                  160 + (SvCUR(utf16_buffer) & 1));
11927             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
11928             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11929             if (status < 0) {
11930                 /* Error */
11931                 IoPAGE(filter) = status;
11932                 return status;
11933             }
11934         }
11935
11936         /* 'chars' isn't quite the right name, as code points above 0xFFFF
11937          * require 4 bytes per char */
11938         chars = SvCUR(utf16_buffer) >> 1;
11939         have = SvCUR(utf8_buffer);
11940
11941         /* Assume the worst case size as noted by the functions: twice the
11942          * number of input bytes */
11943         SvGROW(utf8_buffer, have + chars * 4 + 1);
11944
11945         if (reverse) {
11946             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11947                                          (U8*)SvPVX_const(utf8_buffer) + have,
11948                                          chars * 2, &newlen);
11949         } else {
11950             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11951                                 (U8*)SvPVX_const(utf8_buffer) + have,
11952                                 chars * 2, &newlen);
11953         }
11954         SvCUR_set(utf8_buffer, have + newlen);
11955         *end = '\0';
11956
11957         /* No need to keep this SV "well-formed" with a '\0' after the end, as
11958            it's private to us, and utf16_to_utf8{,reversed} take a
11959            (pointer,length) pair, rather than a NUL-terminated string.  */
11960         if(SvCUR(utf16_buffer) & 1) {
11961             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11962             SvCUR_set(utf16_buffer, 1);
11963         } else {
11964             SvCUR_set(utf16_buffer, 0);
11965         }
11966     }
11967     DEBUG_P(PerlIO_printf(Perl_debug_log,
11968                           "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
11969                           status,
11970                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11971     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11972     return retval;
11973 }
11974
11975 static U8 *
11976 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11977 {
11978     SV *filter = filter_add(S_utf16_textfilter, NULL);
11979
11980     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11981
11982     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11983     SvPVCLEAR(filter);
11984     IoLINES(filter) = reversed;
11985     IoPAGE(filter) = 1; /* Not EOF */
11986
11987     /* Sadly, we have to return a valid pointer, come what may, so we have to
11988        ignore any error return from this.  */
11989     SvCUR_set(PL_linestr, 0);
11990     if (FILTER_READ(0, PL_linestr, 0)) {
11991         SvUTF8_on(PL_linestr);
11992     } else {
11993         SvUTF8_on(PL_linestr);
11994     }
11995     PL_bufend = SvEND(PL_linestr);
11996     return (U8*)SvPVX(PL_linestr);
11997 }
11998 #endif
11999
12000 /*
12001 Returns a pointer to the next character after the parsed
12002 vstring, as well as updating the passed in sv.
12003
12004 Function must be called like
12005
12006         sv = sv_2mortal(newSV(5));
12007         s = scan_vstring(s,e,sv);
12008
12009 where s and e are the start and end of the string.
12010 The sv should already be large enough to store the vstring
12011 passed in, for performance reasons.
12012
12013 This function may croak if fatal warnings are enabled in the
12014 calling scope, hence the sv_2mortal in the example (to prevent
12015 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
12016 sv_2mortal.
12017
12018 */
12019
12020 char *
12021 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12022 {
12023     const char *pos = s;
12024     const char *start = s;
12025
12026     PERL_ARGS_ASSERT_SCAN_VSTRING;
12027
12028     if (*pos == 'v') pos++;  /* get past 'v' */
12029     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12030         pos++;
12031     if ( *pos != '.') {
12032         /* this may not be a v-string if followed by => */
12033         const char *next = pos;
12034         while (next < e && isSPACE(*next))
12035             ++next;
12036         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12037             /* return string not v-string */
12038             sv_setpvn(sv,(char *)s,pos-s);
12039             return (char *)pos;
12040         }
12041     }
12042
12043     if (!isALPHA(*pos)) {
12044         U8 tmpbuf[UTF8_MAXBYTES+1];
12045
12046         if (*s == 'v')
12047             s++;  /* get past 'v' */
12048
12049         SvPVCLEAR(sv);
12050
12051         for (;;) {
12052             /* this is atoi() that tolerates underscores */
12053             U8 *tmpend;
12054             UV rev = 0;
12055             const char *end = pos;
12056             UV mult = 1;
12057             while (--end >= s) {
12058                 if (*end != '_') {
12059                     const UV orev = rev;
12060                     rev += (*end - '0') * mult;
12061                     mult *= 10;
12062                     if (orev > rev)
12063                         /* diag_listed_as: Integer overflow in %s number */
12064                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12065                                          "Integer overflow in decimal number");
12066                 }
12067             }
12068
12069             /* Append native character for the rev point */
12070             tmpend = uvchr_to_utf8(tmpbuf, rev);
12071             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12072             if (!UVCHR_IS_INVARIANT(rev))
12073                  SvUTF8_on(sv);
12074             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12075                  s = ++pos;
12076             else {
12077                  s = pos;
12078                  break;
12079             }
12080             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12081                  pos++;
12082         }
12083         SvPOK_on(sv);
12084         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12085         SvRMAGICAL_on(sv);
12086     }
12087     return (char *)s;
12088 }
12089
12090 int
12091 Perl_keyword_plugin_standard(pTHX_
12092         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12093 {
12094     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12095     PERL_UNUSED_CONTEXT;
12096     PERL_UNUSED_ARG(keyword_ptr);
12097     PERL_UNUSED_ARG(keyword_len);
12098     PERL_UNUSED_ARG(op_ptr);
12099     return KEYWORD_PLUGIN_DECLINE;
12100 }
12101
12102 /*
12103 =for apidoc Amx|void|wrap_keyword_plugin|Perl_keyword_plugin_t new_plugin|Perl_keyword_plugin_t *old_plugin_p
12104
12105 Puts a C function into the chain of keyword plugins.  This is the
12106 preferred way to manipulate the L</PL_keyword_plugin> variable.
12107 C<new_plugin> is a pointer to the C function that is to be added to the
12108 keyword plugin chain, and C<old_plugin_p> points to the storage location
12109 where a pointer to the next function in the chain will be stored.  The
12110 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12111 while the value previously stored there is written to C<*old_plugin_p>.
12112
12113 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12114 to hook keyword parsing may find itself invoked more than once per
12115 process, typically in different threads.  To handle that situation, this
12116 function is idempotent.  The location C<*old_plugin_p> must initially
12117 (once per process) contain a null pointer.  A C variable of static
12118 duration (declared at file scope, typically also marked C<static> to give
12119 it internal linkage) will be implicitly initialised appropriately, if it
12120 does not have an explicit initialiser.  This function will only actually
12121 modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
12122 function is also thread safe on the small scale.  It uses appropriate
12123 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12124
12125 When this function is called, the function referenced by C<new_plugin>
12126 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12127 In a threading situation, C<new_plugin> may be called immediately, even
12128 before this function has returned.  C<*old_plugin_p> will always be
12129 appropriately set before C<new_plugin> is called.  If C<new_plugin>
12130 decides not to do anything special with the identifier that it is given
12131 (which is the usual case for most calls to a keyword plugin), it must
12132 chain the plugin function referenced by C<*old_plugin_p>.
12133
12134 Taken all together, XS code to install a keyword plugin should typically
12135 look something like this:
12136
12137     static Perl_keyword_plugin_t next_keyword_plugin;
12138     static OP *my_keyword_plugin(pTHX_
12139         char *keyword_plugin, STRLEN keyword_len, OP **op_ptr)
12140     {
12141         if (memEQs(keyword_ptr, keyword_len,
12142                    "my_new_keyword")) {
12143             ...
12144         } else {
12145             return next_keyword_plugin(aTHX_
12146                 keyword_ptr, keyword_len, op_ptr);
12147         }
12148     }
12149     BOOT:
12150         wrap_keyword_plugin(my_keyword_plugin,
12151                             &next_keyword_plugin);
12152
12153 Direct access to L</PL_keyword_plugin> should be avoided.
12154
12155 =cut
12156 */
12157
12158 void
12159 Perl_wrap_keyword_plugin(pTHX_
12160     Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12161 {
12162     dVAR;
12163
12164     PERL_UNUSED_CONTEXT;
12165     PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12166     if (*old_plugin_p) return;
12167     KEYWORD_PLUGIN_MUTEX_LOCK;
12168     if (!*old_plugin_p) {
12169         *old_plugin_p = PL_keyword_plugin;
12170         PL_keyword_plugin = new_plugin;
12171     }
12172     KEYWORD_PLUGIN_MUTEX_UNLOCK;
12173 }
12174
12175 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12176 static void
12177 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12178 {
12179     SAVEI32(PL_lex_brackets);
12180     if (PL_lex_brackets > 100)
12181         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12182     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12183     SAVEI32(PL_lex_allbrackets);
12184     PL_lex_allbrackets = 0;
12185     SAVEI8(PL_lex_fakeeof);
12186     PL_lex_fakeeof = (U8)fakeeof;
12187     if(yyparse(gramtype) && !PL_parser->error_count)
12188         qerror(Perl_mess(aTHX_ "Parse error"));
12189 }
12190
12191 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12192 static OP *
12193 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12194 {
12195     OP *o;
12196     ENTER;
12197     SAVEVPTR(PL_eval_root);
12198     PL_eval_root = NULL;
12199     parse_recdescent(gramtype, fakeeof);
12200     o = PL_eval_root;
12201     LEAVE;
12202     return o;
12203 }
12204
12205 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12206 static OP *
12207 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12208 {
12209     OP *exprop;
12210     if (flags & ~PARSE_OPTIONAL)
12211         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12212     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12213     if (!exprop && !(flags & PARSE_OPTIONAL)) {
12214         if (!PL_parser->error_count)
12215             qerror(Perl_mess(aTHX_ "Parse error"));
12216         exprop = newOP(OP_NULL, 0);
12217     }
12218     return exprop;
12219 }
12220
12221 /*
12222 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
12223
12224 Parse a Perl arithmetic expression.  This may contain operators of precedence
12225 down to the bit shift operators.  The expression must be followed (and thus
12226 terminated) either by a comparison or lower-precedence operator or by
12227 something that would normally terminate an expression such as semicolon.
12228 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12229 otherwise it is mandatory.  It is up to the caller to ensure that the
12230 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12231 the source of the code to be parsed and the lexical context for the
12232 expression.
12233
12234 The op tree representing the expression is returned.  If an optional
12235 expression is absent, a null pointer is returned, otherwise the pointer
12236 will be non-null.
12237
12238 If an error occurs in parsing or compilation, in most cases a valid op
12239 tree is returned anyway.  The error is reflected in the parser state,
12240 normally resulting in a single exception at the top level of parsing
12241 which covers all the compilation errors that occurred.  Some compilation
12242 errors, however, will throw an exception immediately.
12243
12244 =cut
12245 */
12246
12247 OP *
12248 Perl_parse_arithexpr(pTHX_ U32 flags)
12249 {
12250     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12251 }
12252
12253 /*
12254 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12255
12256 Parse a Perl term expression.  This may contain operators of precedence
12257 down to the assignment operators.  The expression must be followed (and thus
12258 terminated) either by a comma or lower-precedence operator or by
12259 something that would normally terminate an expression such as semicolon.
12260 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12261 otherwise it is mandatory.  It is up to the caller to ensure that the
12262 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12263 the source of the code to be parsed and the lexical context for the
12264 expression.
12265
12266 The op tree representing the expression is returned.  If an optional
12267 expression is absent, a null pointer is returned, otherwise the pointer
12268 will be non-null.
12269
12270 If an error occurs in parsing or compilation, in most cases a valid op
12271 tree is returned anyway.  The error is reflected in the parser state,
12272 normally resulting in a single exception at the top level of parsing
12273 which covers all the compilation errors that occurred.  Some compilation
12274 errors, however, will throw an exception immediately.
12275
12276 =cut
12277 */
12278
12279 OP *
12280 Perl_parse_termexpr(pTHX_ U32 flags)
12281 {
12282     return parse_expr(LEX_FAKEEOF_COMMA, flags);
12283 }
12284
12285 /*
12286 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12287
12288 Parse a Perl list expression.  This may contain operators of precedence
12289 down to the comma operator.  The expression must be followed (and thus
12290 terminated) either by a low-precedence logic operator such as C<or> or by
12291 something that would normally terminate an expression such as semicolon.
12292 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12293 otherwise it is mandatory.  It is up to the caller to ensure that the
12294 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12295 the source of the code to be parsed and the lexical context for the
12296 expression.
12297
12298 The op tree representing the expression is returned.  If an optional
12299 expression is absent, a null pointer is returned, otherwise the pointer
12300 will be non-null.
12301
12302 If an error occurs in parsing or compilation, in most cases a valid op
12303 tree is returned anyway.  The error is reflected in the parser state,
12304 normally resulting in a single exception at the top level of parsing
12305 which covers all the compilation errors that occurred.  Some compilation
12306 errors, however, will throw an exception immediately.
12307
12308 =cut
12309 */
12310
12311 OP *
12312 Perl_parse_listexpr(pTHX_ U32 flags)
12313 {
12314     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12315 }
12316
12317 /*
12318 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12319
12320 Parse a single complete Perl expression.  This allows the full
12321 expression grammar, including the lowest-precedence operators such
12322 as C<or>.  The expression must be followed (and thus terminated) by a
12323 token that an expression would normally be terminated by: end-of-file,
12324 closing bracketing punctuation, semicolon, or one of the keywords that
12325 signals a postfix expression-statement modifier.  If C<flags> has the
12326 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
12327 mandatory.  It is up to the caller to ensure that the dynamic parser
12328 state (L</PL_parser> et al) is correctly set to reflect the source of
12329 the code to be parsed and the lexical context for the expression.
12330
12331 The op tree representing the expression is returned.  If an optional
12332 expression is absent, a null pointer is returned, otherwise the pointer
12333 will be non-null.
12334
12335 If an error occurs in parsing or compilation, in most cases a valid op
12336 tree is returned anyway.  The error is reflected in the parser state,
12337 normally resulting in a single exception at the top level of parsing
12338 which covers all the compilation errors that occurred.  Some compilation
12339 errors, however, will throw an exception immediately.
12340
12341 =cut
12342 */
12343
12344 OP *
12345 Perl_parse_fullexpr(pTHX_ U32 flags)
12346 {
12347     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12348 }
12349
12350 /*
12351 =for apidoc Amx|OP *|parse_block|U32 flags
12352
12353 Parse a single complete Perl code block.  This consists of an opening
12354 brace, a sequence of statements, and a closing brace.  The block
12355 constitutes a lexical scope, so C<my> variables and various compile-time
12356 effects can be contained within it.  It is up to the caller to ensure
12357 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12358 reflect the source of the code to be parsed and the lexical context for
12359 the statement.
12360
12361 The op tree representing the code block is returned.  This is always a
12362 real op, never a null pointer.  It will normally be a C<lineseq> list,
12363 including C<nextstate> or equivalent ops.  No ops to construct any kind
12364 of runtime scope are included by virtue of it being a block.
12365
12366 If an error occurs in parsing or compilation, in most cases a valid op
12367 tree (most likely null) is returned anyway.  The error is reflected in
12368 the parser state, normally resulting in a single exception at the top
12369 level of parsing which covers all the compilation errors that occurred.
12370 Some compilation errors, however, will throw an exception immediately.
12371
12372 The C<flags> parameter is reserved for future use, and must always
12373 be zero.
12374
12375 =cut
12376 */
12377
12378 OP *
12379 Perl_parse_block(pTHX_ U32 flags)
12380 {
12381     if (flags)
12382         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12383     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12384 }
12385
12386 /*
12387 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12388
12389 Parse a single unadorned Perl statement.  This may be a normal imperative
12390 statement or a declaration that has compile-time effect.  It does not
12391 include any label or other affixture.  It is up to the caller to ensure
12392 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12393 reflect the source of the code to be parsed and the lexical context for
12394 the statement.
12395
12396 The op tree representing the statement is returned.  This may be a
12397 null pointer if the statement is null, for example if it was actually
12398 a subroutine definition (which has compile-time side effects).  If not
12399 null, it will be ops directly implementing the statement, suitable to
12400 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
12401 equivalent op (except for those embedded in a scope contained entirely
12402 within the statement).
12403
12404 If an error occurs in parsing or compilation, in most cases a valid op
12405 tree (most likely null) is returned anyway.  The error is reflected in
12406 the parser state, normally resulting in a single exception at the top
12407 level of parsing which covers all the compilation errors that occurred.
12408 Some compilation errors, however, will throw an exception immediately.
12409
12410 The C<flags> parameter is reserved for future use, and must always
12411 be zero.
12412
12413 =cut
12414 */
12415
12416 OP *
12417 Perl_parse_barestmt(pTHX_ U32 flags)
12418 {
12419     if (flags)
12420         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12421     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12422 }
12423
12424 /*
12425 =for apidoc Amx|SV *|parse_label|U32 flags
12426
12427 Parse a single label, possibly optional, of the type that may prefix a
12428 Perl statement.  It is up to the caller to ensure that the dynamic parser
12429 state (L</PL_parser> et al) is correctly set to reflect the source of
12430 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
12431 label is optional, otherwise it is mandatory.
12432
12433 The name of the label is returned in the form of a fresh scalar.  If an
12434 optional label is absent, a null pointer is returned.
12435
12436 If an error occurs in parsing, which can only occur if the label is
12437 mandatory, a valid label is returned anyway.  The error is reflected in
12438 the parser state, normally resulting in a single exception at the top
12439 level of parsing which covers all the compilation errors that occurred.
12440
12441 =cut
12442 */
12443
12444 SV *
12445 Perl_parse_label(pTHX_ U32 flags)
12446 {
12447     if (flags & ~PARSE_OPTIONAL)
12448         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12449     if (PL_nexttoke) {
12450         PL_parser->yychar = yylex();
12451         if (PL_parser->yychar == LABEL) {
12452             char * const lpv = pl_yylval.pval;
12453             STRLEN llen = strlen(lpv);
12454             PL_parser->yychar = YYEMPTY;
12455             return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12456         } else {
12457             yyunlex();
12458             goto no_label;
12459         }
12460     } else {
12461         char *s, *t;
12462         STRLEN wlen, bufptr_pos;
12463         lex_read_space(0);
12464         t = s = PL_bufptr;
12465         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
12466             goto no_label;
12467         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12468         if (word_takes_any_delimiter(s, wlen))
12469             goto no_label;
12470         bufptr_pos = s - SvPVX(PL_linestr);
12471         PL_bufptr = t;
12472         lex_read_space(LEX_KEEP_PREVIOUS);
12473         t = PL_bufptr;
12474         s = SvPVX(PL_linestr) + bufptr_pos;
12475         if (t[0] == ':' && t[1] != ':') {
12476             PL_oldoldbufptr = PL_oldbufptr;
12477             PL_oldbufptr = s;
12478             PL_bufptr = t+1;
12479             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12480         } else {
12481             PL_bufptr = s;
12482             no_label:
12483             if (flags & PARSE_OPTIONAL) {
12484                 return NULL;
12485             } else {
12486                 qerror(Perl_mess(aTHX_ "Parse error"));
12487                 return newSVpvs("x");
12488             }
12489         }
12490     }
12491 }
12492
12493 /*
12494 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12495
12496 Parse a single complete Perl statement.  This may be a normal imperative
12497 statement or a declaration that has compile-time effect, and may include
12498 optional labels.  It is up to the caller to ensure that the dynamic
12499 parser state (L</PL_parser> et al) is correctly set to reflect the source
12500 of the code to be parsed and the lexical context for the statement.
12501
12502 The op tree representing the statement is returned.  This may be a
12503 null pointer if the statement is null, for example if it was actually
12504 a subroutine definition (which has compile-time side effects).  If not
12505 null, it will be the result of a L</newSTATEOP> call, normally including
12506 a C<nextstate> or equivalent op.
12507
12508 If an error occurs in parsing or compilation, in most cases a valid op
12509 tree (most likely null) is returned anyway.  The error is reflected in
12510 the parser state, normally resulting in a single exception at the top
12511 level of parsing which covers all the compilation errors that occurred.
12512 Some compilation errors, however, will throw an exception immediately.
12513
12514 The C<flags> parameter is reserved for future use, and must always
12515 be zero.
12516
12517 =cut
12518 */
12519
12520 OP *
12521 Perl_parse_fullstmt(pTHX_ U32 flags)
12522 {
12523     if (flags)
12524         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12525     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12526 }
12527
12528 /*
12529 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12530
12531 Parse a sequence of zero or more Perl statements.  These may be normal
12532 imperative statements, including optional labels, or declarations
12533 that have compile-time effect, or any mixture thereof.  The statement
12534 sequence ends when a closing brace or end-of-file is encountered in a
12535 place where a new statement could have validly started.  It is up to
12536 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12537 is correctly set to reflect the source of the code to be parsed and the
12538 lexical context for the statements.
12539
12540 The op tree representing the statement sequence is returned.  This may
12541 be a null pointer if the statements were all null, for example if there
12542 were no statements or if there were only subroutine definitions (which
12543 have compile-time side effects).  If not null, it will be a C<lineseq>
12544 list, normally including C<nextstate> or equivalent ops.
12545
12546 If an error occurs in parsing or compilation, in most cases a valid op
12547 tree is returned anyway.  The error is reflected in the parser state,
12548 normally resulting in a single exception at the top level of parsing
12549 which covers all the compilation errors that occurred.  Some compilation
12550 errors, however, will throw an exception immediately.
12551
12552 The C<flags> parameter is reserved for future use, and must always
12553 be zero.
12554
12555 =cut
12556 */
12557
12558 OP *
12559 Perl_parse_stmtseq(pTHX_ U32 flags)
12560 {
12561     OP *stmtseqop;
12562     I32 c;
12563     if (flags)
12564         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12565     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12566     c = lex_peek_unichar(0);
12567     if (c != -1 && c != /*{*/'}')
12568         qerror(Perl_mess(aTHX_ "Parse error"));
12569     return stmtseqop;
12570 }
12571
12572 /*
12573  * ex: set ts=8 sts=4 sw=4 et:
12574  */